PTHLexer.cpp revision 5f074266cc59563036c40516c814d63825723e20
1274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
3274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//                     The LLVM Compiler Infrastructure
4274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
5274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek// This file is distributed under the University of Illinois Open Source
6274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek// License. See LICENSE.TXT for details.
7274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
8274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//===----------------------------------------------------------------------===//
9274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
10274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek// This file implements the PTHLexer interface.
11274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
12274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//===----------------------------------------------------------------------===//
13274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek
140c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Basic/TokenKinds.h"
150c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Basic/FileManager.h"
160c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Basic/IdentifierTable.h"
17274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek#include "clang/Lex/PTHLexer.h"
18274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek#include "clang/Lex/Preprocessor.h"
190c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Lex/PTHManager.h"
200c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Lex/Token.h"
210c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Lex/Preprocessor.h"
220c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "llvm/Support/Compiler.h"
230c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "llvm/Support/MemoryBuffer.h"
240c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "llvm/ADT/StringMap.h"
250c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "llvm/ADT/OwningPtr.h"
265f074266cc59563036c40516c814d63825723e20Ted Kremenek#include "llvm/Support/Streams.h"
270c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
28274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenekusing namespace clang;
29274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek
// Size in bytes of one on-disk token record, as decoded by PTHLexer::Lex():
//   1 byte  token kind
//   1 byte  token flags
//   3 bytes persistent identifier ID
//   4 bytes file offset
//   2 bytes token length
#define DISK_TOKEN_SIZE (1+1+3+4+2)
31268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
32e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek//===----------------------------------------------------------------------===//
33e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek// Utility methods for reading from the mmap'ed PTH file.
34e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek//===----------------------------------------------------------------------===//
35e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
/// Read8 - Return the byte at 'data' as an unsigned 8-bit value and advance
/// 'data' past it.
static inline uint8_t Read8(const char*& data) {
  const uint8_t Byte = static_cast<uint8_t>(*data);
  ++data;
  return Byte;
}
39e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
/// Read32 - Decode a 32-bit little-endian value (least significant byte
/// first) starting at 'data' and advance 'data' by four bytes.
static inline uint32_t Read32(const char*& data) {
  const unsigned char* Bytes = reinterpret_cast<const unsigned char*>(data);
  data += 4;

  return   static_cast<uint32_t>(Bytes[0])
        | (static_cast<uint32_t>(Bytes[1]) << 8)
        | (static_cast<uint32_t>(Bytes[2]) << 16)
        | (static_cast<uint32_t>(Bytes[3]) << 24);
}
47e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
48e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek//===----------------------------------------------------------------------===//
49e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek// PTHLexer methods.
50e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek//===----------------------------------------------------------------------===//
51e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
520c6a77bc1f52f282a969538f139ebde429076ed3Ted KremenekPTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
5332a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek                   const char* ppcond,
545f074266cc59563036c40516c814d63825723e20Ted Kremenek                   PTHSpellingSearch& mySpellingSrch,
5532a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek                   PTHManager& PM)
56cd223444d1680290efe11da657faafc9a1ac14baTed Kremenek  : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
575f074266cc59563036c40516c814d63825723e20Ted Kremenek    PPCond(ppcond), CurPPCondPtr(ppcond), MySpellingSrch(mySpellingSrch),
585f074266cc59563036c40516c814d63825723e20Ted Kremenek    PTHMgr(PM)
595f074266cc59563036c40516c814d63825723e20Ted Kremenek{
605f074266cc59563036c40516c814d63825723e20Ted Kremenek  FileID = fileloc.getFileID();
615f074266cc59563036c40516c814d63825723e20Ted Kremenek}
62274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek
63e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenekvoid PTHLexer::Lex(Token& Tok) {
64e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted KremenekLexNextToken:
65866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
66866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
67866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  // Read the raw token data.
68866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
69e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
70866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  // Shadow CurPtr into an automatic variable.
71866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  const unsigned char *CurPtrShadow = (const unsigned char*) CurPtr;
72866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
73866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  // Read in the data for the token.  14 bytes in total.
74866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  tok::TokenKind k = (tok::TokenKind) CurPtrShadow[0];
75866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  Token::TokenFlags flags = (Token::TokenFlags) CurPtrShadow[1];
76866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
776b1c9708c8fca3786c766b6d1869721656b31322Ted Kremenek  uint32_t perID = ((uint32_t) CurPtrShadow[2])
78866bdf74547efe32c320554837ffce00fcc084feTed Kremenek      | (((uint32_t) CurPtrShadow[3]) << 8)
7918d9afb815bd8aff885dd64c5078760b3398d7beTed Kremenek      | (((uint32_t) CurPtrShadow[4]) << 16);
800c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
8118d9afb815bd8aff885dd64c5078760b3398d7beTed Kremenek  uint32_t FileOffset = ((uint32_t) CurPtrShadow[5])
8218d9afb815bd8aff885dd64c5078760b3398d7beTed Kremenek      | (((uint32_t) CurPtrShadow[6]) << 8)
8318d9afb815bd8aff885dd64c5078760b3398d7beTed Kremenek      | (((uint32_t) CurPtrShadow[7]) << 16)
8418d9afb815bd8aff885dd64c5078760b3398d7beTed Kremenek      | (((uint32_t) CurPtrShadow[8]) << 24);
85e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
8618d9afb815bd8aff885dd64c5078760b3398d7beTed Kremenek  uint32_t Len = ((uint32_t) CurPtrShadow[9])
8718d9afb815bd8aff885dd64c5078760b3398d7beTed Kremenek      | (((uint32_t) CurPtrShadow[10]) << 8);
88e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
89866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  CurPtr = (const char*) (CurPtrShadow + DISK_TOKEN_SIZE);
90e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
91866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
92866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  // Construct the token itself.
93866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
94e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
95866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  Tok.startToken();
96866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  Tok.setKind(k);
9759d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  Tok.setFlag(flags);
9859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  assert(!LexingRawMode);
996b1c9708c8fca3786c766b6d1869721656b31322Ted Kremenek  Tok.setIdentifierInfo(perID ? PTHMgr.GetIdentifierInfo(perID-1) : 0);
100866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  Tok.setLocation(SourceLocation::getFileLoc(FileID, FileOffset));
101866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  Tok.setLength(Len);
10289d7ee9619d2dbdfa8d956a695c612a104a92cadTed Kremenek
103866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
104866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  // Process the token.
105866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
1065f074266cc59563036c40516c814d63825723e20Ted Kremenek#if 0
1075f074266cc59563036c40516c814d63825723e20Ted Kremenek  SourceManager& SM = PP->getSourceManager();
1085f074266cc59563036c40516c814d63825723e20Ted Kremenek  llvm::cerr << SM.getFileEntryForID(FileID)->getName()
1095f074266cc59563036c40516c814d63825723e20Ted Kremenek    << ':' << SM.getLogicalLineNumber(Tok.getLocation())
1105f074266cc59563036c40516c814d63825723e20Ted Kremenek    << ':' << SM.getLogicalColumnNumber(Tok.getLocation())
1115f074266cc59563036c40516c814d63825723e20Ted Kremenek    << '\n';
1125f074266cc59563036c40516c814d63825723e20Ted Kremenek#endif
11389d7ee9619d2dbdfa8d956a695c612a104a92cadTed Kremenek
11459d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  if (k == tok::identifier) {
11559d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    MIOpt.ReadToken();
11659d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    return PP->HandleIdentifier(Tok);
11759d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  }
11859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek
11959d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  if (k == tok::eof) {
120e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    // Save the end-of-file token.
121e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    EofToken = Tok;
122e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
123cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek    Preprocessor *PPCache = PP;
12459d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek
12559d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    assert(!ParsingPreprocessorDirective);
12659d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    assert(!LexingRawMode);
12759d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek
12859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    // FIXME: Issue diagnostics similar to Lexer.
12959d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    if (PP->HandleEndOfFile(Tok, false))
130d6f53dc4951aace69014619761760addac9e59ecTed Kremenek      return;
13159d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek
132cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
133cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek    return PPCache->Lex(Tok);
134cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek  }
135d6f53dc4951aace69014619761760addac9e59ecTed Kremenek
13659d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  if (k == tok::hash && Tok.isAtStartOfLine()) {
13759d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
13859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    assert(!LexingRawMode);
13959d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    PP->HandleDirective(Tok);
140d6f53dc4951aace69014619761760addac9e59ecTed Kremenek
14159d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    if (PP->isCurrentLexer(this))
14259d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek      goto LexNextToken;
143e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
14459d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    return PP->Lex(Tok);
145e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  }
146e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
14759d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  if (k == tok::eom) {
14859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    assert(ParsingPreprocessorDirective);
14959d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    ParsingPreprocessorDirective = false;
15059d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    return;
15159d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  }
152274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek
15359d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  MIOpt.ReadToken();
154cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek}
155cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek
156e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek// FIXME: We can just grab the last token instead of storing a copy
157e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek// into EofToken.
15859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenekvoid PTHLexer::getEOF(Token& Tok) {
159defb7094c835998bb821e894253287625ce8c74dTed Kremenek  assert(EofToken.is(tok::eof));
1600c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  Tok = EofToken;
161274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek}
16217ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenek
16317ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenekvoid PTHLexer::DiscardToEndOfLine() {
16417ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenek  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
16517ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenek         "Must be in a preprocessing directive!");
1664d35da2e41941965bbee8ed7e8c30e7c21000d71Ted Kremenek
167e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  // We assume that if the preprocessor wishes to discard to the end of
168e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  // the line that it also means to end the current preprocessor directive.
169e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  ParsingPreprocessorDirective = false;
170e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
17174c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  // Skip tokens by only peeking at their token kind and the flags.
17274c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  // We don't need to actually reconstruct full tokens from the token buffer.
17374c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  // This saves some copies and it also reduces IdentifierInfo* lookup.
17474c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  const char* p = CurPtr;
17574c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  while (1) {
17674c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    // Read the token kind.  Are we at the end of the file?
17774c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
17874c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    if (x == tok::eof) break;
179e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
18074c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    // Read the token flags.  Are we at the start of the next line?
18174c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
18280d2f3059326f99ebf7c867db1c7f106ec9485f5Ted Kremenek    if (y & Token::StartOfLine) break;
18374c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek
18474c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    // Skip to the next token.
18574c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    p += DISK_TOKEN_SIZE;
18674c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  }
18774c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek
18874c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  CurPtr = p;
18917ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenek}
1900c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
191268ee7016a2811803989487c0ad3799486092c63Ted Kremenek/// SkipBlock - Used by Preprocessor to skip the current conditional block.
192268ee7016a2811803989487c0ad3799486092c63Ted Kremenekbool PTHLexer::SkipBlock() {
193268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  assert(CurPPCondPtr && "No cached PP conditional information.");
194268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  assert(LastHashTokPtr && "No known '#' token.");
195268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
19641a2660377d215d004fe413c03874bd066b5384cTed Kremenek  const char* HashEntryI = 0;
197268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  uint32_t Offset;
198268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  uint32_t TableIdx;
199268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
200268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  do {
20141a2660377d215d004fe413c03874bd066b5384cTed Kremenek    // Read the token offset from the side-table.
202268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    Offset = Read32(CurPPCondPtr);
20341a2660377d215d004fe413c03874bd066b5384cTed Kremenek
20441a2660377d215d004fe413c03874bd066b5384cTed Kremenek    // Read the target table index from the side-table.
205268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    TableIdx = Read32(CurPPCondPtr);
20641a2660377d215d004fe413c03874bd066b5384cTed Kremenek
20741a2660377d215d004fe413c03874bd066b5384cTed Kremenek    // Compute the actual memory address of the '#' token data for this entry.
20841a2660377d215d004fe413c03874bd066b5384cTed Kremenek    HashEntryI = TokBuf + Offset;
20941a2660377d215d004fe413c03874bd066b5384cTed Kremenek
21041a2660377d215d004fe413c03874bd066b5384cTed Kremenek    // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
21141a2660377d215d004fe413c03874bd066b5384cTed Kremenek    //  contain nested blocks.  In the side-table we can jump over these
21241a2660377d215d004fe413c03874bd066b5384cTed Kremenek    //  nested blocks instead of doing a linear search if the next "sibling"
21341a2660377d215d004fe413c03874bd066b5384cTed Kremenek    //  entry is not at a location greater than LastHashTokPtr.
21441a2660377d215d004fe413c03874bd066b5384cTed Kremenek    if (HashEntryI < LastHashTokPtr && TableIdx) {
21541a2660377d215d004fe413c03874bd066b5384cTed Kremenek      // In the side-table we are still at an entry for a '#' token that
21641a2660377d215d004fe413c03874bd066b5384cTed Kremenek      // is earlier than the last one we saw.  Check if the location we would
21741a2660377d215d004fe413c03874bd066b5384cTed Kremenek      // stride gets us closer.
21841a2660377d215d004fe413c03874bd066b5384cTed Kremenek      const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
21941a2660377d215d004fe413c03874bd066b5384cTed Kremenek      assert(NextPPCondPtr >= CurPPCondPtr);
22041a2660377d215d004fe413c03874bd066b5384cTed Kremenek      // Read where we should jump to.
22141a2660377d215d004fe413c03874bd066b5384cTed Kremenek      uint32_t TmpOffset = Read32(NextPPCondPtr);
22241a2660377d215d004fe413c03874bd066b5384cTed Kremenek      const char* HashEntryJ = TokBuf + TmpOffset;
22341a2660377d215d004fe413c03874bd066b5384cTed Kremenek
22441a2660377d215d004fe413c03874bd066b5384cTed Kremenek      if (HashEntryJ <= LastHashTokPtr) {
22541a2660377d215d004fe413c03874bd066b5384cTed Kremenek        // Jump directly to the next entry in the side table.
22641a2660377d215d004fe413c03874bd066b5384cTed Kremenek        HashEntryI = HashEntryJ;
22741a2660377d215d004fe413c03874bd066b5384cTed Kremenek        Offset = TmpOffset;
22841a2660377d215d004fe413c03874bd066b5384cTed Kremenek        TableIdx = Read32(NextPPCondPtr);
22941a2660377d215d004fe413c03874bd066b5384cTed Kremenek        CurPPCondPtr = NextPPCondPtr;
23041a2660377d215d004fe413c03874bd066b5384cTed Kremenek      }
23141a2660377d215d004fe413c03874bd066b5384cTed Kremenek    }
232268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  }
23341a2660377d215d004fe413c03874bd066b5384cTed Kremenek  while (HashEntryI < LastHashTokPtr);
23441a2660377d215d004fe413c03874bd066b5384cTed Kremenek  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
235268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  assert(TableIdx && "No jumping from #endifs.");
236268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
237268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Update our side-table iterator.
238268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
239268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  assert(NextPPCondPtr >= CurPPCondPtr);
240268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  CurPPCondPtr = NextPPCondPtr;
241268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
242268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Read where we should jump to.
24341a2660377d215d004fe413c03874bd066b5384cTed Kremenek  HashEntryI = TokBuf + Read32(NextPPCondPtr);
244268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  uint32_t NextIdx = Read32(NextPPCondPtr);
245268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
246268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // By construction NextIdx will be zero if this is a #endif.  This is useful
247268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // to know to obviate lexing another token.
248268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  bool isEndif = NextIdx == 0;
249268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
250268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // This case can occur when we see something like this:
251268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //
252268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //  #if ...
253268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //   /* a comment or nothing */
254268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //  #elif
255268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //
256268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // If we are skipping the first #if block it will be the case that CurPtr
257268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // already points 'elif'.  Just return.
258268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
25941a2660377d215d004fe413c03874bd066b5384cTed Kremenek  if (CurPtr > HashEntryI) {
26041a2660377d215d004fe413c03874bd066b5384cTed Kremenek    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
261268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    // Did we reach a #endif?  If so, go ahead and consume that token as well.
262268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    if (isEndif)
263e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek      CurPtr += DISK_TOKEN_SIZE*2;
264268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    else
26541a2660377d215d004fe413c03874bd066b5384cTed Kremenek      LastHashTokPtr = HashEntryI;
266268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
267268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    return isEndif;
268268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  }
269268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
270268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
27141a2660377d215d004fe413c03874bd066b5384cTed Kremenek  CurPtr = HashEntryI;
272268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
273268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Update the location of the last observed '#'.  This is useful if we
274268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // are skipping multiple blocks.
275268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  LastHashTokPtr = CurPtr;
276268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
277e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  // Skip the '#' token.
278e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  assert(((tok::TokenKind) (unsigned char) *CurPtr) == tok::hash);
279e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  CurPtr += DISK_TOKEN_SIZE;
280e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
281268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Did we reach a #endif?  If so, go ahead and consume that token as well.
282e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
283268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
284268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  return isEndif;
285268ee7016a2811803989487c0ad3799486092c63Ted Kremenek}
286268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
28730a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed KremenekSourceLocation PTHLexer::getSourceLocation() {
28830a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  // getLocation is not on the hot path.  It is used to get the location of
28930a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  // the next token when transitioning back to this lexer when done
29030a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  // handling a #included file.  Just read the necessary data from the token
29130a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  // data buffer to construct the SourceLocation object.
29230a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  // NOTE: This is a virtual function; hence it is defined out-of-line.
29359d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  const char* p = CurPtr + (1 + 1 + 3);
29430a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  uint32_t offset =
29530a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek       ((uint32_t) ((uint8_t) p[0]))
29630a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek    | (((uint32_t) ((uint8_t) p[1])) << 8)
29730a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek    | (((uint32_t) ((uint8_t) p[2])) << 16)
29830a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek    | (((uint32_t) ((uint8_t) p[3])) << 24);
29930a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  return SourceLocation::getFileLoc(FileID, offset);
30030a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek}
30130a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek
3025f074266cc59563036c40516c814d63825723e20Ted Kremenek//===----------------------------------------------------------------------===//
3035f074266cc59563036c40516c814d63825723e20Ted Kremenek// getSpelling() - Use cached data in PTH files for getSpelling().
3045f074266cc59563036c40516c814d63825723e20Ted Kremenek//===----------------------------------------------------------------------===//
3055f074266cc59563036c40516c814d63825723e20Ted Kremenek
3065f074266cc59563036c40516c814d63825723e20Ted Kremenekunsigned PTHManager::getSpelling(unsigned FileID, unsigned fpos,
3075f074266cc59563036c40516c814d63825723e20Ted Kremenek                                 const char *& Buffer) {
3085f074266cc59563036c40516c814d63825723e20Ted Kremenek
3095f074266cc59563036c40516c814d63825723e20Ted Kremenek  llvm::DenseMap<unsigned,PTHSpellingSearch*>::iterator I =
3105f074266cc59563036c40516c814d63825723e20Ted Kremenek    SpellingMap.find(FileID);
3115f074266cc59563036c40516c814d63825723e20Ted Kremenek
3125f074266cc59563036c40516c814d63825723e20Ted Kremenek  if (I == SpellingMap.end())
3135f074266cc59563036c40516c814d63825723e20Ted Kremenek      return 0;
3145f074266cc59563036c40516c814d63825723e20Ted Kremenek
3155f074266cc59563036c40516c814d63825723e20Ted Kremenek  return I->second->getSpellingBinarySearch(fpos, Buffer);
3165f074266cc59563036c40516c814d63825723e20Ted Kremenek}
3175f074266cc59563036c40516c814d63825723e20Ted Kremenek
3185f074266cc59563036c40516c814d63825723e20Ted Kremenekunsigned PTHManager::getSpellingAtPTHOffset(unsigned PTHOffset,
3195f074266cc59563036c40516c814d63825723e20Ted Kremenek                                            const char *& Buffer) {
3205f074266cc59563036c40516c814d63825723e20Ted Kremenek
32132a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  const char* p = Buf->getBufferStart() + PTHOffset;
32232a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  assert(p < Buf->getBufferEnd());
32332a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
32432a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  // The string is prefixed by 16 bits for its length, followed by the string
32532a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  // itself.
32632a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  unsigned len = ((unsigned) ((uint8_t) p[0]))
32732a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek    | (((unsigned) ((uint8_t) p[1])) << 8);
32832a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
32932a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  Buffer = p + 2;
33032a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  return len;
33132a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek}
33232a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
3335f074266cc59563036c40516c814d63825723e20Ted Kremenekunsigned PTHSpellingSearch::getSpellingLinearSearch(unsigned fpos,
3345f074266cc59563036c40516c814d63825723e20Ted Kremenek                                                    const char *&Buffer) {
3355f074266cc59563036c40516c814d63825723e20Ted Kremenek  const char* p = LinearItr;
33632a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  unsigned len = 0;
3375f074266cc59563036c40516c814d63825723e20Ted Kremenek
3385f074266cc59563036c40516c814d63825723e20Ted Kremenek  if (!SpellingsLeft)
3395f074266cc59563036c40516c814d63825723e20Ted Kremenek    return getSpellingBinarySearch(fpos, Buffer);
3405f074266cc59563036c40516c814d63825723e20Ted Kremenek
3415f074266cc59563036c40516c814d63825723e20Ted Kremenek  do {
34232a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek    uint32_t TokOffset =
34332a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek      ((uint32_t) ((uint8_t) p[0]))
34432a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek      | (((uint32_t) ((uint8_t) p[1])) << 8)
34532a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek      | (((uint32_t) ((uint8_t) p[2])) << 16)
34632a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek      | (((uint32_t) ((uint8_t) p[3])) << 24);
34732a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
34832a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek    if (TokOffset > fpos)
3495f074266cc59563036c40516c814d63825723e20Ted Kremenek      return getSpellingBinarySearch(fpos, Buffer);
35032a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
35132a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek    --SpellingsLeft;
35232a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
35332a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek    // Did we find a matching token offset for this spelling?
35432a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek    if (TokOffset == fpos) {
35532a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek      uint32_t SpellingPTHOffset =
35632a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek        ((uint32_t) ((uint8_t) p[4]))
35732a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek        | (((uint32_t) ((uint8_t) p[5])) << 8)
35832a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek        | (((uint32_t) ((uint8_t) p[6])) << 16)
35932a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek        | (((uint32_t) ((uint8_t) p[7])) << 24);
36032a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
3615f074266cc59563036c40516c814d63825723e20Ted Kremenek      len = PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
36232a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek      break;
36332a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek    }
36432a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
36532a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek    // No match.  Keep on looking.
36632a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek    p += sizeof(uint32_t)*2;
36732a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  }
3685f074266cc59563036c40516c814d63825723e20Ted Kremenek  while (SpellingsLeft);
36932a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
3705f074266cc59563036c40516c814d63825723e20Ted Kremenek  LinearItr = p;
37132a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  return len;
372b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek}
373b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek
3745f074266cc59563036c40516c814d63825723e20Ted Kremenekunsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos,
3755f074266cc59563036c40516c814d63825723e20Ted Kremenek                                                    const char *& Buffer) {
3765f074266cc59563036c40516c814d63825723e20Ted Kremenek
3775f074266cc59563036c40516c814d63825723e20Ted Kremenek  assert ((TableEnd - TableBeg) % SpellingEntrySize == 0);
3785f074266cc59563036c40516c814d63825723e20Ted Kremenek
3795f074266cc59563036c40516c814d63825723e20Ted Kremenek  unsigned min = 0;
3805f074266cc59563036c40516c814d63825723e20Ted Kremenek  const char* tb = TableBeg;
3815f074266cc59563036c40516c814d63825723e20Ted Kremenek  unsigned max = (TableEnd - tb) / SpellingEntrySize;
3825f074266cc59563036c40516c814d63825723e20Ted Kremenek
3835f074266cc59563036c40516c814d63825723e20Ted Kremenek  while (min != max) {
3845f074266cc59563036c40516c814d63825723e20Ted Kremenek    unsigned i = (max - min) / 2 + min;
3855f074266cc59563036c40516c814d63825723e20Ted Kremenek    const char* p = tb + (i * SpellingEntrySize);
3865f074266cc59563036c40516c814d63825723e20Ted Kremenek
3875f074266cc59563036c40516c814d63825723e20Ted Kremenek    uint32_t TokOffset =
3885f074266cc59563036c40516c814d63825723e20Ted Kremenek      ((uint32_t) ((uint8_t) p[0]))
3895f074266cc59563036c40516c814d63825723e20Ted Kremenek      | (((uint32_t) ((uint8_t) p[1])) << 8)
3905f074266cc59563036c40516c814d63825723e20Ted Kremenek      | (((uint32_t) ((uint8_t) p[2])) << 16)
3915f074266cc59563036c40516c814d63825723e20Ted Kremenek      | (((uint32_t) ((uint8_t) p[3])) << 24);
3925f074266cc59563036c40516c814d63825723e20Ted Kremenek
3935f074266cc59563036c40516c814d63825723e20Ted Kremenek    if (TokOffset > fpos) {
3945f074266cc59563036c40516c814d63825723e20Ted Kremenek      max = i;
3955f074266cc59563036c40516c814d63825723e20Ted Kremenek      continue;
3965f074266cc59563036c40516c814d63825723e20Ted Kremenek    }
3975f074266cc59563036c40516c814d63825723e20Ted Kremenek
3985f074266cc59563036c40516c814d63825723e20Ted Kremenek    if (TokOffset < fpos) {
3995f074266cc59563036c40516c814d63825723e20Ted Kremenek      min = i;
4005f074266cc59563036c40516c814d63825723e20Ted Kremenek      continue;
4015f074266cc59563036c40516c814d63825723e20Ted Kremenek    }
4025f074266cc59563036c40516c814d63825723e20Ted Kremenek
4035f074266cc59563036c40516c814d63825723e20Ted Kremenek    uint32_t SpellingPTHOffset =
4045f074266cc59563036c40516c814d63825723e20Ted Kremenek        ((uint32_t) ((uint8_t) p[4]))
4055f074266cc59563036c40516c814d63825723e20Ted Kremenek        | (((uint32_t) ((uint8_t) p[5])) << 8)
4065f074266cc59563036c40516c814d63825723e20Ted Kremenek        | (((uint32_t) ((uint8_t) p[6])) << 16)
4075f074266cc59563036c40516c814d63825723e20Ted Kremenek        | (((uint32_t) ((uint8_t) p[7])) << 24);
4085f074266cc59563036c40516c814d63825723e20Ted Kremenek
4095f074266cc59563036c40516c814d63825723e20Ted Kremenek    return PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
4105f074266cc59563036c40516c814d63825723e20Ted Kremenek  }
4115f074266cc59563036c40516c814d63825723e20Ted Kremenek
4125f074266cc59563036c40516c814d63825723e20Ted Kremenek  return 0;
4135f074266cc59563036c40516c814d63825723e20Ted Kremenek}
4145f074266cc59563036c40516c814d63825723e20Ted Kremenek
4155f074266cc59563036c40516c814d63825723e20Ted Kremenekunsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
4165f074266cc59563036c40516c814d63825723e20Ted Kremenek  SourceManager& SM = PP->getSourceManager();
4175f074266cc59563036c40516c814d63825723e20Ted Kremenek  sloc = SM.getPhysicalLoc(sloc);
4185f074266cc59563036c40516c814d63825723e20Ted Kremenek  unsigned fid = SM.getCanonicalFileID(sloc);
4195f074266cc59563036c40516c814d63825723e20Ted Kremenek  unsigned fpos = SM.getFullFilePos(sloc);
4205f074266cc59563036c40516c814d63825723e20Ted Kremenek
4215f074266cc59563036c40516c814d63825723e20Ted Kremenek  if (fid == FileID)
4225f074266cc59563036c40516c814d63825723e20Ted Kremenek    return MySpellingSrch.getSpellingLinearSearch(fpos, Buffer);
4235f074266cc59563036c40516c814d63825723e20Ted Kremenek
4245f074266cc59563036c40516c814d63825723e20Ted Kremenek  return PTHMgr.getSpelling(fid, fpos, Buffer);
4255f074266cc59563036c40516c814d63825723e20Ted Kremenek}
4265f074266cc59563036c40516c814d63825723e20Ted Kremenek
4270c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek//===----------------------------------------------------------------------===//
4280c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek// Internal Data Structures for PTH file lookup and resolving identifiers.
4290c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek//===----------------------------------------------------------------------===//
4300c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4310c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4320c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek/// PTHFileLookup - This internal data structure is used by the PTHManager
4330c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek///  to map from FileEntry objects managed by FileManager to offsets within
4340c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek///  the PTH file.
4350c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremeneknamespace {
4360c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenekclass VISIBILITY_HIDDEN PTHFileLookup {
4370c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenekpublic:
4380c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  class Val {
439fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek    uint32_t TokenOff;
440fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek    uint32_t PPCondOff;
441b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek    uint32_t SpellingOff;
4420c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4430c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  public:
444fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek    Val() : TokenOff(~0) {}
445b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek    Val(uint32_t toff, uint32_t poff, uint32_t soff)
446b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek      : TokenOff(toff), PPCondOff(poff), SpellingOff(soff) {}
4470c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
448fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek    uint32_t getTokenOffset() const {
449fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek      assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
450fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek      return TokenOff;
4510c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    }
4520c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
453b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek    uint32_t getPPCondOffset() const {
454fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek      assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
455fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek      return PPCondOff;
456fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek    }
457fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek
458b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek    uint32_t getSpellingOffset() const {
459b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek      assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
460b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek      return SpellingOff;
461b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek    }
462b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek
463fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek    bool isValid() const { return TokenOff != ~((uint32_t)0); }
4640c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  };
4650c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4660c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenekprivate:
4670c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  llvm::StringMap<Val> FileMap;
4680c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4690c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenekpublic:
4700c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  PTHFileLookup() {};
4710c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4720c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  Val Lookup(const FileEntry* FE) {
4730c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    const char* s = FE->getName();
4740c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    unsigned size = strlen(s);
4750c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return FileMap.GetOrCreateValue(s, s+size).getValue();
4760c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  }
4770c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4780c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  void ReadTable(const char* D) {
4790c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    uint32_t N = Read32(D);     // Read the length of the table.
4800c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4810c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    for ( ; N > 0; --N) {       // The rest of the data is the table itself.
4820c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek      uint32_t len = Read32(D);
4830c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek      const char* s = D;
4840c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek      D += len;
485b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek
486fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek      uint32_t TokenOff = Read32(D);
487b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek      uint32_t PPCondOff = Read32(D);
488b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek      uint32_t SpellingOff = Read32(D);
489b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek
490b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek      FileMap.GetOrCreateValue(s, s+len).getValue() =
491b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek        Val(TokenOff, PPCondOff, SpellingOff);
4920c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    }
4930c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  }
4940c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek};
4950c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek} // end anonymous namespace
4960c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4970c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek//===----------------------------------------------------------------------===//
4980c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek// PTHManager methods.
4990c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek//===----------------------------------------------------------------------===//
5000c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5010c6a77bc1f52f282a969538f139ebde429076ed3Ted KremenekPTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
502cf58e6249c6b018508e34bcb76202caa42d2451aTed Kremenek                       const char* idDataTable, IdentifierInfo** perIDCache,
5036183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek                       Preprocessor& pp)
5046183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
5056183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
5060c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5070c6a77bc1f52f282a969538f139ebde429076ed3Ted KremenekPTHManager::~PTHManager() {
5080c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  delete Buf;
5090c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  delete (PTHFileLookup*) FileLookup;
5100e50b6e7c104d00614baa3d80df62f1630a94d9cTed Kremenek  free(PerIDCache);
5110c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek}
5120c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5130c6a77bc1f52f282a969538f139ebde429076ed3Ted KremenekPTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
5140c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5150c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Memory map the PTH file.
5160c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  llvm::OwningPtr<llvm::MemoryBuffer>
5170c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  File(llvm::MemoryBuffer::getFile(file.c_str()));
5180c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5190c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  if (!File)
5200c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0;
5210c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5220c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Get the buffer ranges and check if there are at least three 32-bit
5230c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // words at the end of the file.
5240c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  const char* BufBeg = File->getBufferStart();
5250c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  const char* BufEnd = File->getBufferEnd();
5260c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5270c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
5280c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    assert(false && "Invalid PTH file.");
5290c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0; // FIXME: Proper error diagnostic?
5300c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  }
5310c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5320c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Compute the address of the index table at the end of the PTH file.
5330c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // This table contains the offset of the file lookup table, the
5340c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // persistent ID -> identifer data table.
5350c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  const char* EndTable = BufEnd - sizeof(uint32_t)*3;
5360c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5370c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Construct the file lookup table.  This will be used for mapping from
5380c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // FileEntry*'s to cached tokens.
5390c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
5400c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  const char* FileTable = BufBeg + Read32(FileTableOffset);
5410c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5420c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
5430c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    assert(false && "Invalid PTH file.");
5440c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0; // FIXME: Proper error diagnostic?
5450c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  }
5460c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5470c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
5480c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  FL->ReadTable(FileTable);
5490c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5500c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Get the location of the table mapping from persistent ids to the
5510c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // data needed to reconstruct identifiers.
5520c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
5530c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  const char* IData = BufBeg + Read32(IDTableOffset);
5540c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  if (!(IData > BufBeg && IData < BufEnd)) {
5550c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    assert(false && "Invalid PTH file.");
5560c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0; // FIXME: Proper error diagnostic?
5570c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  }
5580c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5596183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
5606183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  uint32_t NumIds = Read32(IData);
5616183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek
5626183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  // Pre-allocate the peristent ID -> IdentifierInfo* cache.  We use calloc()
5636183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  // so that we in the best case only zero out memory once when the OS returns
5646183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  // us new pages.
5656183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  IdentifierInfo** PerIDCache =
5666183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek    (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache));
5676183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek
5686183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  if (!PerIDCache) {
5696183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek    assert(false && "Could not allocate Persistent ID cache.");
5706183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek    return 0;
5716183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  }
5726183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek
5736183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  // Create the new lexer.
5746183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP);
5750c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek}
5760c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
577866bdf74547efe32c320554837ffce00fcc084feTed KremenekIdentifierInfo* PTHManager::GetIdentifierInfo(unsigned persistentID) {
578866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
5790c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Check if the IdentifierInfo has already been resolved.
580cf58e6249c6b018508e34bcb76202caa42d2451aTed Kremenek  IdentifierInfo*& II = PerIDCache[persistentID];
5810c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  if (II) return II;
5820c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5830c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Look in the PTH file for the string data for the IdentifierInfo object.
5840c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
5850c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
5860c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  assert(IDData < Buf->getBufferEnd());
5870c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5880c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Read the length of the string.
5890c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  uint32_t len = Read32(IDData);
5900c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5910c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Get the IdentifierInfo* with the specified string.
5920c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  II = &ITable.get(IDData, IDData+len);
5930c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  return II;
5940c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek}
5950c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5960c6a77bc1f52f282a969538f139ebde429076ed3Ted KremenekPTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
5970c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5980c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  if (!FE)
5990c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0;
6000c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
6010c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Lookup the FileEntry object in our file lookup data structure.  It will
6020c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // return a variant that indicates whether or not there is an offset within
6030c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // the PTH file that contains cached tokens.
604fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek  PTHFileLookup::Val FileData = ((PTHFileLookup*) FileLookup)->Lookup(FE);
6050c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
606fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek  if (!FileData.isValid()) // No tokens available.
6070c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0;
6080c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
6090c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Compute the offset of the token data within the buffer.
610fb645b6547b75ddc2e3c7ab2126ad8beeefca62dTed Kremenek  const char* data = Buf->getBufferStart() + FileData.getTokenOffset();
611268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
612268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Get the location of pp-conditional table.
613b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek  const char* ppcond = Buf->getBufferStart() + FileData.getPPCondOffset();
614b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek  uint32_t len = Read32(ppcond);
615268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  if (len == 0) ppcond = 0;
61632a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
61732a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  // Get the location of the spelling table.
61832a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  const char* spellingTable = Buf->getBufferStart() +
61932a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek                              FileData.getSpellingOffset();
62032a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
62132a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  len = Read32(spellingTable);
62232a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek  if (len == 0) spellingTable = 0;
623b70e3dafb9618f34017061400dc19ac5e3539a6dTed Kremenek
6240c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  assert(data < Buf->getBufferEnd());
6255f074266cc59563036c40516c814d63825723e20Ted Kremenek
6265f074266cc59563036c40516c814d63825723e20Ted Kremenek  // Create the SpellingSearch object for this FileID.
6275f074266cc59563036c40516c814d63825723e20Ted Kremenek  PTHSpellingSearch* ss = new PTHSpellingSearch(*this, len, spellingTable);
6285f074266cc59563036c40516c814d63825723e20Ted Kremenek  SpellingMap[FileID] = ss;
6295f074266cc59563036c40516c814d63825723e20Ted Kremenek
630268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond,
6315f074266cc59563036c40516c814d63825723e20Ted Kremenek                      *ss, *this);
6320c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek}
633