PTHLexer.cpp revision d8c02929fe70f03111be73e7b8c402c724238ee9
1010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//
3010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//                     The LLVM Compiler Infrastructure
4010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//
5010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// This file is distributed under the University of Illinois Open Source
6010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// License. See LICENSE.TXT for details.
7010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//
8010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//===----------------------------------------------------------------------===//
9010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//
10010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// This file implements the PTHLexer interface.
11f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//
12f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//===----------------------------------------------------------------------===//
13f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
14f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "clang/Basic/TokenKinds.h"
15f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "clang/Basic/FileManager.h"
16010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "clang/Basic/IdentifierTable.h"
17f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "clang/Lex/PTHLexer.h"
1846d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "clang/Lex/Preprocessor.h"
19f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "clang/Lex/PTHManager.h"
20f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "clang/Lex/Token.h"
21f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "clang/Lex/Preprocessor.h"
22f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "llvm/ADT/StringMap.h"
23010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "llvm/ADT/OwningPtr.h"
24f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "llvm/Support/Compiler.h"
25f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "llvm/Support/MathExtras.h"
26010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "llvm/Support/MemoryBuffer.h"
27010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "llvm/System/Host.h"
28010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)using namespace clang;
29010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
30010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#define DISK_TOKEN_SIZE (1+1+2+4+4)
31010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
32f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//===----------------------------------------------------------------------===//
33f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Utility methods for reading from the mmap'ed PTH file.
34f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//===----------------------------------------------------------------------===//
35f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
36f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)static inline uint16_t ReadUnalignedLE16(const unsigned char *&Data) {
37f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uint16_t V = ((uint16_t)Data[0]) |
38f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)               ((uint16_t)Data[1] <<  8);
39f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Data += 2;
40f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return V;
41f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
42f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
43f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)static inline uint32_t ReadUnalignedLE32(const unsigned char *&Data) {
44f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uint32_t V = ((uint32_t)Data[0])  |
45f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)               ((uint32_t)Data[1] << 8)  |
46f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)               ((uint32_t)Data[2] << 16) |
47f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)               ((uint32_t)Data[3] << 24);
48f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Data += 4;
49f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return V;
50f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
51f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
52f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)static inline uint32_t ReadLE32(const unsigned char *&Data) {
53f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Hosts that directly support little-endian 32-bit loads can just
54f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // use them.  Big-endian hosts need a bswap.
55f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uint32_t V = *((uint32_t*)Data);
56f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (llvm::sys::isBigEndianHost())
57f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    V = llvm::ByteSwap_32(V);
58f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Data += 4;
59f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return V;
60f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
61f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
62f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
63f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//===----------------------------------------------------------------------===//
64f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// PTHLexer methods.
6546d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)//===----------------------------------------------------------------------===//
6646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)
67010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
68010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)                   const unsigned char *ppcond, PTHManager &PM)
69010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
70010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
71010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
72010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
73010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)}
74010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
75010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)void PTHLexer::Lex(Token& Tok) {
76010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)LexNextToken:
77010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
78010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  //===--------------------------------------==//
79010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // Read the raw token data.
80f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //===--------------------------------------==//
81010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
82010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // Shadow CurPtr into an automatic variable.
83010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  const unsigned char *CurPtrShadow = CurPtr;
84010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
85010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // Read in the data for the token.
86010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  unsigned Word0 = ReadLE32(CurPtrShadow);
87010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  uint32_t IdentifierID = ReadLE32(CurPtrShadow);
88010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  uint32_t FileOffset = ReadLE32(CurPtrShadow);
89f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
90010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
91f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
92010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  uint32_t Len = Word0 >> 16;
93010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
94010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  CurPtr = CurPtrShadow;
95010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
96010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  //===--------------------------------------==//
97f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Construct the token itself.
98f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //===--------------------------------------==//
99f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
100f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Tok.startToken();
101f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Tok.setKind(TKind);
102f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Tok.setFlag(TFlags);
103f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  assert(!LexingRawMode);
104f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset));
105f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Tok.setLength(Len);
106f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
107f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Handle identifiers.
108f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (Tok.isLiteral()) {
109f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
110f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
111f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  else if (IdentifierID) {
112f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    MIOpt.ReadToken();
113f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
114f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
115f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    Tok.setIdentifierInfo(II);
116f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
117f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Change the kind of this identifier to the appropriate token kind, e.g.
118010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    // turning "for" into a keyword.
119f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    Tok.setKind(II->getTokenID());
120010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
121010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    if (II->isHandleIdentifierCase())
122010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      PP->HandleIdentifier(Tok);
123f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return;
124f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
125f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
126f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //===--------------------------------------==//
127f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Process the token.
128f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //===--------------------------------------==//
129f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#if 0
130f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  SourceManager& SM = PP->getSourceManager();
131010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  llvm::cerr << SM.getFileEntryForID(FileID)->getName()
132010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    << ':' << SM.getLogicalLineNumber(Tok.getLocation())
133010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    << ':' << SM.getLogicalColumnNumber(Tok.getLocation())
134010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    << '\n';
135f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#endif
136f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
137010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  if (TKind == tok::eof) {
138010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    // Save the end-of-file token.
139f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    EofToken = Tok;
140f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
141f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    Preprocessor *PPCache = PP;
142f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
143010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    assert(!ParsingPreprocessorDirective);
144f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    assert(!LexingRawMode);
145f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
146f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // FIXME: Issue diagnostics similar to Lexer.
147f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (PP->HandleEndOfFile(Tok, false))
148f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      return;
149010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
150f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
151010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    return PPCache->Lex(Tok);
152f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
153f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
154f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (TKind == tok::hash && Tok.isAtStartOfLine()) {
155f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
156f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    assert(!LexingRawMode);
157f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    PP->HandleDirective(Tok);
158f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
159f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (PP->isCurrentLexer(this))
160f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      goto LexNextToken;
161f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
162f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return PP->Lex(Tok);
163f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
164f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
165f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (TKind == tok::eom) {
166f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    assert(ParsingPreprocessorDirective);
167f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    ParsingPreprocessorDirective = false;
168010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    return;
169f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
170f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
171f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  MIOpt.ReadToken();
172f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
173f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
174f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// FIXME: We can just grab the last token instead of storing a copy
175f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// into EofToken.
176f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)void PTHLexer::getEOF(Token& Tok) {
177f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  assert(EofToken.is(tok::eof));
178f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Tok = EofToken;
179010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)}
180010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
181010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)void PTHLexer::DiscardToEndOfLine() {
182010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
183010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)         "Must be in a preprocessing directive!");
184010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
185010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // We assume that if the preprocessor wishes to discard to the end of
186010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // the line that it also means to end the current preprocessor directive.
187010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  ParsingPreprocessorDirective = false;
188116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
189116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  // Skip tokens by only peeking at their token kind and the flags.
190116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  // We don't need to actually reconstruct full tokens from the token buffer.
191116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  // This saves some copies and it also reduces IdentifierInfo* lookup.
192116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  const unsigned char* p = CurPtr;
193116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  while (1) {
194116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    // Read the token kind.  Are we at the end of the file?
195116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
196116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    if (x == tok::eof) break;
197116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
198f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Read the token flags.  Are we at the start of the next line?
199f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
200f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (y & Token::StartOfLine) break;
201f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
202f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Skip to the next token.
203f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    p += DISK_TOKEN_SIZE;
204f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
205f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
206f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  CurPtr = p;
207f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
208f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
209f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)/// SkipBlock - Used by Preprocessor to skip the current conditional block.
210f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool PTHLexer::SkipBlock() {
211f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  assert(CurPPCondPtr && "No cached PP conditional information.");
212f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  assert(LastHashTokPtr && "No known '#' token.");
213f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
214f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const unsigned char* HashEntryI = 0;
215f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uint32_t Offset;
216f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uint32_t TableIdx;
217f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
218f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  do {
219f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Read the token offset from the side-table.
220f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    Offset = ReadLE32(CurPPCondPtr);
221f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
222f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Read the target table index from the side-table.
223f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    TableIdx = ReadLE32(CurPPCondPtr);
224f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
225f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Compute the actual memory address of the '#' token data for this entry.
226f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    HashEntryI = TokBuf + Offset;
227f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
228f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
229f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    //  contain nested blocks.  In the side-table we can jump over these
230f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    //  nested blocks instead of doing a linear search if the next "sibling"
231f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    //  entry is not at a location greater than LastHashTokPtr.
232f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (HashEntryI < LastHashTokPtr && TableIdx) {
233f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // In the side-table we are still at an entry for a '#' token that
234f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // is earlier than the last one we saw.  Check if the location we would
235f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // stride gets us closer.
236f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      const unsigned char* NextPPCondPtr =
237f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        PPCond + TableIdx*(sizeof(uint32_t)*2);
238f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      assert(NextPPCondPtr >= CurPPCondPtr);
239f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // Read where we should jump to.
240f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      uint32_t TmpOffset = ReadLE32(NextPPCondPtr);
241f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      const unsigned char* HashEntryJ = TokBuf + TmpOffset;
242f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
243f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      if (HashEntryJ <= LastHashTokPtr) {
244f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        // Jump directly to the next entry in the side table.
245f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        HashEntryI = HashEntryJ;
246f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        Offset = TmpOffset;
247f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        TableIdx = ReadLE32(NextPPCondPtr);
248f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        CurPPCondPtr = NextPPCondPtr;
249f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      }
250f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    }
251f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
252f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  while (HashEntryI < LastHashTokPtr);
253f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
254f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  assert(TableIdx && "No jumping from #endifs.");
255f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
256f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Update our side-table iterator.
257f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
258f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  assert(NextPPCondPtr >= CurPPCondPtr);
259f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  CurPPCondPtr = NextPPCondPtr;
260f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
261f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Read where we should jump to.
262f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  HashEntryI = TokBuf + ReadLE32(NextPPCondPtr);
263f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uint32_t NextIdx = ReadLE32(NextPPCondPtr);
264f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
265f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // By construction NextIdx will be zero if this is a #endif.  This is useful
266f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // to know to obviate lexing another token.
267f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  bool isEndif = NextIdx == 0;
268f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
269f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // This case can occur when we see something like this:
270f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //
271f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //  #if ...
272f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //   /* a comment or nothing */
273f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //  #elif
274f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //
275f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // If we are skipping the first #if block it will be the case that CurPtr
276f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // already points 'elif'.  Just return.
277f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
278f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (CurPtr > HashEntryI) {
279f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
280116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    // Did we reach a #endif?  If so, go ahead and consume that token as well.
281f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (isEndif)
282f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      CurPtr += DISK_TOKEN_SIZE*2;
283f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    else
284f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      LastHashTokPtr = HashEntryI;
285f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
286f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return isEndif;
287f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
288f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
289f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
290f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  CurPtr = HashEntryI;
291f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
292f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Update the location of the last observed '#'.  This is useful if we
293f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // are skipping multiple blocks.
294f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  LastHashTokPtr = CurPtr;
295f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
296f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Skip the '#' token.
297f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  assert(((tok::TokenKind)*CurPtr) == tok::hash);
298f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  CurPtr += DISK_TOKEN_SIZE;
299f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
300f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Did we reach a #endif?  If so, go ahead and consume that token as well.
301f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
302f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
303f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return isEndif;
304f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
305f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
306f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)SourceLocation PTHLexer::getSourceLocation() {
307f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // getSourceLocation is not on the hot path.  It is used to get the location
308f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // of the next token when transitioning back to this lexer when done
309f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // handling a #included file.  Just read the necessary data from the token
310f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // data buffer to construct the SourceLocation object.
311f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // NOTE: This is a virtual function; hence it is defined out-of-line.
312f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4);
313f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uint32_t Offset = ReadLE32(OffsetPtr);
314f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return FileStartLoc.getFileLocWithOffset(Offset);
315f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
316f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
317f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//===----------------------------------------------------------------------===//
318f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// OnDiskChainedHashTable
319f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//===----------------------------------------------------------------------===//
320f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
321f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)template<typename Info>
322f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)class OnDiskChainedHashTable {
323f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const unsigned NumBuckets;
324f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const unsigned NumEntries;
325f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const unsigned char* const Buckets;
326f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const unsigned char* const Base;
327f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)public:
328f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  typedef typename Info::internal_key_type internal_key_type;
329f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  typedef typename Info::external_key_type external_key_type;
330f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  typedef typename Info::data_type         data_type;
331f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
332f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries,
333f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         const unsigned char* buckets,
334f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         const unsigned char* base)
335f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    : NumBuckets(numBuckets), NumEntries(numEntries),
336f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      Buckets(buckets), Base(base) {
337f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
338f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)               "'buckets' must have a 4-byte alignment");
339f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      }
340f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
341f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
342f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  bool isEmpty() const { return NumEntries == 0; }
343f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
344f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  class iterator {
345f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    const unsigned char* const data;
346f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    const unsigned len;
347f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  public:
348f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    iterator() : data(0), len(0) {}
349f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    iterator(const unsigned char* d, unsigned l) : data(d), len(l) {}
350f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
351f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    data_type operator*() const { return Info::ReadData(data, len); }
352f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    bool operator==(const iterator& X) const { return X.data == data; }
353f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    bool operator!=(const iterator& X) const { return X.data != data; }
354f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  };
355f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
356f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  iterator find(const external_key_type& eKey) {
357f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    const internal_key_type& iKey = Info::GetInternalKey(eKey);
358f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    unsigned key_hash = Info::ComputeHash(iKey);
359f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
360f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Each bucket is just a 32-bit offset into the PTH file.
361f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    unsigned idx = key_hash & (NumBuckets - 1);
362f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx;
363f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
364f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    unsigned offset = ReadLE32(Bucket);
365f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (offset == 0) return iterator(); // Empty bucket.
366f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    const unsigned char* Items = Base + offset;
367f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
368f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // 'Items' starts with a 16-bit unsigned integer representing the
369f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // number of items in this bucket.
370f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    unsigned len = ReadUnalignedLE16(Items);
371f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
372f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    for (unsigned i = 0; i < len; ++i) {
373f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // Read the hash.
374f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      uint32_t item_hash = ReadUnalignedLE32(Items);
375f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
376f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // Determine the length of the key and the data.
377f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      const std::pair<unsigned, unsigned>& L = Info::ReadKeyDataLength(Items);
378f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      unsigned item_len = L.first + L.second;
379f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
380f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // Compare the hashes.  If they are not the same, skip the entry entirely.
381f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      if (item_hash != key_hash) {
382f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        Items += item_len;
383010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)        continue;
384010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      }
385116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
386f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // Read the key.
387f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      const internal_key_type& X =
388f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        Info::ReadKey((const unsigned char* const) Items, L.first);
389f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
390f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // If the key doesn't match just skip reading the value.
391f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      if (!Info::EqualKey(X, iKey)) {
392f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        Items += item_len;
393f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        continue;
3941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
3951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      // The key matches!
3971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return iterator(Items + L.first, L.second);
3981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
3991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
4001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return iterator();
4011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
4021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
403f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  iterator end() const { return iterator(); }
404f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
405f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
406f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  static OnDiskChainedHashTable* Create(const unsigned char* buckets,
407f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                                        const unsigned char* const base) {
408f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
409f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    assert(buckets > base);
410f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
411f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)           "buckets should be 4-byte aligned.");
412f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
413f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    unsigned numBuckets = ReadLE32(buckets);
414116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    unsigned numEntries = ReadLE32(buckets);
415f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets,
416f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                                            base);
417f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
418f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)};
419f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
420f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//===----------------------------------------------------------------------===//
421f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// PTH file lookup: map from strings to file data.
422f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//===----------------------------------------------------------------------===//
423f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
424f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)/// PTHFileLookup - This internal data structure is used by the PTHManager
425010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)///  to map from FileEntry objects managed by FileManager to offsets within
426f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)///  the PTH file.
427f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)namespace {
428f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)class VISIBILITY_HIDDEN PTHFileData {
429f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const uint32_t TokenOff;
430010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  const uint32_t PPCondOff;
431f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)public:
432010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
433010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
434010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
435010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  uint32_t getTokenOffset() const { return TokenOff; }
436010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  uint32_t getPPCondOffset() const { return PPCondOff; }
437};
438
439class VISIBILITY_HIDDEN PTHFileLookupTrait {
440public:
441  typedef PTHFileData      data_type;
442  typedef const FileEntry* external_key_type;
443  typedef const char*      internal_key_type;
444
445  static bool EqualKey(const char* a, const char* b) {
446    return strcmp(a, b) == 0;
447  }
448
449  static unsigned ComputeHash(const char* x) {
450    // More copy-paste nonsense.  Will refactor.
451    unsigned int R = 0;
452    for (; *x != '\0' ; ++x) R = R * 33 + *x;
453    return R + (R >> 5);
454  }
455
456  static const char* GetInternalKey(const FileEntry* FE) {
457    return FE->getName();
458  }
459
460  static std::pair<unsigned, unsigned>
461  ReadKeyDataLength(const unsigned char*& d) {
462    return std::make_pair((unsigned) ReadUnalignedLE16(d), 8U);
463  }
464
465  static const char* ReadKey(const unsigned char* d, unsigned) {
466    return (const char*) d;
467  }
468
469  static PTHFileData ReadData(const unsigned char* d, unsigned) {
470    uint32_t x = ::ReadUnalignedLE32(d);
471    uint32_t y = ::ReadUnalignedLE32(d);
472    return PTHFileData(x, y);
473  }
474};
475} // end anonymous namespace
476
477typedef OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup;
478
479//===----------------------------------------------------------------------===//
480// PTHManager methods.
481//===----------------------------------------------------------------------===//
482
483PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
484                       const unsigned char* idDataTable,
485                       IdentifierInfo** perIDCache,
486                       const unsigned char* sortedIdTable, unsigned numIds,
487                       const unsigned char* spellingBase)
488: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
489  IdDataTable(idDataTable), SortedIdTable(sortedIdTable),
490  NumIds(numIds), PP(0), SpellingBase(spellingBase) {}
491
492PTHManager::~PTHManager() {
493  delete Buf;
494  delete (PTHFileLookup*) FileLookup;
495  free(PerIDCache);
496}
497
498static void InvalidPTH(Diagnostic *Diags, const char* Msg = 0) {
499  if (!Diags) return;
500  if (!Msg) Msg = "Invalid or corrupted PTH file";
501  unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note, Msg);
502  Diags->Report(FullSourceLoc(), DiagID);
503}
504
505PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags) {
506  // Memory map the PTH file.
507  llvm::OwningPtr<llvm::MemoryBuffer>
508  File(llvm::MemoryBuffer::getFile(file.c_str()));
509
510  if (!File) {
511    if (Diags) {
512      unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note,
513                                               "PTH file %0 could not be read");
514      Diags->Report(FullSourceLoc(), DiagID) << file;
515    }
516
517    return 0;
518  }
519
520  // Get the buffer ranges and check if there are at least three 32-bit
521  // words at the end of the file.
522  const unsigned char* BufBeg = (unsigned char*)File->getBufferStart();
523  const unsigned char* BufEnd = (unsigned char*)File->getBufferEnd();
524
525  // Check the prologue of the file.
526  if ((BufEnd - BufBeg) < (signed) (sizeof("cfe-pth") + 3 + 4) ||
527      memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) {
528    InvalidPTH(Diags);
529    return 0;
530  }
531
532  // Read the PTH version.
533  const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1);
534  unsigned Version = ReadLE32(p);
535
536  if (Version != PTHManager::Version) {
537    InvalidPTH(Diags,
538        Version < PTHManager::Version
539        ? "PTH file uses an older PTH format that is no longer supported"
540        : "PTH file uses a newer PTH format that cannot be read");
541    return 0;
542  }
543
544  // Compute the address of the index table at the end of the PTH file.
545  const unsigned char *EndTable = BufBeg + ReadLE32(p);
546
547  if (EndTable >= BufEnd) {
548    InvalidPTH(Diags);
549    return 0;
550  }
551
552  // Construct the file lookup table.  This will be used for mapping from
553  // FileEntry*'s to cached tokens.
554  const unsigned char* FileTableOffset = EndTable + sizeof(uint32_t)*3;
555  const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset);
556
557  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
558    InvalidPTH(Diags);
559    return 0; // FIXME: Proper error diagnostic?
560  }
561
562  llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
563  if (FL->isEmpty()) {
564    InvalidPTH(Diags, "PTH file contains no cached source data");
565    return 0;
566  }
567
568  // Get the location of the table mapping from persistent ids to the
569  // data needed to reconstruct identifiers.
570  const unsigned char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
571  const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset);
572
573  if (!(IData >= BufBeg && IData < BufEnd)) {
574    InvalidPTH(Diags);
575    return 0;
576  }
577
578  // Get the location of the lexigraphically-sorted table of persistent IDs.
579  const unsigned char* SortedIdTableOffset = EndTable + sizeof(uint32_t)*2;
580  const unsigned char* SortedIdTable = BufBeg + ReadLE32(SortedIdTableOffset);
581  if (!(SortedIdTable >= BufBeg && SortedIdTable < BufEnd)) {
582    InvalidPTH(Diags);
583    return 0;
584  }
585
586  // Get the location of the spelling cache.
587  const unsigned char* spellingBaseOffset = EndTable + sizeof(uint32_t)*4;
588  const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset);
589  if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
590    InvalidPTH(Diags);
591    return 0;
592  }
593
594  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
595  uint32_t NumIds = ReadLE32(IData);
596
597  // Pre-allocate the peristent ID -> IdentifierInfo* cache.  We use calloc()
598  // so that we in the best case only zero out memory once when the OS returns
599  // us new pages.
600  IdentifierInfo** PerIDCache = 0;
601
602  if (NumIds) {
603    PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
604    if (!PerIDCache) {
605      InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
606      return 0;
607    }
608  }
609
610  // Create the new PTHManager.
611  return new PTHManager(File.take(), FL.take(), IData, PerIDCache,
612                        SortedIdTable, NumIds, spellingBase);
613}
614IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
615  // Look in the PTH file for the string data for the IdentifierInfo object.
616  const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
617  const unsigned char* IDData =
618    (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry);
619  assert(IDData < (const unsigned char*)Buf->getBufferEnd());
620
621  // Allocate the object.
622  std::pair<IdentifierInfo,const unsigned char*> *Mem =
623    Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
624
625  Mem->second = IDData;
626  IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
627
628  // Store the new IdentifierInfo in the cache.
629  PerIDCache[PersistentID] = II;
630  return II;
631}
632
633IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) {
634  unsigned min = 0;
635  unsigned max = NumIds;
636  unsigned Len = NameEnd - NameStart;
637
638  do {
639    unsigned i = (max - min) / 2 + min;
640    const unsigned char *Ptr = SortedIdTable + (i * 4);
641
642    // Read the persistentID.
643    unsigned perID = ReadLE32(Ptr);
644
645    // Get the IdentifierInfo.
646    IdentifierInfo* II = GetIdentifierInfo(perID);
647
648    // First compare the lengths.
649    unsigned IILen = II->getLength();
650    if (Len < IILen) goto IsLess;
651    if (Len > IILen) goto IsGreater;
652
653    // Now compare the strings!
654    {
655      signed comp = strncmp(NameStart, II->getName(), Len);
656      if (comp < 0) goto IsLess;
657      if (comp > 0) goto IsGreater;
658    }
659    // We found a match!
660    return II;
661
662  IsGreater:
663    if (i == min) break;
664    min = i;
665    continue;
666
667  IsLess:
668    max = i;
669    assert(!(max == min) || (min == i));
670  }
671  while (min != max);
672
673  return 0;
674}
675
676
677PTHLexer *PTHManager::CreateLexer(FileID FID) {
678  const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
679  if (!FE)
680    return 0;
681
682  // Lookup the FileEntry object in our file lookup data structure.  It will
683  // return a variant that indicates whether or not there is an offset within
684  // the PTH file that contains cached tokens.
685  PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
686  PTHFileLookup::iterator I = PFL.find(FE);
687
688  if (I == PFL.end()) // No tokens available?
689    return 0;
690
691  const PTHFileData& FileData = *I;
692
693  const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
694  // Compute the offset of the token data within the buffer.
695  const unsigned char* data = BufStart + FileData.getTokenOffset();
696
697  // Get the location of pp-conditional table.
698  const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
699  uint32_t Len = ReadLE32(ppcond);
700  if (Len == 0) ppcond = 0;
701
702  assert(PP && "No preprocessor set yet!");
703  return new PTHLexer(*PP, FID, data, ppcond, *this);
704}
705