Preprocessor.h revision 23f77e59718385512984d4e2a021bef52b9f6ddf
1116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
2116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch//
3116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch//                     The LLVM Compiler Infrastructure
4116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch//
5116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch// This file is distributed under the University of Illinois Open Source
6116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch// License. See LICENSE.TXT for details.
7116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch//
8116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch//===----------------------------------------------------------------------===//
9116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch//
10116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch//  This file defines the Preprocessor interface.
111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//
12116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch//===----------------------------------------------------------------------===//
13116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
14116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#define LLVM_CLANG_LEX_PREPROCESSOR_H
16116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
17116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "clang/Lex/Lexer.h"
18116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "clang/Lex/PTHLexer.h"
19116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "clang/Lex/PPCallbacks.h"
20116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "clang/Lex/TokenLexer.h"
21116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "clang/Lex/PTHManager.h"
22116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "clang/Basic/Builtins.h"
23116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "clang/Basic/Diagnostic.h"
24116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "clang/Basic/IdentifierTable.h"
25116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "clang/Basic/SourceLocation.h"
26116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "llvm/ADT/DenseMap.h"
27116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "llvm/ADT/OwningPtr.h"
28116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "llvm/ADT/SmallVector.h"
29116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "llvm/Support/Allocator.h"
30116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include <vector>
31116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
32116680a4aac90f2aa7413d9095a592090648e557Ben Murdochnamespace clang {
33116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
34116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass SourceManager;
35116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass FileManager;
36116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass FileEntry;
37116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass HeaderSearch;
38116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass PragmaNamespace;
39116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass PragmaHandler;
40116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass CommentHandler;
41116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass ScratchBuffer;
42116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass TargetInfo;
43116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass PPCallbacks;
44116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass DirectoryLookup;
45116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/// Preprocessor - This object engages in a tight little dance with the lexer to
471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci/// efficiently preprocess tokens.  Lexers know only about tokens within a
48116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch/// single source file, and don't know anything about preprocessor-level issues
49116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch/// like the #include stack, token expansion, etc.
50116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch///
51116680a4aac90f2aa7413d9095a592090648e557Ben Murdochclass Preprocessor {
52116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  Diagnostic        *Diags;
53116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  LangOptions        Features;
54116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  const TargetInfo  &Target;
55116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  FileManager       &FileMgr;
56116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  SourceManager     &SourceMgr;
57116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  ScratchBuffer     *ScratchBuf;
58116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  HeaderSearch      &HeaderInfo;
59116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
60116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// PTH - An optional PTHManager object used for getting tokens from
61116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  ///  a token cache rather than lexing the original source file.
62116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  llvm::OwningPtr<PTHManager> PTH;
631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// BP - A BumpPtrAllocator object used to quickly allocate and release
65116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  ///  objects internal to the Preprocessor.
661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  llvm::BumpPtrAllocator BP;
671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// Identifiers for builtin macros and other builtins.
691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
70116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
71116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
72116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
73116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
74116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
75116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident_Pragma, *Ident__VA_ARGS__; // _Pragma, __VA_ARGS__
76116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident__has_feature;              // __has_feature
77116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident__has_builtin;              // __has_builtin
78116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident__has_include;              // __has_include
79116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *Ident__has_include_next;         // __has_include_next
80116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
81116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  SourceLocation DATELoc, TIMELoc;
82116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  unsigned CounterValue;  // Next __COUNTER__ value.
831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  enum {
85116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    /// MaxAllowedIncludeStackDepth - Maximum depth of #includes.
86116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    MaxAllowedIncludeStackDepth = 200
87116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  };
88116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
89116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  // State that is set before the preprocessor begins.
90116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  bool KeepComments : 1;
91116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  bool KeepMacroComments : 1;
92116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
93116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  // State that changes while the preprocessor runs:
94116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
95116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
96116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// Whether the preprocessor owns the header search object.
97116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  bool OwnsHeaderSearch : 1;
98116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
99116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// DisableMacroExpansion - True if macro expansion is disabled.
100116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  bool DisableMacroExpansion : 1;
101116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
102116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// Identifiers - This is mapping/lookup information for all identifiers in
103116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// the program, including program keywords.
104116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  mutable IdentifierTable Identifiers;
105116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
106116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// Selectors - This table contains all the selectors in the program. Unlike
107116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// IdentifierTable above, this table *isn't* populated by the preprocessor.
108116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// It is declared/instantiated here because its role/lifetime is
109116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// conceptually similar to the IdentifierTable's. In addition, the current
110116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// control flow (in clang::ParseAST()) makes it convenient to put here.
111116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
112116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// the lifetime of the preprocessor.
113116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  SelectorTable Selectors;
114116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
115116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// BuiltinInfo - Information about builtins.
116116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  Builtin::Context BuiltinInfo;
1171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
118116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// PragmaHandlers - This tracks all of the pragmas that the client registered
119116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// with this preprocessor.
120116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  PragmaNamespace *PragmaHandlers;
121116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
122116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// \brief Tracks all of the comment handlers that the client registered
123116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// with this preprocessor.
124116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  std::vector<CommentHandler *> CommentHandlers;
125116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
126116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// \brief The file that we're performing code-completion for, if any.
127116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  const FileEntry *CodeCompletionFile;
128116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
129116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// CurLexer - This is the current top of the stack that we're lexing from if
130116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// not expanding a macro and we are lexing directly from source code.
131116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
132116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  llvm::OwningPtr<Lexer> CurLexer;
1331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// CurPTHLexer - This is the current top of stack that we're lexing from if
1351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ///  not expanding from a macro and we are lexing from a PTH cache.
1361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
1371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  llvm::OwningPtr<PTHLexer> CurPTHLexer;
1381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// CurPPLexer - This is the current top of the stack that we're lexing from
1401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ///  if not expanding a macro.  This is an alias for either CurLexer or
141116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  ///  CurPTHLexer.
142116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  PreprocessorLexer *CurPPLexer;
143116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
1441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// CurDirLookup - The DirectoryLookup structure used to find the current
1451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
1461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// implement #include_next and find directory-specific properties.
1471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const DirectoryLookup *CurDirLookup;
1481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// CurTokenLexer - This is the current macro we are expanding, if we are
1501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// expanding a macro.  One of CurLexer and CurTokenLexer must be null.
1511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  llvm::OwningPtr<TokenLexer> CurTokenLexer;
1521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// IncludeMacroStack - This keeps track of the stack of files currently
1541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// #included, and macros currently being expanded from, not counting
1551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// CurLexer/CurTokenLexer.
1561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  struct IncludeStackInfo {
1571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    Lexer                 *TheLexer;
1581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    PTHLexer              *ThePTHLexer;
1591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    PreprocessorLexer     *ThePPLexer;
1601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    TokenLexer            *TheTokenLexer;
1611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    const DirectoryLookup *TheDirLookup;
1621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    IncludeStackInfo(Lexer *L, PTHLexer* P, PreprocessorLexer* PPL,
164116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch                     TokenLexer* TL, const DirectoryLookup *D)
165116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      : TheLexer(L), ThePTHLexer(P), ThePPLexer(PPL), TheTokenLexer(TL),
166116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch        TheDirLookup(D) {}
167116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  };
168116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  std::vector<IncludeStackInfo> IncludeMacroStack;
169116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
1701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// Callbacks - These are actions invoked when some preprocessor activity is
171116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// encountered (e.g. a file is #included, etc).
172116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  PPCallbacks *Callbacks;
173116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
174116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// Macros - For each IdentifierInfo with 'HasMacro' set, we keep a mapping
175116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// to the actual definition of the macro.
1761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  llvm::DenseMap<IdentifierInfo*, MacroInfo*> Macros;
1771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// MICache - A "freelist" of MacroInfo objects that can be reused for quick
1791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// allocation.
1801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// FIXME: why not use a singly linked list?
1811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  std::vector<MacroInfo*> MICache;
1821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
183116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// MacroArgCache - This is a "freelist" of MacroArg objects that can be
1841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// reused for quick allocation.
1851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  MacroArgs *MacroArgCache;
1861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  friend class MacroArgs;
187116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
188116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  // Various statistics we track for performance analysis.
189116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
1901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  unsigned NumIf, NumElse, NumEndif;
1911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
1921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
193116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
194116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  unsigned NumSkipped;
1951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
196116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// Predefines - This string holds the predefined macros that the preprocessor
197116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// should use from the command line etc.
198116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  std::string Predefines;
199116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
200116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// TokenLexerCache - Cache macro expanders to reduce malloc traffic.
2011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  enum { TokenLexerCacheSize = 8 };
202116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  unsigned NumCachedTokenLexers;
203116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  TokenLexer *TokenLexerCache[TokenLexerCacheSize];
204116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
205116680a4aac90f2aa7413d9095a592090648e557Ben Murdochprivate:  // Cached tokens state.
206116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  typedef llvm::SmallVector<Token, 1> CachedTokensTy;
207116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
208116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// CachedTokens - Cached tokens are stored here when we do backtracking or
209116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// lookahead. They are "lexed" by the CachingLex() method.
2101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  CachedTokensTy CachedTokens;
211116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
212116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// CachedLexPos - The position of the cached token that CachingLex() should
213116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// "lex" next. If it points beyond the CachedTokens vector, it means that
2141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// a normal Lex() should be invoked.
215116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  CachedTokensTy::size_type CachedLexPos;
216116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
217116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// BacktrackPositions - Stack of backtrack positions, allowing nested
2181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// backtracks. The EnableBacktrackAtThisPos() method pushes a position to
219116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// indicate where CachedLexPos should be set when the BackTrack() method is
2201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// invoked (at which point the last position is popped).
221116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  std::vector<CachedTokensTy::size_type> BacktrackPositions;
2221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccipublic:
2241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  Preprocessor(Diagnostic &diags, const LangOptions &opts,
2251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci               const TargetInfo &target,
226116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch               SourceManager &SM, HeaderSearch &Headers,
227116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch               IdentifierInfoLookup *IILookup = 0,
228116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch               bool OwnsHeaderSearch = false);
2291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ~Preprocessor();
2311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  Diagnostic &getDiagnostics() const { return *Diags; }
2331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void setDiagnostics(Diagnostic &D) { Diags = &D; }
2341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const LangOptions &getLangOptions() const { return Features; }
2361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const TargetInfo &getTargetInfo() const { return Target; }
2371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  FileManager &getFileManager() const { return FileMgr; }
2381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  SourceManager &getSourceManager() const { return SourceMgr; }
2391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
2401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  IdentifierTable &getIdentifierTable() { return Identifiers; }
2421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  SelectorTable &getSelectorTable() { return Selectors; }
2431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
2441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
2451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void setPTHManager(PTHManager* pm);
2471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  PTHManager *getPTHManager() { return PTH.get(); }
2491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// SetCommentRetentionState - Control whether or not the preprocessor retains
2511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// comments in output.
2521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
2531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    this->KeepComments = KeepComments | KeepMacroComments;
2541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    this->KeepMacroComments = KeepMacroComments;
2551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
2561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  bool getCommentRetentionState() const { return KeepComments; }
2581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// isCurrentLexer - Return true if we are lexing directly from the specified
2601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// lexer.
2611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  bool isCurrentLexer(const PreprocessorLexer *L) const {
2621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    return CurPPLexer == L;
2631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
2641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// getCurrentLexer - Return the current lexer being lexed from.  Note
2661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// that this ignores any potentially active macro expansions and _Pragma
2671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// expansions going on at the time.
2681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
2691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// getCurrentFileLexer - Return the current file lexer being lexed from.  Note
2711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// that this ignores any potentially active macro expansions and _Pragma
2721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// expansions going on at the time.
2731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  PreprocessorLexer *getCurrentFileLexer() const;
2741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// getPPCallbacks/setPPCallbacks - Accessors for preprocessor callbacks.
2761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// Note that this class takes ownership of any PPCallbacks object given to
2771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// it.
2781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  PPCallbacks *getPPCallbacks() const { return Callbacks; }
2791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void setPPCallbacks(PPCallbacks *C) {
280116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    if (Callbacks)
281116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      C = new PPChainedCallbacks(C, Callbacks);
282116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    Callbacks = C;
283116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  }
284116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
285116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// getMacroInfo - Given an identifier, return the MacroInfo it is #defined to
286116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// or null if it isn't #define'd.
287116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  MacroInfo *getMacroInfo(IdentifierInfo *II) const {
288116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    return II->hasMacroDefinition() ? Macros.find(II)->second : 0;
2891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
2901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// setMacroInfo - Specify a macro for this identifier.
2921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ///
2931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void setMacroInfo(IdentifierInfo *II, MacroInfo *MI);
2941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// macro_iterator/macro_begin/macro_end - This allows you to walk the current
2961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// state of the macro table.  This visits every currently-defined macro.
297116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  typedef llvm::DenseMap<IdentifierInfo*,
298116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch                         MacroInfo*>::const_iterator macro_iterator;
299116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  macro_iterator macro_begin() const { return Macros.begin(); }
300116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  macro_iterator macro_end() const { return Macros.end(); }
301116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
302116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
303116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
3041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  const std::string &getPredefines() const { return Predefines; }
3051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// setPredefines - Set the predefines for this Preprocessor.  These
3061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// predefines are automatically injected when parsing the main file.
307116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  void setPredefines(const char *P) { Predefines = P; }
308116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  void setPredefines(const std::string &P) { Predefines = P; }
309116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
310116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// getIdentifierInfo - Return information about the specified preprocessor
311116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// identifier token.  The version of this method that takes two character
312116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// pointers is preferred unless the identifier is already available as a
313116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// string (this avoids allocation and copying of memory to construct an
314116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// std::string).
315116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  IdentifierInfo *getIdentifierInfo(llvm::StringRef Name) const {
316116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    return &Identifiers.get(Name);
317116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  }
318116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
319116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
320116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// If 'Namespace' is non-null, then it is a token required to exist on the
321116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
322116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  void AddPragmaHandler(const char *Namespace, PragmaHandler *Handler);
323116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
324116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// RemovePragmaHandler - Remove the specific pragma handler from
325116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// the preprocessor. If \arg Namespace is non-null, then it should
326116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// be the namespace that \arg Handler was added to. It is an error
327116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// to remove a handler that has not been registered.
328116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  void RemovePragmaHandler(const char *Namespace, PragmaHandler *Handler);
329116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
330116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// \brief Add the specified comment handler to the preprocessor.
331116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  void AddCommentHandler(CommentHandler *Handler);
332116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
333116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// \brief Remove the specified comment handler.
334116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  ///
335116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// It is an error to remove a handler that has not been registered.
336116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  void RemoveCommentHandler(CommentHandler *Handler);
337116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
338116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// EnterMainSourceFile - Enter the specified FileID as the main source file,
3391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// which implicitly adds the builtin defines etc.
3401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void EnterMainSourceFile();
3411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// EnterSourceFile - Add a source file to the top of the include stack and
3431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// start lexing tokens from it instead of the current buffer.  Return true
344116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// and fill in ErrorStr with the error information on failure.
345116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
346116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch                       std::string &ErrorStr);
3471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// EnterMacro - Add a Macro to the top of the include stack and start lexing
3491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// tokens from it instead of the current buffer.  Args specifies the
3501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// tokens input to a function-like macro.
3511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ///
3521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// ILEnd specifies the location of the ')' for a function-like macro or the
3531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// identifier for an object-like macro.
3541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroArgs *Args);
3551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// EnterTokenStream - Add a "macro" context to the top of the include stack,
3571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// which will cause the lexer to start returning the specified tokens.
3581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ///
3591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// If DisableMacroExpansion is true, tokens lexed from the token stream will
3601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// not be subject to further macro expansion.  Otherwise, these tokens will
3611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// be re-macro-expanded when/if expansion is enabled.
3621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ///
3631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// If OwnsTokens is false, this method assumes that the specified stream of
3641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// tokens has a permanent owner somewhere, so they do not need to be copied.
3651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// If it is true, it assumes the array of tokens is allocated with new[] and
3661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// must be freed.
3671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ///
3681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void EnterTokenStream(const Token *Toks, unsigned NumToks,
3691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                        bool DisableMacroExpansion, bool OwnsTokens);
3701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
3721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// lexer stack.  This should only be used in situations where the current
3731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// state of the top-of-stack lexer is known.
3741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  void RemoveTopOfLexerStack();
3751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// EnableBacktrackAtThisPos - From the point that this method is called, and
3771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
3781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
3791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// make the Preprocessor re-lex the same tokens.
3801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ///
381116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
3821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
3831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
384116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  ///
385116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
386116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
387116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  /// tokens will continue indefinitely.
388116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  ///
389116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  void EnableBacktrackAtThisPos();
390
391  /// CommitBacktrackedTokens - Disable the last EnableBacktrackAtThisPos call.
392  void CommitBacktrackedTokens();
393
394  /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
395  /// EnableBacktrackAtThisPos() was previously called.
396  void Backtrack();
397
398  /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
399  /// caching of tokens is on.
400  bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
401
402  /// Lex - To lex a token from the preprocessor, just pull a token from the
403  /// current lexer or macro object.
404  void Lex(Token &Result) {
405    if (CurLexer)
406      CurLexer->Lex(Result);
407    else if (CurPTHLexer)
408      CurPTHLexer->Lex(Result);
409    else if (CurTokenLexer)
410      CurTokenLexer->Lex(Result);
411    else
412      CachingLex(Result);
413  }
414
415  /// LexNonComment - Lex a token.  If it's a comment, keep lexing until we get
416  /// something not a comment.  This is useful in -E -C mode where comments
417  /// would foul up preprocessor directive handling.
418  void LexNonComment(Token &Result) {
419    do
420      Lex(Result);
421    while (Result.getKind() == tok::comment);
422  }
423
424  /// LexUnexpandedToken - This is just like Lex, but this disables macro
425  /// expansion of identifier tokens.
426  void LexUnexpandedToken(Token &Result) {
427    // Disable macro expansion.
428    bool OldVal = DisableMacroExpansion;
429    DisableMacroExpansion = true;
430    // Lex the token.
431    Lex(Result);
432
433    // Reenable it.
434    DisableMacroExpansion = OldVal;
435  }
436
437  /// LookAhead - This peeks ahead N tokens and returns that token without
438  /// consuming any tokens.  LookAhead(0) returns the next token that would be
439  /// returned by Lex(), LookAhead(1) returns the token after it, etc.  This
440  /// returns normal tokens after phase 5.  As such, it is equivalent to using
441  /// 'Lex', not 'LexUnexpandedToken'.
442  const Token &LookAhead(unsigned N) {
443    if (CachedLexPos + N < CachedTokens.size())
444      return CachedTokens[CachedLexPos+N];
445    else
446      return PeekAhead(N+1);
447  }
448
449  /// RevertCachedTokens - When backtracking is enabled and tokens are cached,
450  /// this allows to revert a specific number of tokens.
451  /// Note that the number of tokens being reverted should be up to the last
452  /// backtrack position, not more.
453  void RevertCachedTokens(unsigned N) {
454    assert(isBacktrackEnabled() &&
455           "Should only be called when tokens are cached for backtracking");
456    assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
457         && "Should revert tokens up to the last backtrack position, not more");
458    assert(signed(CachedLexPos) - signed(N) >= 0 &&
459           "Corrupted backtrack positions ?");
460    CachedLexPos -= N;
461  }
462
463  /// EnterToken - Enters a token in the token stream to be lexed next. If
464  /// BackTrack() is called afterwards, the token will remain at the insertion
465  /// point.
466  void EnterToken(const Token &Tok) {
467    EnterCachingLexMode();
468    CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
469  }
470
471  /// AnnotateCachedTokens - We notify the Preprocessor that if it is caching
472  /// tokens (because backtrack is enabled) it should replace the most recent
473  /// cached tokens with the given annotation token. This function has no effect
474  /// if backtracking is not enabled.
475  ///
476  /// Note that the use of this function is just for optimization; so that the
477  /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
478  /// invoked.
479  void AnnotateCachedTokens(const Token &Tok) {
480    assert(Tok.isAnnotation() && "Expected annotation token");
481    if (CachedLexPos != 0 && isBacktrackEnabled())
482      AnnotatePreviousCachedTokens(Tok);
483  }
484
485  /// \brief Replace the last token with an annotation token.
486  ///
487  /// Like AnnotateCachedTokens(), this routine replaces an
488  /// already-parsed (and resolved) token with an annotation
489  /// token. However, this routine only replaces the last token with
490  /// the annotation token; it does not affect any other cached
491  /// tokens. This function has no effect if backtracking is not
492  /// enabled.
493  void ReplaceLastTokenWithAnnotation(const Token &Tok) {
494    assert(Tok.isAnnotation() && "Expected annotation token");
495    if (CachedLexPos != 0 && isBacktrackEnabled())
496      CachedTokens[CachedLexPos-1] = Tok;
497  }
498
499  /// \brief Specify the point at which code-completion will be performed.
500  ///
501  /// \param File the file in which code completion should occur. If
502  /// this file is included multiple times, code-completion will
503  /// perform completion the first time it is included. If NULL, this
504  /// function clears out the code-completion point.
505  ///
506  /// \param Line the line at which code completion should occur
507  /// (1-based).
508  ///
509  /// \param Column the column at which code completion should occur
510  /// (1-based).
511  ///
512  /// \returns true if an error occurred, false otherwise.
513  bool SetCodeCompletionPoint(const FileEntry *File,
514                              unsigned Line, unsigned Column);
515
516  /// \brief Determine if this source location refers into the file
517  /// for which we are performing code completion.
518  bool isCodeCompletionFile(SourceLocation FileLoc) const;
519
520  /// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
521  /// the specified Token's location, translating the token's start
522  /// position in the current buffer into a SourcePosition object for rendering.
523  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) {
524    return Diags->Report(FullSourceLoc(Loc, getSourceManager()), DiagID);
525  }
526
527  DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) {
528    return Diags->Report(FullSourceLoc(Tok.getLocation(), getSourceManager()),
529                         DiagID);
530  }
531
532  /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
533  /// token is the characters used to represent the token in the source file
534  /// after trigraph expansion and escaped-newline folding.  In particular, this
  /// wants to get the true, uncanonicalized, spelling of things like digraphs,
536  /// UCNs, etc.
537  std::string getSpelling(const Token &Tok) const;
538
539  /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
540  /// token is the characters used to represent the token in the source file
541  /// after trigraph expansion and escaped-newline folding.  In particular, this
  /// wants to get the true, uncanonicalized, spelling of things like digraphs,
543  /// UCNs, etc.
544  static std::string getSpelling(const Token &Tok,
545                                 const SourceManager &SourceMgr,
546                                 const LangOptions &Features);
547
548  /// getSpelling - This method is used to get the spelling of a token into a
549  /// preallocated buffer, instead of as an std::string.  The caller is required
550  /// to allocate enough space for the token, which is guaranteed to be at least
551  /// Tok.getLength() bytes long.  The length of the actual result is returned.
552  ///
553  /// Note that this method may do two possible things: it may either fill in
554  /// the buffer specified with characters, or it may *change the input pointer*
555  /// to point to a constant buffer with the data already in it (avoiding a
556  /// copy).  The caller is not allowed to modify the returned buffer pointer
557  /// if an internal buffer is returned.
558  unsigned getSpelling(const Token &Tok, const char *&Buffer) const;
559
560  /// getSpellingOfSingleCharacterNumericConstant - Tok is a numeric constant
561  /// with length 1, return the character.
562  char getSpellingOfSingleCharacterNumericConstant(const Token &Tok) const {
563    assert(Tok.is(tok::numeric_constant) &&
564           Tok.getLength() == 1 && "Called on unsupported token");
565    assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
566
567    // If the token is carrying a literal data pointer, just use it.
568    if (const char *D = Tok.getLiteralData())
569      return *D;
570
571    // Otherwise, fall back on getCharacterData, which is slower, but always
572    // works.
573    return *SourceMgr.getCharacterData(Tok.getLocation());
574  }
575
576  /// CreateString - Plop the specified string into a scratch buffer and set the
577  /// specified token's location and length to it.  If specified, the source
578  /// location provides a location of the instantiation point of the token.
579  void CreateString(const char *Buf, unsigned Len,
580                    Token &Tok, SourceLocation SourceLoc = SourceLocation());
581
582  /// \brief Computes the source location just past the end of the
583  /// token at this source location.
584  ///
585  /// This routine can be used to produce a source location that
586  /// points just past the end of the token referenced by \p Loc, and
587  /// is generally used when a diagnostic needs to point just after a
  /// token where it expected something different than it received. If
589  /// the returned source location would not be meaningful (e.g., if
590  /// it points into a macro), this routine returns an invalid
591  /// source location.
592  SourceLocation getLocForEndOfToken(SourceLocation Loc);
593
594  /// DumpToken - Print the token to stderr, used for debugging.
595  ///
596  void DumpToken(const Token &Tok, bool DumpFlags = false) const;
597  void DumpLocation(SourceLocation Loc) const;
598  void DumpMacro(const MacroInfo &MI) const;
599
600  /// AdvanceToTokenCharacter - Given a location that specifies the start of a
601  /// token, return a new location that specifies a character within the token.
602  SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char);
603
604  /// IncrementPasteCounter - Increment the counters for the number of token
605  /// paste operations performed.  If fast was specified, this is a 'fast paste'
606  /// case we handled.
607  ///
608  void IncrementPasteCounter(bool isFast) {
609    if (isFast)
610      ++NumFastTokenPaste;
611    else
612      ++NumTokenPaste;
613  }
614
615  void PrintStats();
616
617  /// HandleMicrosoftCommentPaste - When the macro expander pastes together a
618  /// comment (/##/) in microsoft mode, this method handles updating the current
619  /// state, returning the token on the next source line.
620  void HandleMicrosoftCommentPaste(Token &Tok);
621
622  //===--------------------------------------------------------------------===//
623  // Preprocessor callback methods.  These are invoked by a lexer as various
624  // directives and events are found.
625
626  /// LookUpIdentifierInfo - Given a tok::identifier token, look up the
627  /// identifier information for the token and install it into the token.
628  IdentifierInfo *LookUpIdentifierInfo(Token &Identifier,
629                                       const char *BufPtr = 0) const;
630
631  /// HandleIdentifier - This callback is invoked when the lexer reads an
632  /// identifier and has filled in the tokens IdentifierInfo member.  This
633  /// callback potentially macro expands it or turns it into a named token (like
634  /// 'for').
635  void HandleIdentifier(Token &Identifier);
636
637
638  /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
639  /// the current file.  This either returns the EOF token and returns true, or
640  /// pops a level off the include stack and returns false, at which point the
641  /// client should call lex again.
642  bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
643
644  /// HandleEndOfTokenLexer - This callback is invoked when the current
645  /// TokenLexer hits the end of its token stream.
646  bool HandleEndOfTokenLexer(Token &Result);
647
648  /// HandleDirective - This callback is invoked when the lexer sees a # token
649  /// at the start of a line.  This consumes the directive, modifies the
650  /// lexer/preprocessor state, and advances the lexer(s) so that the next token
651  /// read is the correct one.
652  void HandleDirective(Token &Result);
653
654  /// CheckEndOfDirective - Ensure that the next token is a tok::eom token.  If
655  /// not, emit a diagnostic and consume up until the eom.  If EnableMacros is
656  /// true, then we consider macros that expand to zero tokens as being ok.
657  void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
658
659  /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
660  /// current line until the tok::eom token is found.
661  void DiscardUntilEndOfDirective();
662
663  /// SawDateOrTime - This returns true if the preprocessor has seen a use of
664  /// __DATE__ or __TIME__ in the file so far.
665  bool SawDateOrTime() const {
666    return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
667  }
668  unsigned getCounterValue() const { return CounterValue; }
669  void setCounterValue(unsigned V) { CounterValue = V; }
670
  /// AllocateMacroInfo - Allocate a new MacroInfo object with the provided
672  ///  SourceLocation.
673  MacroInfo* AllocateMacroInfo(SourceLocation L);
674
675  /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
676  /// checked and spelled filename, e.g. as an operand of #include. This returns
  /// true if the input filename was in <>'s or false if it was in ""'s.  The
678  /// caller is expected to provide a buffer that is large enough to hold the
679  /// spelling of the filename, but is also expected to handle the case when
680  /// this method decides to use a different buffer.
681  bool GetIncludeFilenameSpelling(SourceLocation Loc,
682                                  const char *&BufStart, const char *&BufEnd);
683
684  /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
685  /// return null on failure.  isAngled indicates whether the file reference is
686  /// for system #include's or not (i.e. using <> instead of "").
687  const FileEntry *LookupFile(const char *FilenameStart,const char *FilenameEnd,
688                              bool isAngled, const DirectoryLookup *FromDir,
689                              const DirectoryLookup *&CurDir);
690
691  /// GetCurLookup - The DirectoryLookup structure used to find the current
692  /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
693  /// implement #include_next and find directory-specific properties.
694  const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
695
696  /// isInPrimaryFile - Return true if we're in the top-level file, not in a
697  /// #include.
698  bool isInPrimaryFile() const;
699
700  /// ConcatenateIncludeName - Handle cases where the #include name is expanded
701  /// from a macro as multiple tokens, which need to be glued together.  This
702  /// occurs for code like:
703  ///    #define FOO <a/b.h>
704  ///    #include FOO
705  /// because in this case, "<a/b.h>" is returned as 7 tokens, not one.
706  ///
707  /// This code concatenates and consumes tokens up to the '>' token.  It returns
708  /// false if the > was found, otherwise it returns true if it finds and consumes
709  /// the EOM marker.
710  bool ConcatenateIncludeName(llvm::SmallVector<char, 128> &FilenameBuffer);
711
712private:
713
714  void PushIncludeMacroStack() {
715    IncludeMacroStack.push_back(IncludeStackInfo(CurLexer.take(),
716                                                 CurPTHLexer.take(),
717                                                 CurPPLexer,
718                                                 CurTokenLexer.take(),
719                                                 CurDirLookup));
720    CurPPLexer = 0;
721  }
722
723  void PopIncludeMacroStack() {
724    CurLexer.reset(IncludeMacroStack.back().TheLexer);
725    CurPTHLexer.reset(IncludeMacroStack.back().ThePTHLexer);
726    CurPPLexer = IncludeMacroStack.back().ThePPLexer;
727    CurTokenLexer.reset(IncludeMacroStack.back().TheTokenLexer);
728    CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
729    IncludeMacroStack.pop_back();
730  }
731
732  /// ReleaseMacroInfo - Release the specified MacroInfo.  This memory will
733  ///  be reused for allocating new MacroInfo objects.
734  void ReleaseMacroInfo(MacroInfo* MI);
735
736  /// ReadMacroName - Lex and validate a macro name, which occurs after a
737  /// #define or #undef.  This emits a diagnostic, sets the token kind to eom,
738  /// and discards the rest of the macro line if the macro name is invalid.
739  void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
740
741  /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
742  /// definition has just been read.  Lex the rest of the arguments and the
743  /// closing ), updating MI with what we learn.  Return true if an error occurs
744  /// parsing the arg list.
745  bool ReadMacroDefinitionArgList(MacroInfo *MI);
746
747  /// SkipExcludedConditionalBlock - We just read a #if or related directive and
748  /// decided that the subsequent tokens are in the #if'd out portion of the
749  /// file.  Lex the rest of the file, until we see an #endif.  If
750  /// FoundNonSkipPortion is true, then we have already emitted code for part of
751  /// this #if directive, so #else/#elif blocks should never be entered. If
752  /// FoundElse is false, then #else directives are ok, if not, then we have
753  /// already seen one so a #else directive is a duplicate.  When this returns,
754  /// the caller can lex the first valid token.
755  void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
756                                    bool FoundNonSkipPortion, bool FoundElse);
757
758  /// PTHSkipExcludedConditionalBlock - A fast PTH version of
759  ///  SkipExcludedConditionalBlock.
760  void PTHSkipExcludedConditionalBlock();
761
762  /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
763  /// may occur after a #if or #elif directive and return it as a bool.  If the
764  /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
765  bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
766
767  /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
768  /// #pragma GCC poison/system_header/dependency and #pragma once.
769  void RegisterBuiltinPragmas();
770
771  /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
772  /// identifier table.
773  void RegisterBuiltinMacros();
774
775  /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
776  /// be expanded as a macro, handle it and return the next token as 'Tok'.  If
777  /// the macro should not be expanded return true, otherwise return false.
778  bool HandleMacroExpandedIdentifier(Token &Tok, MacroInfo *MI);
779
780  /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
781  /// lexed is a '('.  If so, consume the token and return true, if not, this
782  /// method should have no observable side-effect on the lexed tokens.
783  bool isNextPPTokenLParen();
784
785  /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
786  /// invoked to read all of the formal arguments specified for the macro
787  /// invocation.  This returns null on error.
788  MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
789                                       SourceLocation &InstantiationEnd);
790
791  /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
792  /// as a builtin macro, handle it and return the next token as 'Tok'.
793  void ExpandBuiltinMacro(Token &Tok);
794
795  /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
796  /// return the first token after the directive.  The _Pragma token has just
797  /// been read into 'Tok'.
798  void Handle_Pragma(Token &Tok);
799
800  /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
801  /// start lexing tokens from it instead of the current buffer.
802  void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
803
804  /// EnterSourceFileWithPTH - Add a lexer to the top of the include stack and
805  /// start getting tokens from it using the PTH cache.
806  void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
807
808  /// IsFileLexer - Returns true if we are lexing from a file and not a
809  ///  pragma or a macro.
810  static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
811    return L ? !L->isPragmaLexer() : P != 0;
812  }
813
814  static bool IsFileLexer(const IncludeStackInfo& I) {
815    return IsFileLexer(I.TheLexer, I.ThePPLexer);
816  }
817
818  bool IsFileLexer() const {
819    return IsFileLexer(CurLexer.get(), CurPPLexer);
820  }
821
822  //===--------------------------------------------------------------------===//
823  // Caching stuff.
824  void CachingLex(Token &Result);
825  bool InCachingLexMode() const { return CurPPLexer == 0 && CurTokenLexer == 0;}
826  void EnterCachingLexMode();
827  void ExitCachingLexMode() {
828    if (InCachingLexMode())
829      RemoveTopOfLexerStack();
830  }
831  const Token &PeekAhead(unsigned N);
832  void AnnotatePreviousCachedTokens(const Token &Tok);
833
834  //===--------------------------------------------------------------------===//
835  /// Handle*Directive - implement the various preprocessor directives.  These
836  /// should side-effect the current preprocessor object so that the next call
837  /// to Lex() will return the appropriate token next.
838  void HandleLineDirective(Token &Tok);
839  void HandleDigitDirective(Token &Tok);
840  void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
841  void HandleIdentSCCSDirective(Token &Tok);
842
843  // File inclusion.
844  void HandleIncludeDirective(Token &Tok,
845                              const DirectoryLookup *LookupFrom = 0,
846                              bool isImport = false);
847  void HandleIncludeNextDirective(Token &Tok);
848  void HandleIncludeMacrosDirective(Token &Tok);
849  void HandleImportDirective(Token &Tok);
850
851  // Macro handling.
852  void HandleDefineDirective(Token &Tok);
853  void HandleUndefDirective(Token &Tok);
854  // HandleAssertDirective(Token &Tok);
855  // HandleUnassertDirective(Token &Tok);
856
857  // Conditional Inclusion.
858  void HandleIfdefDirective(Token &Tok, bool isIfndef,
859                            bool ReadAnyTokensBeforeDirective);
860  void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
861  void HandleEndifDirective(Token &Tok);
862  void HandleElseDirective(Token &Tok);
863  void HandleElifDirective(Token &Tok);
864
865  // Pragmas.
866  void HandlePragmaDirective();
867public:
868  void HandlePragmaOnce(Token &OnceTok);
869  void HandlePragmaMark();
870  void HandlePragmaPoison(Token &PoisonTok);
871  void HandlePragmaSystemHeader(Token &SysHeaderTok);
872  void HandlePragmaDependency(Token &DependencyTok);
873  void HandlePragmaComment(Token &CommentTok);
874  void HandleComment(SourceRange Comment);
875};
876
/// \brief Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
///
/// Handlers are attached to and detached from a Preprocessor with
/// Preprocessor::AddCommentHandler() and
/// Preprocessor::RemoveCommentHandler().
class CommentHandler {
public:
  /// Virtual destructor; declared here and defined out of line.
  virtual ~CommentHandler();

  /// \brief Callback that concrete handlers must implement.
  ///
  /// \param PP the preprocessor passed along with the comment.
  ///
  /// \param Comment the source range of the comment.
  virtual void HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
885
886}  // end namespace clang
887
888#endif
889