Preprocessor.h revision 5d5051f3e79bf754134ccdf7a1dc7778cd80c73e
1//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the Preprocessor interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15#define LLVM_CLANG_LEX_PREPROCESSOR_H
16
17#include "clang/Lex/MacroInfo.h"
18#include "clang/Lex/Lexer.h"
19#include "clang/Lex/PTHLexer.h"
20#include "clang/Lex/PPCallbacks.h"
21#include "clang/Lex/TokenLexer.h"
22#include "clang/Lex/PTHManager.h"
23#include "clang/Basic/Builtins.h"
24#include "clang/Basic/Diagnostic.h"
25#include "clang/Basic/IdentifierTable.h"
26#include "clang/Basic/SourceLocation.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/IntrusiveRefCntPtr.h"
29#include "llvm/ADT/SmallPtrSet.h"
30#include "llvm/ADT/OwningPtr.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/ArrayRef.h"
33#include "llvm/Support/Allocator.h"
34#include <vector>
35
36namespace clang {
37
38class SourceManager;
39class ExternalPreprocessorSource;
40class FileManager;
41class FileEntry;
42class HeaderSearch;
43class PragmaNamespace;
44class PragmaHandler;
45class CommentHandler;
46class ScratchBuffer;
47class TargetInfo;
48class PPCallbacks;
49class CodeCompletionHandler;
50class DirectoryLookup;
51class PreprocessingRecord;
52class ModuleLoader;
53
54/// Preprocessor - This object engages in a tight little dance with the lexer to
55/// efficiently preprocess tokens.  Lexers know only about tokens within a
56/// single source file, and don't know anything about preprocessor-level issues
57/// like the #include stack, token expansion, etc.
58///
59class Preprocessor : public llvm::RefCountedBase<Preprocessor> {
60  DiagnosticsEngine        *Diags;
61  LangOptions       &Features;
62  const TargetInfo  *Target;
63  FileManager       &FileMgr;
64  SourceManager     &SourceMgr;
65  ScratchBuffer     *ScratchBuf;
66  HeaderSearch      &HeaderInfo;
67  ModuleLoader      &TheModuleLoader;
68
69  /// \brief External source of macros.
70  ExternalPreprocessorSource *ExternalSource;
71
72
73  /// PTH - An optional PTHManager object used for getting tokens from
74  ///  a token cache rather than lexing the original source file.
75  llvm::OwningPtr<PTHManager> PTH;
76
77  /// BP - A BumpPtrAllocator object used to quickly allocate and release
78  ///  objects internal to the Preprocessor.
79  llvm::BumpPtrAllocator BP;
80
81  /// Identifiers for builtin macros and other builtins.
82  IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
83  IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
84  IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
85  IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
86  IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
87  IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
88  IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
89  IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
90  IdentifierInfo *Ident__has_feature;              // __has_feature
91  IdentifierInfo *Ident__has_extension;            // __has_extension
92  IdentifierInfo *Ident__has_builtin;              // __has_builtin
93  IdentifierInfo *Ident__has_attribute;            // __has_attribute
94  IdentifierInfo *Ident__has_include;              // __has_include
95  IdentifierInfo *Ident__has_include_next;         // __has_include_next
96  IdentifierInfo *Ident__has_warning;              // __has_warning
97
98  SourceLocation DATELoc, TIMELoc;
99  unsigned CounterValue;  // Next __COUNTER__ value.
100
101  enum {
102    /// MaxAllowedIncludeStackDepth - Maximum depth of #includes.
103    MaxAllowedIncludeStackDepth = 200
104  };
105
106  // State that is set before the preprocessor begins.
107  bool KeepComments : 1;
108  bool KeepMacroComments : 1;
109  bool SuppressIncludeNotFoundError : 1;
110
111  // State that changes while the preprocessor runs:
112  bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
113
114  /// Whether the preprocessor owns the header search object.
115  bool OwnsHeaderSearch : 1;
116
117  /// DisableMacroExpansion - True if macro expansion is disabled.
118  bool DisableMacroExpansion : 1;
119
120  /// \brief Whether we have already loaded macros from the external source.
121  mutable bool ReadMacrosFromExternalSource : 1;
122
123  /// Identifiers - This is mapping/lookup information for all identifiers in
124  /// the program, including program keywords.
125  mutable IdentifierTable Identifiers;
126
127  /// Selectors - This table contains all the selectors in the program. Unlike
128  /// IdentifierTable above, this table *isn't* populated by the preprocessor.
129  /// It is declared/expanded here because its role/lifetime is
130  /// conceptually similar to the IdentifierTable. In addition, the current
131  /// control flow (in clang::ParseAST()) makes it convenient to put it here.
132  /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
133  /// the lifetime of the preprocessor.
134  SelectorTable Selectors;
135
136  /// BuiltinInfo - Information about builtins.
137  Builtin::Context BuiltinInfo;
138
139  /// PragmaHandlers - This tracks all of the pragma handlers that the client
140  /// has registered with this preprocessor.
141  PragmaNamespace *PragmaHandlers;
142
143  /// \brief Tracks all of the comment handlers that the client registered
144  /// with this preprocessor.
145  std::vector<CommentHandler *> CommentHandlers;
146
147  /// \brief The code-completion handler.
148  CodeCompletionHandler *CodeComplete;
149
150  /// \brief The file that we're performing code-completion for, if any.
151  const FileEntry *CodeCompletionFile;
152
153  /// \brief The offset in file for the code-completion point.
154  unsigned CodeCompletionOffset;
155
156  /// \brief The location for the code-completion point. This gets instantiated
157  /// when the CodeCompletionFile gets #include'ed for preprocessing.
158  SourceLocation CodeCompletionLoc;
159
160  /// \brief The start location for the file of the code-completion point.
161  /// This gets instantiated when the CodeCompletionFile gets #include'ed
162  /// for preprocessing.
163  SourceLocation CodeCompletionFileLoc;
164
165  /// \brief The source location of the 'import' contextual keyword we just
166  /// lexed, if any.
167  SourceLocation ModuleImportLoc;
168
169  /// \brief The module import path that we're currently processing.
170  llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2>
171    ModuleImportPath;
172
173  /// \brief Whether the module import expects an identifier next. Otherwise,
174  /// it expects a '.' or ';'.
175  bool ModuleImportExpectsIdentifier;
176
177  /// \brief The source location of the currently-active
178  /// #pragma clang arc_cf_code_audited begin.
179  SourceLocation PragmaARCCFCodeAuditedLoc;
180
181  /// \brief True if we hit the code-completion point.
182  bool CodeCompletionReached;
183
184  /// \brief The number of bytes that we will initially skip when entering the
185  /// main file, which is used when loading a precompiled preamble, along
186  /// with a flag that indicates whether skipping this number of bytes will
187  /// place the lexer at the start of a line.
188  std::pair<unsigned, bool> SkipMainFilePreamble;
189
190  /// CurLexer - This is the current top of the stack that we're lexing from if
191  /// not expanding a macro and we are lexing directly from source code.
192  ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
193  llvm::OwningPtr<Lexer> CurLexer;
194
195  /// CurPTHLexer - This is the current top of the stack that we're lexing from
196  ///  if not expanding a macro and we are lexing from a PTH cache.
197  ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
198  llvm::OwningPtr<PTHLexer> CurPTHLexer;
199
200  /// CurPPLexer - This is the current top of the stack that we're lexing from
201  ///  if not expanding a macro.  This is an alias for either CurLexer or
202  ///  CurPTHLexer.
203  PreprocessorLexer *CurPPLexer;
204
205  /// CurDirLookup - The DirectoryLookup structure used to find the current
206  /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
207  /// implement #include_next and find directory-specific properties.
208  const DirectoryLookup *CurDirLookup;
209
210  /// CurTokenLexer - This is the current macro we are expanding, if we are
211  /// expanding a macro.  One of CurLexer and CurTokenLexer must be null.
212  llvm::OwningPtr<TokenLexer> CurTokenLexer;
213
214  /// \brief The kind of lexer we're currently working with.
215  enum CurLexerKind {
216    CLK_Lexer,
217    CLK_PTHLexer,
218    CLK_TokenLexer,
219    CLK_CachingLexer,
220    CLK_LexAfterModuleImport
221  } CurLexerKind;
222
223  /// IncludeMacroStack - This keeps track of the stack of files currently
224  /// #included, and macros currently being expanded from, not counting
225  /// CurLexer/CurTokenLexer.
226  struct IncludeStackInfo {
227    enum CurLexerKind     CurLexerKind;
228    Lexer                 *TheLexer;
229    PTHLexer              *ThePTHLexer;
230    PreprocessorLexer     *ThePPLexer;
231    TokenLexer            *TheTokenLexer;
232    const DirectoryLookup *TheDirLookup;
233
234    IncludeStackInfo(enum CurLexerKind K, Lexer *L, PTHLexer* P,
235                     PreprocessorLexer* PPL,
236                     TokenLexer* TL, const DirectoryLookup *D)
237      : CurLexerKind(K), TheLexer(L), ThePTHLexer(P), ThePPLexer(PPL),
238        TheTokenLexer(TL), TheDirLookup(D) {}
239  };
240  std::vector<IncludeStackInfo> IncludeMacroStack;
241
242  /// Callbacks - These are actions invoked when some preprocessor activity is
243  /// encountered (e.g. a file is #included, etc).
244  PPCallbacks *Callbacks;
245
246  /// Macros - For each IdentifierInfo with 'HasMacro' set, we keep a mapping
247  /// to the actual definition of the macro.
248  llvm::DenseMap<IdentifierInfo*, MacroInfo*> Macros;
249
250  /// \brief Macros that we want to warn about because they are not used by the
251  /// end of the translation unit; we store just their SourceLocations instead
252  /// of something like MacroInfo*. The benefit of this is that when we are
253  /// deserializing from PCH, we don't need to deserialize identifiers & macros
254  /// just so that we can report that they are unused; we just warn using
255  /// the SourceLocations of this set (which will be filled in by the ASTReader).
256  /// We use a SmallPtrSet instead of a vector for faster removal.
257  typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
258  WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
259
260  /// MacroArgCache - This is a "freelist" of MacroArgs objects that can be
261  /// reused for quick allocation.
262  MacroArgs *MacroArgCache;
263  friend class MacroArgs;
264
265  /// PragmaPushMacroInfo - For each IdentifierInfo used in a #pragma
266  /// push_macro directive, we keep a MacroInfo stack used to restore
267  /// the previous macro value.
268  llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
269
270  // Various statistics we track for performance analysis.
271  unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
272  unsigned NumIf, NumElse, NumEndif;
273  unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
274  unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
275  unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
276  unsigned NumSkipped;
277
278  /// Predefines - This string contains the predefined macros that the
279  /// preprocessor should use from the command line, etc.
280  std::string Predefines;
281
282  /// TokenLexerCache - Cache macro expanders to reduce malloc traffic.
283  enum { TokenLexerCacheSize = 8 };
284  unsigned NumCachedTokenLexers;
285  TokenLexer *TokenLexerCache[TokenLexerCacheSize];
286
287  /// \brief Keeps macro expanded tokens for TokenLexers.
288  ///
289  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
290  /// going to lex into the cache, and when it finishes, the tokens are removed
291  /// from the end of the cache.
292  SmallVector<Token, 16> MacroExpandedTokens;
293  std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
294
295  /// \brief A record of the macro definitions and expansions that
296  /// occurred during preprocessing.
297  ///
298  /// This is an optional side structure that can be enabled with
299  /// \c createPreprocessingRecord() prior to preprocessing.
300  PreprocessingRecord *Record;
301
302private:  // Cached tokens state.
303  typedef SmallVector<Token, 1> CachedTokensTy;
304
305  /// CachedTokens - Cached tokens are stored here when we do backtracking or
306  /// lookahead. They are "lexed" by the CachingLex() method.
307  CachedTokensTy CachedTokens;
308
309  /// CachedLexPos - The position of the cached token that CachingLex() should
310  /// "lex" next. If it points beyond the CachedTokens vector, it means that
311  /// a normal Lex() should be invoked.
312  CachedTokensTy::size_type CachedLexPos;
313
314  /// BacktrackPositions - Stack of backtrack positions, allowing nested
315  /// backtracks. The EnableBacktrackAtThisPos() method pushes a position to
316  /// indicate where CachedLexPos should be set when the BackTrack() method is
317  /// invoked (at which point the last position is popped).
318  std::vector<CachedTokensTy::size_type> BacktrackPositions;
319
320  struct MacroInfoChain {
321    MacroInfo MI;
322    MacroInfoChain *Next;
323    MacroInfoChain *Prev;
324  };
325
326  /// MacroInfos are managed as a chain for easy disposal.  This is the head
327  /// of that list.
328  MacroInfoChain *MIChainHead;
329
330  /// MICache - A "freelist" of MacroInfo objects that can be reused for quick
331  /// allocation.
332  MacroInfoChain *MICache;
333
334  MacroInfo *getInfoForMacro(IdentifierInfo *II) const;
335
336public:
337  Preprocessor(DiagnosticsEngine &diags, LangOptions &opts,
338               const TargetInfo *target,
339               SourceManager &SM, HeaderSearch &Headers,
340               ModuleLoader &TheModuleLoader,
341               IdentifierInfoLookup *IILookup = 0,
342               bool OwnsHeaderSearch = false,
343               bool DelayInitialization = false);
344
345  ~Preprocessor();
346
347  /// \brief Initialize the preprocessor, if the constructor did not already
348  /// perform the initialization.
349  ///
350  /// \param Target Information about the target.
351  void Initialize(const TargetInfo &Target);
352
353  DiagnosticsEngine &getDiagnostics() const { return *Diags; }
354  void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
355
356  const LangOptions &getLangOptions() const { return Features; }
357  const TargetInfo &getTargetInfo() const { return *Target; }
358  FileManager &getFileManager() const { return FileMgr; }
359  SourceManager &getSourceManager() const { return SourceMgr; }
360  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
361
362  IdentifierTable &getIdentifierTable() { return Identifiers; }
363  SelectorTable &getSelectorTable() { return Selectors; }
364  Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
365  llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
366
367  void setPTHManager(PTHManager* pm);
368
369  PTHManager *getPTHManager() { return PTH.get(); }
370
371  void setExternalSource(ExternalPreprocessorSource *Source) {
372    ExternalSource = Source;
373  }
374
375  ExternalPreprocessorSource *getExternalSource() const {
376    return ExternalSource;
377  }
378
379  /// \brief Retrieve the module loader associated with this preprocessor.
380  ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
381
382  /// SetCommentRetentionState - Control whether or not the preprocessor retains
383  /// comments in output.
384  void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
385    this->KeepComments = KeepComments | KeepMacroComments;
386    this->KeepMacroComments = KeepMacroComments;
387  }
388
389  bool getCommentRetentionState() const { return KeepComments; }
390
391  void SetSuppressIncludeNotFoundError(bool Suppress) {
392    SuppressIncludeNotFoundError = Suppress;
393  }
394
395  bool GetSuppressIncludeNotFoundError() {
396    return SuppressIncludeNotFoundError;
397  }
398
399  /// isCurrentLexer - Return true if we are lexing directly from the specified
400  /// lexer.
401  bool isCurrentLexer(const PreprocessorLexer *L) const {
402    return CurPPLexer == L;
403  }
404
405  /// getCurrentLexer - Return the current lexer being lexed from.  Note
406  /// that this ignores any potentially active macro expansions and _Pragma
407  /// expansions going on at the time.
408  PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
409
410  /// getCurrentFileLexer - Return the current file lexer being lexed from.
411  /// Note that this ignores any potentially active macro expansions and _Pragma
412  /// expansions going on at the time.
413  PreprocessorLexer *getCurrentFileLexer() const;
414
415  /// getPPCallbacks/addPPCallbacks - Accessors for preprocessor callbacks.
416  /// Note that this class takes ownership of any PPCallbacks object given to
417  /// it.
418  PPCallbacks *getPPCallbacks() const { return Callbacks; }
419  void addPPCallbacks(PPCallbacks *C) {
420    if (Callbacks)
421      C = new PPChainedCallbacks(C, Callbacks);
422    Callbacks = C;
423  }
424
425  /// getMacroInfo - Given an identifier, return the MacroInfo it is #defined to
426  /// or null if it isn't #define'd.
427  MacroInfo *getMacroInfo(IdentifierInfo *II) const {
428    if (!II->hasMacroDefinition())
429      return 0;
430
431    return getInfoForMacro(II);
432  }
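  // Example of checking whether an identifier is currently #define'd (a
  // minimal sketch; assumes a live Preprocessor &PP, and "MY_MACRO" is a
  // made-up name):
  //
  //   IdentifierInfo *II = PP.getIdentifierInfo("MY_MACRO");
  //   if (MacroInfo *MI = PP.getMacroInfo(II)) {
  //     // MI describes the active definition, e.g. MI->isFunctionLike().
  //   }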
433
434  /// setMacroInfo - Specify a macro for this identifier.
435  ///
436  void setMacroInfo(IdentifierInfo *II, MacroInfo *MI,
437                    bool LoadedFromAST = false);
438
439  /// macro_iterator/macro_begin/macro_end - This allows you to walk the current
440  /// state of the macro table.  This visits every currently-defined macro.
441  typedef llvm::DenseMap<IdentifierInfo*,
442                         MacroInfo*>::const_iterator macro_iterator;
443  macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
444  macro_iterator macro_end(bool IncludeExternalMacros = true) const;
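  // Example of walking the macro table (a minimal sketch; assumes a live
  // Preprocessor &PP and that llvm/Support/raw_ostream.h is available):
  //
  //   for (Preprocessor::macro_iterator I = PP.macro_begin(),
  //                                     E = PP.macro_end(); I != E; ++I)
  //     llvm::errs() << I->first->getName() << "\n";  // macro name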
445
446  const std::string &getPredefines() const { return Predefines; }
447  /// setPredefines - Set the predefines for this Preprocessor.  These
448  /// predefines are automatically injected when parsing the main file.
449  void setPredefines(const char *P) { Predefines = P; }
450  void setPredefines(const std::string &P) { Predefines = P; }
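  // Example (a minimal sketch; the macro name is made up). The predefines are
  // injected before the first token of the main file is lexed:
  //
  //   PP.setPredefines("#define BUILD_CONFIG 1\n");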
451
452  /// getIdentifierInfo - Return information about the specified preprocessor
453  /// identifier token.  The identifier is added to the identifier table if it
454  /// is not already present.  Taking a StringRef avoids allocating and copying
455  /// memory to construct an std::string when the caller already has the
456  /// characters available.
457  IdentifierInfo *getIdentifierInfo(StringRef Name) const {
458    return &Identifiers.get(Name);
459  }
460
461  /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
462  /// If 'Namespace' is non-empty, then it is a token required to exist on the
463  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
464  void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
465  void AddPragmaHandler(PragmaHandler *Handler) {
466    AddPragmaHandler(StringRef(), Handler);
467  }
468
469  /// RemovePragmaHandler - Remove the specified pragma handler from
470  /// the preprocessor. If \arg Namespace is non-empty, then it should
471  /// be the namespace that \arg Handler was added to. It is an error
472  /// to remove a handler that has not been registered.
473  void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
474  void RemovePragmaHandler(PragmaHandler *Handler) {
475    RemovePragmaHandler(StringRef(), Handler);
476  }
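  // Example of registering a handler for "#pragma clang my_pragma" (a minimal
  // sketch; "my_pragma" is a made-up name, and the HandlePragma signature is
  // assumed to match the PragmaHandler interface in clang/Lex/Pragma.h at this
  // revision):
  //
  //   struct MyPragmaHandler : public PragmaHandler {
  //     MyPragmaHandler() : PragmaHandler("my_pragma") {}
  //     virtual void HandlePragma(Preprocessor &PP,
  //                               PragmaIntroducerKind Introducer,
  //                               Token &FirstToken) {
  //       // Lex and act on the pragma's tokens here.
  //     }
  //   };
  //   PP.AddPragmaHandler("clang", new MyPragmaHandler());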
477
478  /// \brief Add the specified comment handler to the preprocessor.
479  void AddCommentHandler(CommentHandler *Handler);
480
481  /// \brief Remove the specified comment handler.
482  ///
483  /// It is an error to remove a handler that has not been registered.
484  void RemoveCommentHandler(CommentHandler *Handler);
485
486  /// \brief Set the code completion handler to the given object.
487  void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
488    CodeComplete = &Handler;
489  }
490
491  /// \brief Retrieve the current code-completion handler.
492  CodeCompletionHandler *getCodeCompletionHandler() const {
493    return CodeComplete;
494  }
495
496  /// \brief Clear out the code completion handler.
497  void clearCodeCompletionHandler() {
498    CodeComplete = 0;
499  }
500
501  /// \brief Hook used by the lexer to invoke the "natural language" code
502  /// completion point.
503  void CodeCompleteNaturalLanguage();
504
505  /// \brief Retrieve the preprocessing record, or NULL if there is no
506  /// preprocessing record.
507  PreprocessingRecord *getPreprocessingRecord() const { return Record; }
508
509  /// \brief Create a new preprocessing record, which will keep track of
510  /// all macro expansions, macro definitions, etc.
511  void createPreprocessingRecord(bool IncludeNestedMacroExpansions);
512
513  /// EnterMainSourceFile - Enter the specified FileID as the main source file,
514  /// which implicitly adds the builtin defines etc.
515  void EnterMainSourceFile();
516
517  /// EndSourceFile - Inform the preprocessor callbacks that processing is
518  /// complete.
519  void EndSourceFile();
520
521  /// EnterSourceFile - Add a source file to the top of the include stack and
522  /// start lexing tokens from it instead of the current buffer.  If an error
523  /// occurs, a diagnostic is emitted and the file is not entered.
524  void EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
525                       SourceLocation Loc);
526
527  /// EnterMacro - Add a Macro to the top of the include stack and start lexing
528  /// tokens from it instead of the current buffer.  Args specifies the
529  /// tokens input to a function-like macro.
530  ///
531  /// ILEnd specifies the location of the ')' for a function-like macro or the
532  /// identifier for an object-like macro.
533  void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroArgs *Args);
534
535  /// EnterTokenStream - Add a "macro" context to the top of the include stack,
536  /// which will cause the lexer to start returning the specified tokens.
537  ///
538  /// If DisableMacroExpansion is true, tokens lexed from the token stream will
539  /// not be subject to further macro expansion.  Otherwise, these tokens will
540  /// be re-macro-expanded when/if expansion is enabled.
541  ///
542  /// If OwnsTokens is false, this method assumes that the specified stream of
543  /// tokens has a permanent owner somewhere, so they do not need to be copied.
544  /// If it is true, it assumes the array of tokens is allocated with new[] and
545  /// must be freed.
546  ///
547  void EnterTokenStream(const Token *Toks, unsigned NumToks,
548                        bool DisableMacroExpansion, bool OwnsTokens);
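  // Example of replaying a fixed token sequence (a minimal sketch; assumes the
  // tokens in Toks are owned and kept alive elsewhere for as long as they may
  // be replayed, hence OwnsTokens = false):
  //
  //   Token Toks[2];
  //   // ... initialize Toks[0] and Toks[1] ...
  //   PP.EnterTokenStream(Toks, 2, /*DisableMacroExpansion=*/true,
  //                       /*OwnsTokens=*/false);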
549
550  /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
551  /// lexer stack.  This should only be used in situations where the current
552  /// state of the top-of-stack lexer is known.
553  void RemoveTopOfLexerStack();
554
555  /// EnableBacktrackAtThisPos - From the point that this method is called, and
556  /// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
557  /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
558  /// make the Preprocessor re-lex the same tokens.
559  ///
560  /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
561  /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
562  /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
563  ///
564  /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
565  /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
566  /// tokens will continue indefinitely.
567  ///
568  void EnableBacktrackAtThisPos();
569
570  /// CommitBacktrackedTokens - Disable the last EnableBacktrackAtThisPos call.
571  void CommitBacktrackedTokens();
572
573  /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
574  /// EnableBacktrackAtThisPos() was previously called.
575  void Backtrack();
576
577  /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
578  /// caching of tokens is on.
579  bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
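  // Example of the backtracking protocol (a minimal sketch; assumes a live
  // Preprocessor &PP, and LooksInteresting() is a hypothetical predicate):
  //
  //   PP.EnableBacktrackAtThisPos();
  //   Token Tok;
  //   PP.Lex(Tok);
  //   if (LooksInteresting(Tok))
  //     PP.CommitBacktrackedTokens();  // keep the tokens consumed so far
  //   else
  //     PP.Backtrack();                // the same tokens will be re-lexed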
580
581  /// Lex - To lex a token from the preprocessor, just pull a token from the
582  /// current lexer or macro object.
583  void Lex(Token &Result) {
584    switch (CurLexerKind) {
585    case CLK_Lexer: CurLexer->Lex(Result); break;
586    case CLK_PTHLexer: CurPTHLexer->Lex(Result); break;
587    case CLK_TokenLexer: CurTokenLexer->Lex(Result); break;
588    case CLK_CachingLexer: CachingLex(Result); break;
589    case CLK_LexAfterModuleImport: LexAfterModuleImport(Result); break;
590    }
591  }
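  // Example: the typical top-level loop that drives the preprocessor (a
  // minimal sketch; assumes a fully initialized Preprocessor &PP):
  //
  //   PP.EnterMainSourceFile();
  //   Token Tok;
  //   do {
  //     PP.Lex(Tok);
  //   } while (Tok.isNot(tok::eof));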
592
593  void LexAfterModuleImport(Token &Result);
594
595  /// LexNonComment - Lex a token.  If it's a comment, keep lexing until we get
596  /// something not a comment.  This is useful in -E -C mode where comments
597  /// would foul up preprocessor directive handling.
598  void LexNonComment(Token &Result) {
599    do
600      Lex(Result);
601    while (Result.getKind() == tok::comment);
602  }
603
604  /// LexUnexpandedToken - This is just like Lex, but this disables macro
605  /// expansion of identifier tokens.
606  void LexUnexpandedToken(Token &Result) {
607    // Disable macro expansion.
608    bool OldVal = DisableMacroExpansion;
609    DisableMacroExpansion = true;
610    // Lex the token.
611    Lex(Result);
612
613    // Reenable it.
614    DisableMacroExpansion = OldVal;
615  }
616
617  /// LexUnexpandedNonComment - Like LexNonComment, but this disables macro
618  /// expansion of identifier tokens.
619  void LexUnexpandedNonComment(Token &Result) {
620    do
621      LexUnexpandedToken(Result);
622    while (Result.getKind() == tok::comment);
623  }
624
625  /// LookAhead - This peeks ahead N tokens and returns that token without
626  /// consuming any tokens.  LookAhead(0) returns the next token that would be
627  /// returned by Lex(), LookAhead(1) returns the token after it, etc.  This
628  /// returns normal tokens after phase 5.  As such, it is equivalent to using
629  /// 'Lex', not 'LexUnexpandedToken'.
630  const Token &LookAhead(unsigned N) {
631    if (CachedLexPos + N < CachedTokens.size())
632      return CachedTokens[CachedLexPos+N];
633    else
634      return PeekAhead(N+1);
635  }
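  // Example (a minimal sketch; assumes a live Preprocessor &PP): peek at the
  // next two tokens without consuming them:
  //
  //   const Token &Next = PP.LookAhead(0);       // what Lex() would return
  //   const Token &AfterNext = PP.LookAhead(1);  // the token after that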
636
637  /// RevertCachedTokens - When backtracking is enabled and tokens are cached,
638  /// this allows reverting a specific number of tokens.
639  /// Note that the number of tokens being reverted should be up to the last
640  /// backtrack position, not more.
641  void RevertCachedTokens(unsigned N) {
642    assert(isBacktrackEnabled() &&
643           "Should only be called when tokens are cached for backtracking");
644    assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
645         && "Should revert tokens up to the last backtrack position, not more");
646    assert(signed(CachedLexPos) - signed(N) >= 0 &&
647           "Corrupted backtrack positions ?");
648    CachedLexPos -= N;
649  }
650
651  /// EnterToken - Enters a token in the token stream to be lexed next. If
652  /// BackTrack() is called afterwards, the token will remain at the insertion
653  /// point.
654  void EnterToken(const Token &Tok) {
655    EnterCachingLexMode();
656    CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
657  }
658
659  /// AnnotateCachedTokens - We notify the Preprocessor that if it is caching
660  /// tokens (because backtrack is enabled) it should replace the most recent
661  /// cached tokens with the given annotation token. This function has no effect
662  /// if backtracking is not enabled.
663  ///
664  /// Note that the use of this function is just an optimization, so that the
665  /// cached tokens don't get re-parsed and re-resolved after a backtrack is
666  /// invoked.
667  void AnnotateCachedTokens(const Token &Tok) {
668    assert(Tok.isAnnotation() && "Expected annotation token");
669    if (CachedLexPos != 0 && isBacktrackEnabled())
670      AnnotatePreviousCachedTokens(Tok);
671  }
672
673  /// \brief Replace the last token with an annotation token.
674  ///
675  /// Like AnnotateCachedTokens(), this routine replaces an
676  /// already-parsed (and resolved) token with an annotation
677  /// token. However, this routine only replaces the last token with
678  /// the annotation token; it does not affect any other cached
679  /// tokens. This function has no effect if backtracking is not
680  /// enabled.
681  void ReplaceLastTokenWithAnnotation(const Token &Tok) {
682    assert(Tok.isAnnotation() && "Expected annotation token");
683    if (CachedLexPos != 0 && isBacktrackEnabled())
684      CachedTokens[CachedLexPos-1] = Tok;
685  }
686
687  /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
688  /// CurTokenLexer pointers.
689  void recomputeCurLexerKind();
690
691  /// \brief Specify the point at which code-completion will be performed.
692  ///
693  /// \param File the file in which code completion should occur. If
694  /// this file is included multiple times, code-completion will
695  /// perform completion the first time it is included. If NULL, this
696  /// function clears out the code-completion point.
697  ///
698  /// \param Line the line at which code completion should occur
699  /// (1-based).
700  ///
701  /// \param Column the column at which code completion should occur
702  /// (1-based).
703  ///
704  /// \returns true if an error occurred, false otherwise.
705  bool SetCodeCompletionPoint(const FileEntry *File,
706                              unsigned Line, unsigned Column);
707
708  /// \brief Determine if we are performing code completion.
709  bool isCodeCompletionEnabled() const { return CodeCompletionFile != 0; }
710
711  /// \brief Returns the location of the code-completion point.
712  /// Returns an invalid location if code-completion is not enabled or the file
713  /// containing the code-completion point has not been lexed yet.
714  SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
715
716  /// \brief Returns the start location of the file containing the code-completion point.
717  /// Returns an invalid location if code-completion is not enabled or the file
718  /// containing the code-completion point has not been lexed yet.
719  SourceLocation getCodeCompletionFileLoc() const {
720    return CodeCompletionFileLoc;
721  }
722
723  /// \brief Returns true if code-completion is enabled and we have hit the
724  /// code-completion point.
725  bool isCodeCompletionReached() const { return CodeCompletionReached; }
726
727  /// \brief Note that we hit the code-completion point.
728  void setCodeCompletionReached() {
729    assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
730    CodeCompletionReached = true;
731    // Silence any diagnostics that occur after we hit the code-completion.
732    getDiagnostics().setSuppressAllDiagnostics(true);
733  }
734
735  /// \brief The location of the currently-active #pragma clang
736  /// arc_cf_code_audited begin.  Returns an invalid location if there
737  /// is no such pragma active.
738  SourceLocation getPragmaARCCFCodeAuditedLoc() const {
739    return PragmaARCCFCodeAuditedLoc;
740  }
741
742  /// \brief Set the location of the currently-active #pragma clang
743  /// arc_cf_code_audited begin.  An invalid location ends the pragma.
744  void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
745    PragmaARCCFCodeAuditedLoc = Loc;
746  }
747
748  /// \brief Instruct the preprocessor to skip part of the main
749  /// source file.
750  ///
751  /// \param Bytes The number of bytes in the preamble to skip.
752  ///
753  /// \param StartOfLine Whether skipping these bytes puts the lexer at the
754  /// start of a line.
755  void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
756    SkipMainFilePreamble.first = Bytes;
757    SkipMainFilePreamble.second = StartOfLine;
758  }
759
760  /// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
761  /// the specified Token's location, translating the token's start
762  /// position in the current buffer into a SourcePosition object for rendering.
763  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
764    return Diags->Report(Loc, DiagID);
765  }
766
767  DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
768    return Diags->Report(Tok.getLocation(), DiagID);
769  }
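  // Example of reporting an ad-hoc diagnostic at a token (a minimal sketch;
  // the message text is made up, and getCustomDiagID is assumed to be
  // available on DiagnosticsEngine as in this revision):
  //
  //   unsigned DiagID = PP.getDiagnostics().getCustomDiagID(
  //       DiagnosticsEngine::Warning, "unexpected token here");
  //   PP.Diag(Tok, DiagID);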
770
771  /// getSpelling() - Return the 'spelling' of the token at the given
772  /// location; does not go up to the spelling location or down to the
773  /// expansion location.
774  ///
775  /// \param buffer A buffer which will be used only if the token requires
776  ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
777  /// \param invalid If non-null, will be set \c true if an error occurs.
778  StringRef getSpelling(SourceLocation loc,
779                              SmallVectorImpl<char> &buffer,
780                              bool *invalid = 0) const {
781    return Lexer::getSpelling(loc, buffer, SourceMgr, Features, invalid);
782  }
783
784  /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
785  /// token is the characters used to represent the token in the source file
786  /// after trigraph expansion and escaped-newline folding.  In particular, this
787  /// wants to get the true, uncanonicalized, spelling of things like digraphs,
788  /// UCNs, etc.
789  ///
790  /// \param Invalid If non-null, will be set \c true if an error occurs.
791  std::string getSpelling(const Token &Tok, bool *Invalid = 0) const {
792    return Lexer::getSpelling(Tok, SourceMgr, Features, Invalid);
793  }
794
795  /// getSpelling - This method is used to get the spelling of a token into a
796  /// preallocated buffer, instead of as an std::string.  The caller is required
797  /// to allocate enough space for the token, which is guaranteed to be at least
798  /// Tok.getLength() bytes long.  The length of the actual result is returned.
799  ///
800  /// Note that this method may do two possible things: it may either fill in
801  /// the buffer specified with characters, or it may *change the input pointer*
802  /// to point to a constant buffer with the data already in it (avoiding a
803  /// copy).  The caller is not allowed to modify the returned buffer pointer
804  /// if an internal buffer is returned.
805  unsigned getSpelling(const Token &Tok, const char *&Buffer,
806                       bool *Invalid = 0) const {
807    return Lexer::getSpelling(Tok, Buffer, SourceMgr, Features, Invalid);
808  }
809
810  /// getSpelling - This method is used to get the spelling of a token into a
811  /// SmallVector. Note that the returned StringRef may not point to the
812  /// supplied buffer if a copy can be avoided.
813  StringRef getSpelling(const Token &Tok,
814                              SmallVectorImpl<char> &Buffer,
815                              bool *Invalid = 0) const;
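  // Example (a minimal sketch; llvm::SmallString is from
  // llvm/ADT/SmallString.h):
  //
  //   llvm::SmallString<64> Buffer;
  //   StringRef Spelling = PP.getSpelling(Tok, Buffer);
  //   // Spelling may point into Buffer or directly into the source buffer.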
816
817  /// getSpellingOfSingleCharacterNumericConstant - Tok is a numeric constant
818  /// with length 1, return the character.
819  char getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
820                                                   bool *Invalid = 0) const {
821    assert(Tok.is(tok::numeric_constant) &&
822           Tok.getLength() == 1 && "Called on unsupported token");
823    assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
824
825    // If the token is carrying a literal data pointer, just use it.
826    if (const char *D = Tok.getLiteralData())
827      return *D;
828
829    // Otherwise, fall back on getCharacterData, which is slower, but always
830    // works.
831    return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
832  }
833
834  /// \brief Retrieve the name of the immediate macro expansion.
835  ///
836  /// This routine starts from a source location, and finds the name of the macro
837  /// responsible for its immediate expansion. It looks through any intervening
838  /// macro argument expansions to compute this. It returns a StringRef which
839  /// refers to the SourceManager-owned buffer of the source where that macro
840  /// name is spelled. Thus, the result shouldn't outlive the SourceManager.
841  StringRef getImmediateMacroName(SourceLocation Loc) {
842    return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOptions());
843  }
844
845  /// CreateString - Plop the specified string into a scratch buffer and set the
846  /// specified token's location and length to it.  If specified, the source
847  /// location provides a location of the expansion point of the token.
848  void CreateString(const char *Buf, unsigned Len, Token &Tok,
849                    SourceLocation ExpansionLocStart = SourceLocation(),
850                    SourceLocation ExpansionLocEnd = SourceLocation());
851
852  /// \brief Computes the source location just past the end of the
853  /// token at this source location.
854  ///
855  /// This routine can be used to produce a source location that
856  /// points just past the end of the token referenced by \p Loc, and
857  /// is generally used when a diagnostic needs to point just after a
858  /// token where it expected something different from what it received. If
859  /// the returned source location would not be meaningful (e.g., if
860  /// it points into a macro), this routine returns an invalid
861  /// source location.
862  ///
863  /// \param Offset an offset from the end of the token, where the source
864  /// location should refer to. The default offset (0) produces a source
865  /// location pointing just past the end of the token; an offset of 1 produces
866  /// a source location pointing to the last character in the token, etc.
867  SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
868    return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, Features);
869  }
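  // Example of pointing a diagnostic just past a token, e.g. to suggest
  // inserting text after it (a minimal sketch; DiagID is a hypothetical
  // diagnostic that accepts a fix-it hint):
  //
  //   SourceLocation After = PP.getLocForEndOfToken(Tok.getLocation());
  //   if (After.isValid())
  //     PP.Diag(After, DiagID) << FixItHint::CreateInsertion(After, ";");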
870
871  /// \brief Returns true if the given MacroID location points at the first
872  /// token of the macro expansion.
873  ///
874  /// \param MacroBegin If non-null and function returns true, it is set to
875  /// the begin location of the macro.
876  bool isAtStartOfMacroExpansion(SourceLocation loc,
877                                 SourceLocation *MacroBegin = 0) const {
878    return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, Features,
879                                            MacroBegin);
880  }
881
882  /// \brief Returns true if the given MacroID location points at the last
883  /// token of the macro expansion.
884  ///
885  /// \param MacroEnd If non-null and function returns true, it is set to
886  /// the end location of the macro.
887  bool isAtEndOfMacroExpansion(SourceLocation loc,
888                               SourceLocation *MacroEnd = 0) const {
889    return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, Features, MacroEnd);
890  }
891
892  /// DumpToken - Print the token to stderr, used for debugging.
893  ///
894  void DumpToken(const Token &Tok, bool DumpFlags = false) const;
895  void DumpLocation(SourceLocation Loc) const;
896  void DumpMacro(const MacroInfo &MI) const;
897
898  /// AdvanceToTokenCharacter - Given a location that specifies the start of a
899  /// token, return a new location that specifies a character within the token.
900  SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
901                                         unsigned Char) const {
902    return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, Features);
903  }
904
905  /// IncrementPasteCounter - Increment the counters for the number of token
906  /// paste operations performed.  If fast was specified, this is a 'fast paste'
907  /// case we handled.
908  ///
909  void IncrementPasteCounter(bool isFast) {
910    if (isFast)
911      ++NumFastTokenPaste;
912    else
913      ++NumTokenPaste;
914  }
915
916  void PrintStats();
917
918  size_t getTotalMemory() const;
919
920  /// HandleMicrosoftCommentPaste - When the macro expander pastes together a
921  /// comment (/##/) in Microsoft mode, this method handles updating the current
922  /// state, returning the token on the next source line.
923  void HandleMicrosoftCommentPaste(Token &Tok);
924
925  //===--------------------------------------------------------------------===//
926  // Preprocessor callback methods.  These are invoked by a lexer as various
927  // directives and events are found.
928
929  /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
930  /// identifier information for the token and install it into the token,
931  /// updating the token kind accordingly.
932  IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
933
934private:
935  llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
936
937public:
938
939  // SetPoisonReason - Call this function to indicate the reason for
940  // poisoning an identifier. If that identifier is accessed while
941  // poisoned, then this reason will be used instead of the default
942  // "poisoned" diagnostic.
943  void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
944
945  // HandlePoisonedIdentifier - Display reason for poisoned
946  // identifier.
947  void HandlePoisonedIdentifier(Token & Tok);
948
949  void MaybeHandlePoisonedIdentifier(Token &Identifier) {
950    if (IdentifierInfo *II = Identifier.getIdentifierInfo()) {
951      if (II->isPoisoned()) {
952        HandlePoisonedIdentifier(Identifier);
953      }
954    }
955  }
956
957private:
958  /// Identifiers used for SEH handling in Borland. These are only
959  /// allowed in particular circumstances.
960  // __except block
961  IdentifierInfo *Ident__exception_code,
962                 *Ident___exception_code,
963                 *Ident_GetExceptionCode;
964  // __except filter expression
965  IdentifierInfo *Ident__exception_info,
966                 *Ident___exception_info,
967                 *Ident_GetExceptionInfo;
968  // __finally
969  IdentifierInfo *Ident__abnormal_termination,
970                 *Ident___abnormal_termination,
971                 *Ident_AbnormalTermination;
972public:
973  void PoisonSEHIdentifiers(bool Poison = true); // Borland
974
975  /// HandleIdentifier - This callback is invoked when the lexer reads an
976  /// identifier and has filled in the token's IdentifierInfo member.  This
977  /// callback potentially macro expands it or turns it into a named token (like
978  /// 'for').
979  void HandleIdentifier(Token &Identifier);
980
981
982  /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
983  /// the current file.  This either returns the EOF token and returns true, or
984  /// pops a level off the include stack and returns false, at which point the
985  /// client should call lex again.
986  bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
987
988  /// HandleEndOfTokenLexer - This callback is invoked when the current
989  /// TokenLexer hits the end of its token stream.
990  bool HandleEndOfTokenLexer(Token &Result);
991
992  /// HandleDirective - This callback is invoked when the lexer sees a # token
993  /// at the start of a line.  This consumes the directive, modifies the
994  /// lexer/preprocessor state, and advances the lexer(s) so that the next token
995  /// read is the correct one.
996  void HandleDirective(Token &Result);
997
998  /// CheckEndOfDirective - Ensure that the next token is a tok::eod token.  If
999  /// not, emit a diagnostic and consume up until the eod.  If EnableMacros is
1000  /// true, then we consider macros that expand to zero tokens as being ok.
1001  void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
1002
1003  /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
1004  /// current line until the tok::eod token is found.
1005  void DiscardUntilEndOfDirective();
1006
1007  /// SawDateOrTime - This returns true if the preprocessor has seen a use of
1008  /// __DATE__ or __TIME__ in the file so far.
1009  bool SawDateOrTime() const {
1010    return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1011  }
1012  unsigned getCounterValue() const { return CounterValue; }
1013  void setCounterValue(unsigned V) { CounterValue = V; }
1014
1015  /// \brief Retrieves the module that we're currently building, if any.
1016  Module *getCurrentModule();
1017
1018  /// AllocateMacroInfo - Allocate a new MacroInfo object with the provided
1019  ///  SourceLocation.
1020  MacroInfo *AllocateMacroInfo(SourceLocation L);
1021
1022  /// CloneMacroInfo - Allocate a new MacroInfo object which is a clone of MI.
1023  MacroInfo *CloneMacroInfo(const MacroInfo &MI);
1024
1025  /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1026  /// checked and spelled filename, e.g. as an operand of #include. This returns
1027  /// true if the input filename was in <>'s or false if it was in ""'s.  The
1028  /// caller is expected to provide a buffer that is large enough to hold the
1029  /// spelling of the filename, but is also expected to handle the case when
1030  /// this method decides to use a different buffer.
1031  bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);
1032
1033  /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
1034  /// returning null on failure.  isAngled indicates whether the file reference is
1035  /// for system #include's or not (i.e. using <> instead of "").
1036  const FileEntry *LookupFile(StringRef Filename,
1037                              bool isAngled, const DirectoryLookup *FromDir,
1038                              const DirectoryLookup *&CurDir,
1039                              SmallVectorImpl<char> *SearchPath,
1040                              SmallVectorImpl<char> *RelativePath,
1041                              Module **SuggestedModule,
1042                              bool SkipCache = false);
1043
1044  /// GetCurDirLookup - The DirectoryLookup structure used to find the current
1045  /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
1046  /// implement #include_next and find directory-specific properties.
1047  const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
1048
1049  /// isInPrimaryFile - Return true if we're in the top-level file, not in a
1050  /// #include.
1051  bool isInPrimaryFile() const;
1052
1053  /// ConcatenateIncludeName - Handle cases where the #include name is expanded
1054  /// from a macro as multiple tokens, which need to be glued together.  This
1055  /// occurs for code like:
1056  ///    #define FOO <a/b.h>
1057  ///    #include FOO
1058  /// because in this case, "<a/b.h>" is returned as 7 tokens, not one.
1059  ///
1060  /// This code concatenates and consumes tokens up to the '>' token.  It
1061  /// returns false if the > was found, otherwise it returns true if it finds
1062  /// and consumes the EOD marker.
1063  bool ConcatenateIncludeName(llvm::SmallString<128> &FilenameBuffer,
1064                              SourceLocation &End);
1065
1066  /// LexOnOffSwitch - Lex an on-off-switch (C99 6.10.6p2) and verify that it is
1067  /// followed by EOD.  Return true if the token is not a valid on-off-switch.
1068  bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
1069
1070private:
1071
1072  void PushIncludeMacroStack() {
1073    IncludeMacroStack.push_back(IncludeStackInfo(CurLexerKind,
1074                                                 CurLexer.take(),
1075                                                 CurPTHLexer.take(),
1076                                                 CurPPLexer,
1077                                                 CurTokenLexer.take(),
1078                                                 CurDirLookup));
1079    CurPPLexer = 0;
1080  }
1081
1082  void PopIncludeMacroStack() {
1083    CurLexer.reset(IncludeMacroStack.back().TheLexer);
1084    CurPTHLexer.reset(IncludeMacroStack.back().ThePTHLexer);
1085    CurPPLexer = IncludeMacroStack.back().ThePPLexer;
1086    CurTokenLexer.reset(IncludeMacroStack.back().TheTokenLexer);
1087    CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
1088    CurLexerKind = IncludeMacroStack.back().CurLexerKind;
1089    IncludeMacroStack.pop_back();
1090  }
1091
1092  /// AllocateMacroInfo - Allocate a new MacroInfo object.
1093  MacroInfo *AllocateMacroInfo();
1094
1095  /// ReleaseMacroInfo - Release the specified MacroInfo.  This memory will
1096  ///  be reused for allocating new MacroInfo objects.
1097  void ReleaseMacroInfo(MacroInfo* MI);
1098
1099  /// ReadMacroName - Lex and validate a macro name, which occurs after a
1100  /// #define or #undef.  This emits a diagnostic, sets the token kind to eod,
1101  /// and discards the rest of the macro line if the macro name is invalid.
1102  void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
1103
1104  /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
1105  /// definition has just been read.  Lex the rest of the arguments and the
1106  /// closing ), updating MI with what we learn.  Return true if an error occurs
1107  /// parsing the arg list.
1108  bool ReadMacroDefinitionArgList(MacroInfo *MI);
1109
1110  /// SkipExcludedConditionalBlock - We just read a #if or related directive and
1111  /// decided that the subsequent tokens are in the #if'd out portion of the
1112  /// file.  Lex the rest of the file, until we see an #endif.  If
1113  /// FoundNonSkipPortion is true, then we have already emitted code for part of
1114  /// this #if directive, so #else/#elif blocks should never be entered. If
1115  /// FoundElse is false, then #else directives are ok; if not, then we have
1116  /// already seen one, so a #else directive is a duplicate.  When this returns,
1117  /// the caller can lex the first valid token.
1118  void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
1119                                    bool FoundNonSkipPortion, bool FoundElse,
1120                                    SourceLocation ElseLoc = SourceLocation());
1121
1122  /// PTHSkipExcludedConditionalBlock - A fast PTH version of
1123  ///  SkipExcludedConditionalBlock.
1124  void PTHSkipExcludedConditionalBlock();
1125
1126  /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
1127  /// may occur after a #if or #elif directive and return it as a bool.  If the
1128  /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
1129  bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
1130
1131  /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
1132  /// #pragma GCC poison/system_header/dependency and #pragma once.
1133  void RegisterBuiltinPragmas();
1134
1135  /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
1136  /// identifier table.
1137  void RegisterBuiltinMacros();
1138
1139  /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
1140  /// be expanded as a macro, handle it and return the next token as 'Tok'.  If
1141  /// the macro should not be expanded return true, otherwise return false.
1142  bool HandleMacroExpandedIdentifier(Token &Tok, MacroInfo *MI);
1143
1144  /// \brief Cache macro expanded tokens for TokenLexers.
1145  ///
1146  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
1147  /// going to lex into the cache, and when it finishes, the tokens are removed
1148  /// from the end of the cache.
1149  Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
1150                                  ArrayRef<Token> tokens);
1151  void removeCachedMacroExpandedTokensOfLastLexer();
1152  friend void TokenLexer::ExpandFunctionArguments();
1153
1154  /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
1155  /// lexed is a '('.  If so, consume the token and return true, if not, this
1156  /// method should have no observable side-effect on the lexed tokens.
1157  bool isNextPPTokenLParen();
1158
1159  /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
1160  /// invoked to read all of the formal arguments specified for the macro
1161  /// invocation.  This returns null on error.
1162  MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
1163                                       SourceLocation &ExpansionEnd);
1164
1165  /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
1166  /// as a builtin macro, handle it and return the next token as 'Tok'.
1167  void ExpandBuiltinMacro(Token &Tok);
1168
1169  /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
1170  /// return the first token after the directive.  The _Pragma token has just
1171  /// been read into 'Tok'.
1172  void Handle_Pragma(Token &Tok);
1173
1174  /// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
1175  /// is not enclosed within a string literal.
1176  void HandleMicrosoft__pragma(Token &Tok);
1177
1178  /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
1179  /// start lexing tokens from it instead of the current buffer.
1180  void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
1181
1182  /// EnterSourceFileWithPTH - Add a lexer to the top of the include stack and
1183  /// start getting tokens from it using the PTH cache.
1184  void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
1185
1186  /// IsFileLexer - Returns true if we are lexing from a file and not a
1187  ///  pragma or a macro.
1188  static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
1189    return L ? !L->isPragmaLexer() : P != 0;
1190  }
1191
1192  static bool IsFileLexer(const IncludeStackInfo& I) {
1193    return IsFileLexer(I.TheLexer, I.ThePPLexer);
1194  }
1195
1196  bool IsFileLexer() const {
1197    return IsFileLexer(CurLexer.get(), CurPPLexer);
1198  }
1199
1200  //===--------------------------------------------------------------------===//
1201  // Caching stuff.
1202  void CachingLex(Token &Result);
1203  bool InCachingLexMode() const {
1204    // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
1205    // that we are past EOF, not that we are in CachingLex mode.
1206    return CurPPLexer == 0 && CurTokenLexer == 0 && CurPTHLexer == 0 &&
1207           !IncludeMacroStack.empty();
1208  }
1209  void EnterCachingLexMode();
1210  void ExitCachingLexMode() {
1211    if (InCachingLexMode())
1212      RemoveTopOfLexerStack();
1213  }
1214  const Token &PeekAhead(unsigned N);
1215  void AnnotatePreviousCachedTokens(const Token &Tok);
1216
1217  //===--------------------------------------------------------------------===//
1218  /// Handle*Directive - implement the various preprocessor directives.  These
1219  /// should side-effect the current preprocessor object so that the next call
1220  /// to Lex() will return the appropriate token next.
1221  void HandleLineDirective(Token &Tok);
1222  void HandleDigitDirective(Token &Tok);
1223  void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
1224  void HandleIdentSCCSDirective(Token &Tok);
1225  void HandleMacroPublicDirective(Token &Tok);
1226  void HandleMacroPrivateDirective(Token &Tok);
1227
1228  // File inclusion.
1229  void HandleIncludeDirective(SourceLocation HashLoc,
1230                              Token &Tok,
1231                              const DirectoryLookup *LookupFrom = 0,
1232                              bool isImport = false);
1233  void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
1234  void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
1235  void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
1236
1237  // Macro handling.
1238  void HandleDefineDirective(Token &Tok);
1239  void HandleUndefDirective(Token &Tok);
1240
1241  // Conditional Inclusion.
1242  void HandleIfdefDirective(Token &Tok, bool isIfndef,
1243                            bool ReadAnyTokensBeforeDirective);
1244  void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
1245  void HandleEndifDirective(Token &Tok);
1246  void HandleElseDirective(Token &Tok);
1247  void HandleElifDirective(Token &Tok);
1248
1249  // Pragmas.
1250  void HandlePragmaDirective(unsigned Introducer);
1251public:
1252  void HandlePragmaOnce(Token &OnceTok);
1253  void HandlePragmaMark();
1254  void HandlePragmaPoison(Token &PoisonTok);
1255  void HandlePragmaSystemHeader(Token &SysHeaderTok);
1256  void HandlePragmaDependency(Token &DependencyTok);
1257  void HandlePragmaComment(Token &CommentTok);
1258  void HandlePragmaMessage(Token &MessageTok);
1259  void HandlePragmaPushMacro(Token &Tok);
1260  void HandlePragmaPopMacro(Token &Tok);
1261  IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
1262
1263  // Return true and store the first token only if any CommentHandler
1264  // has inserted some tokens and getCommentRetentionState() is false.
1265  bool HandleComment(Token &Token, SourceRange Comment);
1266
1267  /// \brief A macro is used, update information about macros that need unused
1268  /// warnings.
1269  void markMacroAsUsed(MacroInfo *MI);
1270};
1271
1272/// \brief Abstract base class that describes a handler that will receive
1273/// source ranges for each of the comments encountered in the source file.
1274class CommentHandler {
1275public:
1276  virtual ~CommentHandler();
1277
1278  // The handler shall return true if it has pushed any tokens
1279  // to be read using e.g. EnterToken or EnterTokenStream.
1280  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
1281};
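// Example of a comment handler (a minimal sketch; the class name is made up).
// Returning false indicates that no tokens were pushed back into the
// preprocessor:
//
//   struct CommentCollector : public CommentHandler {
//     virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) {
//       // Inspect Comment.getBegin()/getEnd() here.
//       return false;
//     }
//   };
//
//   CommentCollector Collector;          // caller keeps the handler alive
//   PP.AddCommentHandler(&Collector);
//   // ... preprocess ...
//   PP.RemoveCommentHandler(&Collector); // before Collector is destroyed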
1282
1283}  // end namespace clang
1284
1285#endif
1286