Preprocessor.h revision 6bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89
1//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Defines the clang::Preprocessor interface.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
16#define LLVM_CLANG_LEX_PREPROCESSOR_H
17
18#include "clang/Basic/Builtins.h"
19#include "clang/Basic/Diagnostic.h"
20#include "clang/Basic/IdentifierTable.h"
21#include "clang/Basic/SourceLocation.h"
22#include "clang/Lex/Lexer.h"
23#include "clang/Lex/MacroInfo.h"
24#include "clang/Lex/ModuleMap.h"
25#include "clang/Lex/PPCallbacks.h"
26#include "clang/Lex/PTHLexer.h"
27#include "clang/Lex/PTHManager.h"
28#include "clang/Lex/TokenLexer.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/IntrusiveRefCntPtr.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/Support/Allocator.h"
35#include <memory>
36#include <vector>
37
38namespace llvm {
39  template<unsigned InternalLen> class SmallString;
40}
41
42namespace clang {
43
44class SourceManager;
45class ExternalPreprocessorSource;
46class FileManager;
47class FileEntry;
48class HeaderSearch;
49class PragmaNamespace;
50class PragmaHandler;
51class CommentHandler;
52class ScratchBuffer;
53class TargetInfo;
54class PPCallbacks;
55class CodeCompletionHandler;
56class DirectoryLookup;
57class PreprocessingRecord;
58class ModuleLoader;
59class PreprocessorOptions;
60
61/// \brief Stores token information for comparing actual tokens with
62/// predefined values.  Only handles simple tokens and identifiers.
63class TokenValue {
64  tok::TokenKind Kind;
65  IdentifierInfo *II;
66
67public:
68  TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
69    assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
70    assert(Kind != tok::identifier &&
71           "Identifiers should be created by TokenValue(IdentifierInfo *)");
72    assert(!tok::isLiteral(Kind) && "Literals are not supported.");
73    assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
74  }
75  TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
76  bool operator==(const Token &Tok) const {
77    return Tok.getKind() == Kind &&
78        (!II || II == Tok.getIdentifierInfo());
79  }
80};
81
82/// \brief Engages in a tight little dance with the lexer to efficiently
83/// preprocess tokens.
84///
85/// Lexers know only about tokens within a single source file, and don't
86/// know anything about preprocessor-level issues like the \#include stack,
87/// token expansion, etc.
88class Preprocessor : public RefCountedBase<Preprocessor> {
89  IntrusiveRefCntPtr<PreprocessorOptions> PPOpts;
90  DiagnosticsEngine        *Diags;
91  LangOptions       &LangOpts;
92  const TargetInfo  *Target;
93  FileManager       &FileMgr;
94  SourceManager     &SourceMgr;
95  ScratchBuffer     *ScratchBuf;
96  HeaderSearch      &HeaderInfo;
97  ModuleLoader      &TheModuleLoader;
98
99  /// \brief External source of macros.
100  ExternalPreprocessorSource *ExternalSource;
101
102
103  /// An optional PTHManager object used for getting tokens from
104  /// a token cache rather than lexing the original source file.
105  std::unique_ptr<PTHManager> PTH;
106
107  /// A BumpPtrAllocator object used to quickly allocate and release
108  /// objects internal to the Preprocessor.
109  llvm::BumpPtrAllocator BP;
110
111  /// Identifiers for builtin macros and other builtins.
112  IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
113  IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
114  IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
115  IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
116  IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
117  IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
118  IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
119  IdentifierInfo *Ident__identifier;               // __identifier
120  IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
121  IdentifierInfo *Ident__has_feature;              // __has_feature
122  IdentifierInfo *Ident__has_extension;            // __has_extension
123  IdentifierInfo *Ident__has_builtin;              // __has_builtin
124  IdentifierInfo *Ident__has_attribute;            // __has_attribute
125  IdentifierInfo *Ident__has_include;              // __has_include
126  IdentifierInfo *Ident__has_include_next;         // __has_include_next
127  IdentifierInfo *Ident__has_warning;              // __has_warning
128  IdentifierInfo *Ident__is_identifier;            // __is_identifier
129  IdentifierInfo *Ident__building_module;          // __building_module
130  IdentifierInfo *Ident__MODULE__;                 // __MODULE__
131
132  SourceLocation DATELoc, TIMELoc;
133  unsigned CounterValue;  // Next __COUNTER__ value.
134
135  enum {
136    /// \brief Maximum depth of \#includes.
137    MaxAllowedIncludeStackDepth = 200
138  };
139
140  // State that is set before the preprocessor begins.
141  bool KeepComments : 1;
142  bool KeepMacroComments : 1;
143  bool SuppressIncludeNotFoundError : 1;
144
145  // State that changes while the preprocessor runs:
146  bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
147
148  /// Whether the preprocessor owns the header search object.
149  bool OwnsHeaderSearch : 1;
150
151  /// True if macro expansion is disabled.
152  bool DisableMacroExpansion : 1;
153
154  /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
155  /// when parsing preprocessor directives.
156  bool MacroExpansionInDirectivesOverride : 1;
157
158  class ResetMacroExpansionHelper;
159
160  /// \brief Whether we have already loaded macros from the external source.
161  mutable bool ReadMacrosFromExternalSource : 1;
162
163  /// \brief True if pragmas are enabled.
164  bool PragmasEnabled : 1;
165
166  /// \brief True if the current build action is a preprocessing action.
167  bool PreprocessedOutput : 1;
168
169  /// \brief True if we are currently preprocessing a #if or #elif directive
170  bool ParsingIfOrElifDirective;
171
172  /// \brief True if we are pre-expanding macro arguments.
173  bool InMacroArgPreExpansion;
174
175  /// \brief Mapping/lookup information for all identifiers in
176  /// the program, including program keywords.
177  mutable IdentifierTable Identifiers;
178
/// \brief This table contains all the selectors in the program.
///
/// Unlike IdentifierTable above, this table *isn't* populated by the
/// preprocessor. It is declared/expanded here because its role/lifetime is
/// conceptually similar to the IdentifierTable. In addition, the current
/// control flow (in clang::ParseAST()) makes it convenient to put here.
///
/// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
/// the lifetime of the preprocessor.
188  SelectorTable Selectors;
189
190  /// \brief Information about builtins.
191  Builtin::Context BuiltinInfo;
192
193  /// \brief Tracks all of the pragmas that the client registered
194  /// with this preprocessor.
195  PragmaNamespace *PragmaHandlers;
196
197  /// \brief Tracks all of the comment handlers that the client registered
198  /// with this preprocessor.
199  std::vector<CommentHandler *> CommentHandlers;
200
201  /// \brief True if we want to ignore EOF token and continue later on (thus
202  /// avoid tearing the Lexer and etc. down).
203  bool IncrementalProcessing;
204
205  /// The kind of translation unit we are processing.
206  TranslationUnitKind TUKind;
207
208  /// \brief The code-completion handler.
209  CodeCompletionHandler *CodeComplete;
210
211  /// \brief The file that we're performing code-completion for, if any.
212  const FileEntry *CodeCompletionFile;
213
214  /// \brief The offset in file for the code-completion point.
215  unsigned CodeCompletionOffset;
216
217  /// \brief The location for the code-completion point. This gets instantiated
218  /// when the CodeCompletionFile gets \#include'ed for preprocessing.
219  SourceLocation CodeCompletionLoc;
220
221  /// \brief The start location for the file of the code-completion point.
222  ///
223  /// This gets instantiated when the CodeCompletionFile gets \#include'ed
224  /// for preprocessing.
225  SourceLocation CodeCompletionFileLoc;
226
227  /// \brief The source location of the \c import contextual keyword we just
228  /// lexed, if any.
229  SourceLocation ModuleImportLoc;
230
231  /// \brief The module import path that we're currently processing.
232  SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
233
234  /// \brief Whether the last token we lexed was an '@'.
235  bool LastTokenWasAt;
236
237  /// \brief Whether the module import expects an identifier next. Otherwise,
238  /// it expects a '.' or ';'.
239  bool ModuleImportExpectsIdentifier;
240
241  /// \brief The source location of the currently-active
242  /// \#pragma clang arc_cf_code_audited begin.
243  SourceLocation PragmaARCCFCodeAuditedLoc;
244
245  /// \brief True if we hit the code-completion point.
246  bool CodeCompletionReached;
247
248  /// \brief The number of bytes that we will initially skip when entering the
249  /// main file, along with a flag that indicates whether skipping this number
250  /// of bytes will place the lexer at the start of a line.
251  ///
252  /// This is used when loading a precompiled preamble.
253  std::pair<unsigned, bool> SkipMainFilePreamble;
254
255  /// \brief The current top of the stack that we're lexing from if
256  /// not expanding a macro and we are lexing directly from source code.
257  ///
258  /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
259  std::unique_ptr<Lexer> CurLexer;
260
261  /// \brief The current top of stack that we're lexing from if
262  /// not expanding from a macro and we are lexing from a PTH cache.
263  ///
264  /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
265  std::unique_ptr<PTHLexer> CurPTHLexer;
266
/// \brief The current top of the stack that we're lexing from
/// if not expanding a macro.
///
/// This is an alias for either CurLexer or CurPTHLexer.
271  PreprocessorLexer *CurPPLexer;
272
273  /// \brief Used to find the current FileEntry, if CurLexer is non-null
274  /// and if applicable.
275  ///
276  /// This allows us to implement \#include_next and find directory-specific
277  /// properties.
278  const DirectoryLookup *CurDirLookup;
279
280  /// \brief The current macro we are expanding, if we are expanding a macro.
281  ///
282  /// One of CurLexer and CurTokenLexer must be null.
283  std::unique_ptr<TokenLexer> CurTokenLexer;
284
285  /// \brief The kind of lexer we're currently working with.
286  enum CurLexerKind {
287    CLK_Lexer,
288    CLK_PTHLexer,
289    CLK_TokenLexer,
290    CLK_CachingLexer,
291    CLK_LexAfterModuleImport
292  } CurLexerKind;
293
294  /// \brief If the current lexer is for a submodule that is being built, this
295  /// is that submodule.
296  Module *CurSubmodule;
297
298  /// \brief Keeps track of the stack of files currently
299  /// \#included, and macros currently being expanded from, not counting
300  /// CurLexer/CurTokenLexer.
301  struct IncludeStackInfo {
302    enum CurLexerKind           CurLexerKind;
303    Module                     *TheSubmodule;
304    std::unique_ptr<Lexer>      TheLexer;
305    std::unique_ptr<PTHLexer>   ThePTHLexer;
306    PreprocessorLexer          *ThePPLexer;
307    std::unique_ptr<TokenLexer> TheTokenLexer;
308    const DirectoryLookup      *TheDirLookup;
309
310    // The following constructors are completely useless copies of the default
311    // versions, only needed to pacify MSVC.
312    IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
313                     std::unique_ptr<Lexer> &&TheLexer,
314                     std::unique_ptr<PTHLexer> &&ThePTHLexer,
315                     PreprocessorLexer *ThePPLexer,
316                     std::unique_ptr<TokenLexer> &&TheTokenLexer,
317                     const DirectoryLookup *TheDirLookup)
318        : CurLexerKind(std::move(CurLexerKind)),
319          TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
320          ThePTHLexer(std::move(ThePTHLexer)),
321          ThePPLexer(std::move(ThePPLexer)),
322          TheTokenLexer(std::move(TheTokenLexer)),
323          TheDirLookup(std::move(TheDirLookup)) {}
324    IncludeStackInfo(IncludeStackInfo &&RHS)
325        : CurLexerKind(std::move(RHS.CurLexerKind)),
326          TheSubmodule(std::move(RHS.TheSubmodule)),
327          TheLexer(std::move(RHS.TheLexer)),
328          ThePTHLexer(std::move(RHS.ThePTHLexer)),
329          ThePPLexer(std::move(RHS.ThePPLexer)),
330          TheTokenLexer(std::move(RHS.TheTokenLexer)),
331          TheDirLookup(std::move(RHS.TheDirLookup)) {}
332  };
333  std::vector<IncludeStackInfo> IncludeMacroStack;
334
335  /// \brief Actions invoked when some preprocessor activity is
336  /// encountered (e.g. a file is \#included, etc).
337  PPCallbacks *Callbacks;
338
339  struct MacroExpandsInfo {
340    Token Tok;
341    MacroDirective *MD;
342    SourceRange Range;
343    MacroExpandsInfo(Token Tok, MacroDirective *MD, SourceRange Range)
344      : Tok(Tok), MD(MD), Range(Range) { }
345  };
346  SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
347
348  /// For each IdentifierInfo that was associated with a macro, we
349  /// keep a mapping to the history of all macro definitions and #undefs in
350  /// the reverse order (the latest one is in the head of the list).
351  llvm::DenseMap<const IdentifierInfo*, MacroDirective*> Macros;
352  friend class ASTReader;
353
354  /// \brief Macros that we want to warn because they are not used at the end
355  /// of the translation unit.
356  ///
357  /// We store just their SourceLocations instead of
358  /// something like MacroInfo*. The benefit of this is that when we are
359  /// deserializing from PCH, we don't need to deserialize identifier & macros
360  /// just so that we can report that they are unused, we just warn using
361  /// the SourceLocations of this set (that will be filled by the ASTReader).
362  /// We are using SmallPtrSet instead of a vector for faster removal.
363  typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
364  WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
365
366  /// \brief A "freelist" of MacroArg objects that can be
367  /// reused for quick allocation.
368  MacroArgs *MacroArgCache;
369  friend class MacroArgs;
370
371  /// For each IdentifierInfo used in a \#pragma push_macro directive,
372  /// we keep a MacroInfo stack used to restore the previous macro value.
373  llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
374
375  // Various statistics we track for performance analysis.
376  unsigned NumDirectives, NumDefined, NumUndefined, NumPragma;
377  unsigned NumIf, NumElse, NumEndif;
378  unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
379  unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
380  unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
381  unsigned NumSkipped;
382
383  /// \brief The predefined macros that preprocessor should use from the
384  /// command line etc.
385  std::string Predefines;
386
387  /// \brief The file ID for the preprocessor predefines.
388  FileID PredefinesFileID;
389
390  /// \{
391  /// \brief Cache of macro expanders to reduce malloc traffic.
392  enum { TokenLexerCacheSize = 8 };
393  unsigned NumCachedTokenLexers;
394  TokenLexer *TokenLexerCache[TokenLexerCacheSize];
395  /// \}
396
/// \brief Keeps macro expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro expanded tokens that it
/// is going to lex to the cache, and when it finishes the tokens are removed
/// from the end of the cache.
402  SmallVector<Token, 16> MacroExpandedTokens;
403  std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
404
405  /// \brief A record of the macro definitions and expansions that
406  /// occurred during preprocessing.
407  ///
408  /// This is an optional side structure that can be enabled with
409  /// \c createPreprocessingRecord() prior to preprocessing.
410  PreprocessingRecord *Record;
411
412private:  // Cached tokens state.
413  typedef SmallVector<Token, 1> CachedTokensTy;
414
415  /// \brief Cached tokens are stored here when we do backtracking or
416  /// lookahead. They are "lexed" by the CachingLex() method.
417  CachedTokensTy CachedTokens;
418
419  /// \brief The position of the cached token that CachingLex() should
420  /// "lex" next.
421  ///
422  /// If it points beyond the CachedTokens vector, it means that a normal
423  /// Lex() should be invoked.
424  CachedTokensTy::size_type CachedLexPos;
425
426  /// \brief Stack of backtrack positions, allowing nested backtracks.
427  ///
428  /// The EnableBacktrackAtThisPos() method pushes a position to
429  /// indicate where CachedLexPos should be set when the BackTrack() method is
430  /// invoked (at which point the last position is popped).
431  std::vector<CachedTokensTy::size_type> BacktrackPositions;
432
433  struct MacroInfoChain {
434    MacroInfo MI;
435    MacroInfoChain *Next;
436    MacroInfoChain *Prev;
437  };
438
439  /// MacroInfos are managed as a chain for easy disposal.  This is the head
440  /// of that list.
441  MacroInfoChain *MIChainHead;
442
443  /// A "freelist" of MacroInfo objects that can be reused for quick
444  /// allocation.
445  MacroInfoChain *MICache;
446
447  struct DeserializedMacroInfoChain {
448    MacroInfo MI;
449    unsigned OwningModuleID; // MUST be immediately after the MacroInfo object
450                     // so it can be accessed by MacroInfo::getOwningModuleID().
451    DeserializedMacroInfoChain *Next;
452  };
453  DeserializedMacroInfoChain *DeserialMIChainHead;
454
455public:
456  Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
457               DiagnosticsEngine &diags, LangOptions &opts,
458               SourceManager &SM, HeaderSearch &Headers,
459               ModuleLoader &TheModuleLoader,
460               IdentifierInfoLookup *IILookup = nullptr,
461               bool OwnsHeaderSearch = false,
462               TranslationUnitKind TUKind = TU_Complete);
463
464  ~Preprocessor();
465
466  /// \brief Initialize the preprocessor using information about the target.
467  ///
468  /// \param Target is owned by the caller and must remain valid for the
469  /// lifetime of the preprocessor.
470  void Initialize(const TargetInfo &Target);
471
472  /// \brief Retrieve the preprocessor options used to initialize this
473  /// preprocessor.
474  PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
475
476  DiagnosticsEngine &getDiagnostics() const { return *Diags; }
477  void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
478
479  const LangOptions &getLangOpts() const { return LangOpts; }
480  const TargetInfo &getTargetInfo() const { return *Target; }
481  FileManager &getFileManager() const { return FileMgr; }
482  SourceManager &getSourceManager() const { return SourceMgr; }
483  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
484
485  IdentifierTable &getIdentifierTable() { return Identifiers; }
486  SelectorTable &getSelectorTable() { return Selectors; }
487  Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
488  llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
489
490  void setPTHManager(PTHManager* pm);
491
492  PTHManager *getPTHManager() { return PTH.get(); }
493
494  void setExternalSource(ExternalPreprocessorSource *Source) {
495    ExternalSource = Source;
496  }
497
498  ExternalPreprocessorSource *getExternalSource() const {
499    return ExternalSource;
500  }
501
502  /// \brief Retrieve the module loader associated with this preprocessor.
503  ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
504
505  bool hadModuleLoaderFatalFailure() const {
506    return TheModuleLoader.HadFatalFailure;
507  }
508
509  /// \brief True if we are currently preprocessing a #if or #elif directive
510  bool isParsingIfOrElifDirective() const {
511    return ParsingIfOrElifDirective;
512  }
513
514  /// \brief Control whether the preprocessor retains comments in output.
515  void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
516    this->KeepComments = KeepComments | KeepMacroComments;
517    this->KeepMacroComments = KeepMacroComments;
518  }
519
520  bool getCommentRetentionState() const { return KeepComments; }
521
522  void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
523  bool getPragmasEnabled() const { return PragmasEnabled; }
524
525  void SetSuppressIncludeNotFoundError(bool Suppress) {
526    SuppressIncludeNotFoundError = Suppress;
527  }
528
529  bool GetSuppressIncludeNotFoundError() {
530    return SuppressIncludeNotFoundError;
531  }
532
533  /// Sets whether the preprocessor is responsible for producing output or if
534  /// it is producing tokens to be consumed by Parse and Sema.
535  void setPreprocessedOutput(bool IsPreprocessedOutput) {
536    PreprocessedOutput = IsPreprocessedOutput;
537  }
538
539  /// Returns true if the preprocessor is responsible for generating output,
540  /// false if it is producing tokens to be consumed by Parse and Sema.
541  bool isPreprocessedOutput() const { return PreprocessedOutput; }
542
543  /// \brief Return true if we are lexing directly from the specified lexer.
544  bool isCurrentLexer(const PreprocessorLexer *L) const {
545    return CurPPLexer == L;
546  }
547
548  /// \brief Return the current lexer being lexed from.
549  ///
550  /// Note that this ignores any potentially active macro expansions and _Pragma
551  /// expansions going on at the time.
552  PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
553
554  /// \brief Return the current file lexer being lexed from.
555  ///
556  /// Note that this ignores any potentially active macro expansions and _Pragma
557  /// expansions going on at the time.
558  PreprocessorLexer *getCurrentFileLexer() const;
559
560  /// \brief Returns the FileID for the preprocessor predefines.
561  FileID getPredefinesFileID() const { return PredefinesFileID; }
562
563  /// \{
564  /// \brief Accessors for preprocessor callbacks.
565  ///
566  /// Note that this class takes ownership of any PPCallbacks object given to
567  /// it.
568  PPCallbacks *getPPCallbacks() const { return Callbacks; }
569  void addPPCallbacks(PPCallbacks *C) {
570    if (Callbacks)
571      C = new PPChainedCallbacks(C, Callbacks);
572    Callbacks = C;
573  }
574  /// \}
575
576  /// \brief Given an identifier, return its latest MacroDirective if it is
577  /// \#defined or null if it isn't \#define'd.
578  MacroDirective *getMacroDirective(IdentifierInfo *II) const {
579    if (!II->hasMacroDefinition())
580      return nullptr;
581
582    MacroDirective *MD = getMacroDirectiveHistory(II);
583    assert(MD->isDefined() && "Macro is undefined!");
584    return MD;
585  }
586
587  const MacroInfo *getMacroInfo(IdentifierInfo *II) const {
588    return const_cast<Preprocessor*>(this)->getMacroInfo(II);
589  }
590
591  MacroInfo *getMacroInfo(IdentifierInfo *II) {
592    if (MacroDirective *MD = getMacroDirective(II))
593      return MD->getMacroInfo();
594    return nullptr;
595  }
596
597  /// \brief Given an identifier, return the (probably #undef'd) MacroInfo
598  /// representing the most recent macro definition.
599  ///
600  /// One can iterate over all previous macro definitions from the most recent
601  /// one. This should only be called for identifiers that hadMacroDefinition().
602  MacroDirective *getMacroDirectiveHistory(const IdentifierInfo *II) const;
603
604  /// \brief Add a directive to the macro directive history for this identifier.
605  void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
606  DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
607                                             SourceLocation Loc,
608                                             bool isImported) {
609    DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc, isImported);
610    appendMacroDirective(II, MD);
611    return MD;
612  }
613  DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI){
614    return appendDefMacroDirective(II, MI, MI->getDefinitionLoc(), false);
615  }
616  /// \brief Set a MacroDirective that was loaded from a PCH file.
617  void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *MD);
618
619  /// \{
620  /// Iterators for the macro history table. Currently defined macros have
621  /// IdentifierInfo::hasMacroDefinition() set and an empty
622  /// MacroInfo::getUndefLoc() at the head of the list.
623  typedef llvm::DenseMap<const IdentifierInfo *,
624                         MacroDirective*>::const_iterator macro_iterator;
625  macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
626  macro_iterator macro_end(bool IncludeExternalMacros = true) const;
627  /// \}
628
629  /// \brief Return the name of the macro defined before \p Loc that has
630  /// spelling \p Tokens.  If there are multiple macros with same spelling,
631  /// return the last one defined.
632  StringRef getLastMacroWithSpelling(SourceLocation Loc,
633                                     ArrayRef<TokenValue> Tokens) const;
634
635  const std::string &getPredefines() const { return Predefines; }
636  /// \brief Set the predefines for this Preprocessor.
637  ///
638  /// These predefines are automatically injected when parsing the main file.
639  void setPredefines(const char *P) { Predefines = P; }
640  void setPredefines(const std::string &P) { Predefines = P; }
641
642  /// Return information about the specified preprocessor
643  /// identifier token.
644  IdentifierInfo *getIdentifierInfo(StringRef Name) const {
645    return &Identifiers.get(Name);
646  }
647
648  /// \brief Add the specified pragma handler to this preprocessor.
649  ///
650  /// If \p Namespace is non-null, then it is a token required to exist on the
651  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
652  void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
653  void AddPragmaHandler(PragmaHandler *Handler) {
654    AddPragmaHandler(StringRef(), Handler);
655  }
656
657  /// \brief Remove the specific pragma handler from this preprocessor.
658  ///
659  /// If \p Namespace is non-null, then it should be the namespace that
660  /// \p Handler was added to. It is an error to remove a handler that
661  /// has not been registered.
662  void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
663  void RemovePragmaHandler(PragmaHandler *Handler) {
664    RemovePragmaHandler(StringRef(), Handler);
665  }
666
667  /// Install empty handlers for all pragmas (making them ignored).
668  void IgnorePragmas();
669
670  /// \brief Add the specified comment handler to the preprocessor.
671  void addCommentHandler(CommentHandler *Handler);
672
673  /// \brief Remove the specified comment handler.
674  ///
675  /// It is an error to remove a handler that has not been registered.
676  void removeCommentHandler(CommentHandler *Handler);
677
678  /// \brief Set the code completion handler to the given object.
679  void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
680    CodeComplete = &Handler;
681  }
682
683  /// \brief Retrieve the current code-completion handler.
684  CodeCompletionHandler *getCodeCompletionHandler() const {
685    return CodeComplete;
686  }
687
688  /// \brief Clear out the code completion handler.
689  void clearCodeCompletionHandler() {
690    CodeComplete = nullptr;
691  }
692
693  /// \brief Hook used by the lexer to invoke the "natural language" code
694  /// completion point.
695  void CodeCompleteNaturalLanguage();
696
697  /// \brief Retrieve the preprocessing record, or NULL if there is no
698  /// preprocessing record.
699  PreprocessingRecord *getPreprocessingRecord() const { return Record; }
700
701  /// \brief Create a new preprocessing record, which will keep track of
702  /// all macro expansions, macro definitions, etc.
703  void createPreprocessingRecord();
704
705  /// \brief Enter the specified FileID as the main source file,
706  /// which implicitly adds the builtin defines etc.
707  void EnterMainSourceFile();
708
709  /// \brief Inform the preprocessor callbacks that processing is complete.
710  void EndSourceFile();
711
712  /// \brief Add a source file to the top of the include stack and
713  /// start lexing tokens from it instead of the current buffer.
714  ///
715  /// Emits a diagnostic, doesn't enter the file, and returns true on error.
716  bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
717                       SourceLocation Loc);
718
719  /// \brief Add a Macro to the top of the include stack and start lexing
720  /// tokens from it instead of the current buffer.
721  ///
722  /// \param Args specifies the tokens input to a function-like macro.
723  /// \param ILEnd specifies the location of the ')' for a function-like macro
724  /// or the identifier for an object-like macro.
725  void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro,
726                  MacroArgs *Args);
727
728  /// \brief Add a "macro" context to the top of the include stack,
729  /// which will cause the lexer to start returning the specified tokens.
730  ///
731  /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
732  /// will not be subject to further macro expansion. Otherwise, these tokens
733  /// will be re-macro-expanded when/if expansion is enabled.
734  ///
735  /// If \p OwnsTokens is false, this method assumes that the specified stream
736  /// of tokens has a permanent owner somewhere, so they do not need to be
737  /// copied. If it is true, it assumes the array of tokens is allocated with
738  /// \c new[] and must be freed.
739  void EnterTokenStream(const Token *Toks, unsigned NumToks,
740                        bool DisableMacroExpansion, bool OwnsTokens);
741
742  /// \brief Pop the current lexer/macro exp off the top of the lexer stack.
743  ///
744  /// This should only be used in situations where the current state of the
745  /// top-of-stack lexer is known.
746  void RemoveTopOfLexerStack();
747
748  /// From the point that this method is called, and until
749  /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
750  /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
751  /// make the Preprocessor re-lex the same tokens.
752  ///
753  /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
754  /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
755  /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
756  ///
757  /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
758  /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
759  /// tokens will continue indefinitely.
760  ///
761  void EnableBacktrackAtThisPos();
762
763  /// \brief Disable the last EnableBacktrackAtThisPos call.
764  void CommitBacktrackedTokens();
765
766  /// \brief Make Preprocessor re-lex the tokens that were lexed since
767  /// EnableBacktrackAtThisPos() was previously called.
768  void Backtrack();
769
770  /// \brief True if EnableBacktrackAtThisPos() was called and
771  /// caching of tokens is on.
772  bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
773
774  /// \brief Lex the next token for this preprocessor.
775  void Lex(Token &Result);
776
  // NOTE(review): undocumented in this header. Judging by the name, this
  // presumably lexes into \p Result after a module import has been seen;
  // confirm against the out-of-line definition before relying on it.
  void LexAfterModuleImport(Token &Result);
778
779  /// \brief Lex a string literal, which may be the concatenation of multiple
780  /// string literals and may even come from macro expansion.
  /// \returns true on success, false if an error diagnostic has been generated.
782  bool LexStringLiteral(Token &Result, std::string &String,
783                        const char *DiagnosticTag, bool AllowMacroExpansion) {
784    if (AllowMacroExpansion)
785      Lex(Result);
786    else
787      LexUnexpandedToken(Result);
788    return FinishLexStringLiteral(Result, String, DiagnosticTag,
789                                  AllowMacroExpansion);
790  }
791
792  /// \brief Complete the lexing of a string literal where the first token has
793  /// already been lexed (see LexStringLiteral).
794  bool FinishLexStringLiteral(Token &Result, std::string &String,
795                              const char *DiagnosticTag,
796                              bool AllowMacroExpansion);
797
798  /// \brief Lex a token.  If it's a comment, keep lexing until we get
799  /// something not a comment.
800  ///
801  /// This is useful in -E -C mode where comments would foul up preprocessor
802  /// directive handling.
803  void LexNonComment(Token &Result) {
804    do
805      Lex(Result);
806    while (Result.getKind() == tok::comment);
807  }
808
809  /// \brief Just like Lex, but disables macro expansion of identifier tokens.
810  void LexUnexpandedToken(Token &Result) {
811    // Disable macro expansion.
812    bool OldVal = DisableMacroExpansion;
813    DisableMacroExpansion = true;
814    // Lex the token.
815    Lex(Result);
816
817    // Reenable it.
818    DisableMacroExpansion = OldVal;
819  }
820
821  /// \brief Like LexNonComment, but this disables macro expansion of
822  /// identifier tokens.
823  void LexUnexpandedNonComment(Token &Result) {
824    do
825      LexUnexpandedToken(Result);
826    while (Result.getKind() == tok::comment);
827  }
828
829  /// \brief Parses a simple integer literal to get its numeric value.  Floating
830  /// point literals and user defined literals are rejected.  Used primarily to
831  /// handle pragmas that accept integer arguments.
832  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
833
834  /// Disables macro expansion everywhere except for preprocessor directives.
835  void SetMacroExpansionOnlyInDirectives() {
836    DisableMacroExpansion = true;
837    MacroExpansionInDirectivesOverride = true;
838  }
839
840  /// \brief Peeks ahead N tokens and returns that token without consuming any
841  /// tokens.
842  ///
843  /// LookAhead(0) returns the next token that would be returned by Lex(),
844  /// LookAhead(1) returns the token after it, etc.  This returns normal
845  /// tokens after phase 5.  As such, it is equivalent to using
846  /// 'Lex', not 'LexUnexpandedToken'.
847  const Token &LookAhead(unsigned N) {
848    if (CachedLexPos + N < CachedTokens.size())
849      return CachedTokens[CachedLexPos+N];
850    else
851      return PeekAhead(N+1);
852  }
853
  /// \brief When backtracking is enabled and tokens are cached,
  /// this allows reverting a specific number of tokens.
856  ///
857  /// Note that the number of tokens being reverted should be up to the last
858  /// backtrack position, not more.
859  void RevertCachedTokens(unsigned N) {
860    assert(isBacktrackEnabled() &&
861           "Should only be called when tokens are cached for backtracking");
862    assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
863         && "Should revert tokens up to the last backtrack position, not more");
864    assert(signed(CachedLexPos) - signed(N) >= 0 &&
865           "Corrupted backtrack positions ?");
866    CachedLexPos -= N;
867  }
868
869  /// \brief Enters a token in the token stream to be lexed next.
870  ///
  /// If Backtrack() is called afterwards, the token will remain at the
  /// insertion point.
873  void EnterToken(const Token &Tok) {
874    EnterCachingLexMode();
875    CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
876  }
877
878  /// We notify the Preprocessor that if it is caching tokens (because
879  /// backtrack is enabled) it should replace the most recent cached tokens
880  /// with the given annotation token. This function has no effect if
881  /// backtracking is not enabled.
882  ///
  /// Note that the use of this function is just for optimization, so that the
  /// cached tokens don't get re-parsed and re-resolved after a backtrack is
  /// invoked.
886  void AnnotateCachedTokens(const Token &Tok) {
887    assert(Tok.isAnnotation() && "Expected annotation token");
888    if (CachedLexPos != 0 && isBacktrackEnabled())
889      AnnotatePreviousCachedTokens(Tok);
890  }
891
892  /// Get the location of the last cached token, suitable for setting the end
893  /// location of an annotation token.
894  SourceLocation getLastCachedTokenLocation() const {
895    assert(CachedLexPos != 0);
896    return CachedTokens[CachedLexPos-1].getLocation();
897  }
898
899  /// \brief Replace the last token with an annotation token.
900  ///
901  /// Like AnnotateCachedTokens(), this routine replaces an
902  /// already-parsed (and resolved) token with an annotation
903  /// token. However, this routine only replaces the last token with
904  /// the annotation token; it does not affect any other cached
905  /// tokens. This function has no effect if backtracking is not
906  /// enabled.
907  void ReplaceLastTokenWithAnnotation(const Token &Tok) {
908    assert(Tok.isAnnotation() && "Expected annotation token");
909    if (CachedLexPos != 0 && isBacktrackEnabled())
910      CachedTokens[CachedLexPos-1] = Tok;
911  }
912
913  /// Update the current token to represent the provided
914  /// identifier, in order to cache an action performed by typo correction.
915  void TypoCorrectToken(const Token &Tok) {
916    assert(Tok.getIdentifierInfo() && "Expected identifier token");
917    if (CachedLexPos != 0 && isBacktrackEnabled())
918      CachedTokens[CachedLexPos-1] = Tok;
919  }
920
921  /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
922  /// CurTokenLexer pointers.
923  void recomputeCurLexerKind();
924
925  /// \brief Returns true if incremental processing is enabled
926  bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
927
928  /// \brief Enables the incremental processing
  void enableIncrementalProcessing(bool value = true) {
    // Despite the name, passing false turns incremental processing back off:
    // the flag is simply overwritten with \p value.
    IncrementalProcessing = value;
  }
932
933  /// \brief Specify the point at which code-completion will be performed.
934  ///
935  /// \param File the file in which code completion should occur. If
936  /// this file is included multiple times, code-completion will
937  /// perform completion the first time it is included. If NULL, this
938  /// function clears out the code-completion point.
939  ///
940  /// \param Line the line at which code completion should occur
941  /// (1-based).
942  ///
943  /// \param Column the column at which code completion should occur
944  /// (1-based).
945  ///
946  /// \returns true if an error occurred, false otherwise.
947  bool SetCodeCompletionPoint(const FileEntry *File,
948                              unsigned Line, unsigned Column);
949
950  /// \brief Determine if we are performing code completion.
951  bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
952
953  /// \brief Returns the location of the code-completion point.
954  ///
955  /// Returns an invalid location if code-completion is not enabled or the file
956  /// containing the code-completion point has not been lexed yet.
957  SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
958
959  /// \brief Returns the start location of the file of code-completion point.
960  ///
961  /// Returns an invalid location if code-completion is not enabled or the file
962  /// containing the code-completion point has not been lexed yet.
  SourceLocation getCodeCompletionFileLoc() const {
    // Plain accessor; the stored location is returned by value.
    return CodeCompletionFileLoc;
  }
966
967  /// \brief Returns true if code-completion is enabled and we have hit the
968  /// code-completion point.
969  bool isCodeCompletionReached() const { return CodeCompletionReached; }
970
971  /// \brief Note that we hit the code-completion point.
972  void setCodeCompletionReached() {
973    assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
974    CodeCompletionReached = true;
975    // Silence any diagnostics that occur after we hit the code-completion.
976    getDiagnostics().setSuppressAllDiagnostics(true);
977  }
978
979  /// \brief The location of the currently-active \#pragma clang
980  /// arc_cf_code_audited begin.
981  ///
982  /// Returns an invalid location if there is no such pragma active.
  SourceLocation getPragmaARCCFCodeAuditedLoc() const {
    // Plain accessor for the location stored by setPragmaARCCFCodeAuditedLoc().
    return PragmaARCCFCodeAuditedLoc;
  }
986
987  /// \brief Set the location of the currently-active \#pragma clang
988  /// arc_cf_code_audited begin.  An invalid location ends the pragma.
  void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
    // No validation is done here; the location is stored as given.
    PragmaARCCFCodeAuditedLoc = Loc;
  }
992
993  /// \brief Instruct the preprocessor to skip part of the main source file.
994  ///
995  /// \param Bytes The number of bytes in the preamble to skip.
996  ///
997  /// \param StartOfLine Whether skipping these bytes puts the lexer at the
998  /// start of a line.
999  void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1000    SkipMainFilePreamble.first = Bytes;
1001    SkipMainFilePreamble.second = StartOfLine;
1002  }
1003
1004  /// Forwarding function for diagnostics.  This emits a diagnostic at
1005  /// the specified Token's location, translating the token's start
1006  /// position in the current buffer into a SourcePosition object for rendering.
  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
    // Pure forwarding to the diagnostics object held in Diags.
    return Diags->Report(Loc, DiagID);
  }
1010
1011  DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1012    return Diags->Report(Tok.getLocation(), DiagID);
1013  }
1014
1015  /// Return the 'spelling' of the token at the given
1016  /// location; does not go up to the spelling location or down to the
1017  /// expansion location.
1018  ///
1019  /// \param buffer A buffer which will be used only if the token requires
1020  ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
1021  /// \param invalid If non-null, will be set \c true if an error occurs.
1022  StringRef getSpelling(SourceLocation loc,
1023                        SmallVectorImpl<char> &buffer,
1024                        bool *invalid = nullptr) const {
1025    return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1026  }
1027
1028  /// \brief Return the 'spelling' of the Tok token.
1029  ///
1030  /// The spelling of a token is the characters used to represent the token in
1031  /// the source file after trigraph expansion and escaped-newline folding.  In
1032  /// particular, this wants to get the true, uncanonicalized, spelling of
1033  /// things like digraphs, UCNs, etc.
1034  ///
1035  /// \param Invalid If non-null, will be set \c true if an error occurs.
1036  std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1037    return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1038  }
1039
1040  /// \brief Get the spelling of a token into a preallocated buffer, instead
1041  /// of as an std::string.
1042  ///
1043  /// The caller is required to allocate enough space for the token, which is
1044  /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1045  /// actual result is returned.
1046  ///
1047  /// Note that this method may do two possible things: it may either fill in
1048  /// the buffer specified with characters, or it may *change the input pointer*
1049  /// to point to a constant buffer with the data already in it (avoiding a
1050  /// copy).  The caller is not allowed to modify the returned buffer pointer
1051  /// if an internal buffer is returned.
1052  unsigned getSpelling(const Token &Tok, const char *&Buffer,
1053                       bool *Invalid = nullptr) const {
1054    return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1055  }
1056
1057  /// \brief Get the spelling of a token into a SmallVector.
1058  ///
1059  /// Note that the returned StringRef may not point to the
1060  /// supplied buffer if a copy can be avoided.
1061  StringRef getSpelling(const Token &Tok,
1062                        SmallVectorImpl<char> &Buffer,
1063                        bool *Invalid = nullptr) const;
1064
1065  /// \brief Relex the token at the specified location.
1066  /// \returns true if there was a failure, false on success.
1067  bool getRawToken(SourceLocation Loc, Token &Result,
1068                   bool IgnoreWhiteSpace = false) {
1069    return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1070  }
1071
1072  /// \brief Given a Token \p Tok that is a numeric constant with length 1,
1073  /// return the character.
1074  char
1075  getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1076                                              bool *Invalid = nullptr) const {
1077    assert(Tok.is(tok::numeric_constant) &&
1078           Tok.getLength() == 1 && "Called on unsupported token");
1079    assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1080
1081    // If the token is carrying a literal data pointer, just use it.
1082    if (const char *D = Tok.getLiteralData())
1083      return *D;
1084
1085    // Otherwise, fall back on getCharacterData, which is slower, but always
1086    // works.
1087    return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1088  }
1089
1090  /// \brief Retrieve the name of the immediate macro expansion.
1091  ///
1092  /// This routine starts from a source location, and finds the name of the
1093  /// macro responsible for its immediate expansion. It looks through any
1094  /// intervening macro argument expansions to compute this. It returns a
1095  /// StringRef that refers to the SourceManager-owned buffer of the source
1096  /// where that macro name is spelled. Thus, the result shouldn't out-live
1097  /// the SourceManager.
1098  StringRef getImmediateMacroName(SourceLocation Loc) {
1099    return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1100  }
1101
1102  /// \brief Plop the specified string into a scratch buffer and set the
1103  /// specified token's location and length to it.
1104  ///
1105  /// If specified, the source location provides a location of the expansion
1106  /// point of the token.
1107  void CreateString(StringRef Str, Token &Tok,
1108                    SourceLocation ExpansionLocStart = SourceLocation(),
1109                    SourceLocation ExpansionLocEnd = SourceLocation());
1110
1111  /// \brief Computes the source location just past the end of the
1112  /// token at this source location.
1113  ///
1114  /// This routine can be used to produce a source location that
1115  /// points just past the end of the token referenced by \p Loc, and
1116  /// is generally used when a diagnostic needs to point just after a
  /// token where it expected something different than it received. If
1118  /// the returned source location would not be meaningful (e.g., if
1119  /// it points into a macro), this routine returns an invalid
1120  /// source location.
1121  ///
1122  /// \param Offset an offset from the end of the token, where the source
1123  /// location should refer to. The default offset (0) produces a source
1124  /// location pointing just past the end of the token; an offset of 1 produces
1125  /// a source location pointing to the last character in the token, etc.
1126  SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1127    return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1128  }
1129
1130  /// \brief Returns true if the given MacroID location points at the first
1131  /// token of the macro expansion.
1132  ///
1133  /// \param MacroBegin If non-null and function returns true, it is set to
1134  /// begin location of the macro.
1135  bool isAtStartOfMacroExpansion(SourceLocation loc,
1136                                 SourceLocation *MacroBegin = nullptr) const {
1137    return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1138                                            MacroBegin);
1139  }
1140
1141  /// \brief Returns true if the given MacroID location points at the last
1142  /// token of the macro expansion.
1143  ///
1144  /// \param MacroEnd If non-null and function returns true, it is set to
1145  /// end location of the macro.
1146  bool isAtEndOfMacroExpansion(SourceLocation loc,
1147                               SourceLocation *MacroEnd = nullptr) const {
1148    return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1149  }
1150
1151  /// \brief Print the token to stderr, used for debugging.
1152  void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1153  void DumpLocation(SourceLocation Loc) const;
1154  void DumpMacro(const MacroInfo &MI) const;
1155
1156  /// \brief Given a location that specifies the start of a
1157  /// token, return a new location that specifies a character within the token.
1158  SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1159                                         unsigned Char) const {
1160    return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1161  }
1162
1163  /// \brief Increment the counters for the number of token paste operations
1164  /// performed.
1165  ///
  /// If \p isFast is true, this is a 'fast paste' case we handled.
1167  void IncrementPasteCounter(bool isFast) {
1168    if (isFast)
1169      ++NumFastTokenPaste;
1170    else
1171      ++NumTokenPaste;
1172  }
1173
1174  void PrintStats();
1175
1176  size_t getTotalMemory() const;
1177
1178  /// When the macro expander pastes together a comment (/##/) in Microsoft
1179  /// mode, this method handles updating the current state, returning the
1180  /// token on the next source line.
1181  void HandleMicrosoftCommentPaste(Token &Tok);
1182
1183  //===--------------------------------------------------------------------===//
1184  // Preprocessor callback methods.  These are invoked by a lexer as various
1185  // directives and events are found.
1186
1187  /// Given a tok::raw_identifier token, look up the
1188  /// identifier information for the token and install it into the token,
1189  /// updating the token kind accordingly.
1190  IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1191
1192private:
1193  llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1194
1195public:
1196
1197  /// \brief Specifies the reason for poisoning an identifier.
1198  ///
1199  /// If that identifier is accessed while poisoned, then this reason will be
1200  /// used instead of the default "poisoned" diagnostic.
1201  void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1202
1203  /// \brief Display reason for poisoned identifier.
1204  void HandlePoisonedIdentifier(Token & Tok);
1205
1206  void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1207    if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1208      if(II->isPoisoned()) {
1209        HandlePoisonedIdentifier(Identifier);
1210      }
1211    }
1212  }
1213
1214private:
1215  /// Identifiers used for SEH handling in Borland. These are only
1216  /// allowed in particular circumstances
1217  // __except block
1218  IdentifierInfo *Ident__exception_code,
1219                 *Ident___exception_code,
1220                 *Ident_GetExceptionCode;
1221  // __except filter expression
1222  IdentifierInfo *Ident__exception_info,
1223                 *Ident___exception_info,
1224                 *Ident_GetExceptionInfo;
1225  // __finally
1226  IdentifierInfo *Ident__abnormal_termination,
1227                 *Ident___abnormal_termination,
1228                 *Ident_AbnormalTermination;
1229
1230  const char *getCurLexerEndPos();
1231
1232public:
1233  void PoisonSEHIdentifiers(bool Poison = true); // Borland
1234
1235  /// \brief Callback invoked when the lexer reads an identifier and has
1236  /// filled in the tokens IdentifierInfo member.
1237  ///
1238  /// This callback potentially macro expands it or turns it into a named
1239  /// token (like 'for').
1240  ///
1241  /// \returns true if we actually computed a token, false if we need to
1242  /// lex again.
1243  bool HandleIdentifier(Token &Identifier);
1244
1245
1246  /// \brief Callback invoked when the lexer hits the end of the current file.
1247  ///
1248  /// This either returns the EOF token and returns true, or
1249  /// pops a level off the include stack and returns false, at which point the
1250  /// client should call lex again.
1251  bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1252
1253  /// \brief Callback invoked when the current TokenLexer hits the end of its
1254  /// token stream.
1255  bool HandleEndOfTokenLexer(Token &Result);
1256
1257  /// \brief Callback invoked when the lexer sees a # token at the start of a
1258  /// line.
1259  ///
1260  /// This consumes the directive, modifies the lexer/preprocessor state, and
1261  /// advances the lexer(s) so that the next token read is the correct one.
1262  void HandleDirective(Token &Result);
1263
1264  /// \brief Ensure that the next token is a tok::eod token.
1265  ///
1266  /// If not, emit a diagnostic and consume up until the eod.
1267  /// If \p EnableMacros is true, then we consider macros that expand to zero
1268  /// tokens as being ok.
1269  void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
1270
1271  /// \brief Read and discard all tokens remaining on the current line until
1272  /// the tok::eod token is found.
1273  void DiscardUntilEndOfDirective();
1274
1275  /// \brief Returns true if the preprocessor has seen a use of
1276  /// __DATE__ or __TIME__ in the file so far.
1277  bool SawDateOrTime() const {
1278    return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1279  }
  // Returns the stored CounterValue.
  // NOTE(review): presumably the value backing __COUNTER__ -- confirm.
  unsigned getCounterValue() const { return CounterValue; }
  // Overwrites the stored CounterValue with \p V; no validation is done.
  void setCounterValue(unsigned V) { CounterValue = V; }
1282
1283  /// \brief Retrieves the module that we're currently building, if any.
1284  Module *getCurrentModule();
1285
1286  /// \brief Allocate a new MacroInfo object with the provided SourceLocation.
1287  MacroInfo *AllocateMacroInfo(SourceLocation L);
1288
1289  /// \brief Allocate a new MacroInfo object loaded from an AST file.
1290  MacroInfo *AllocateDeserializedMacroInfo(SourceLocation L,
1291                                           unsigned SubModuleID);
1292
1293  /// \brief Turn the specified lexer token into a fully checked and spelled
1294  /// filename, e.g. as an operand of \#include.
1295  ///
1296  /// The caller is expected to provide a buffer that is large enough to hold
1297  /// the spelling of the filename, but is also expected to handle the case
1298  /// when this method decides to use a different buffer.
1299  ///
1300  /// \returns true if the input filename was in <>'s or false if it was
1301  /// in ""'s.
1302  bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);
1303
1304  /// \brief Given a "foo" or \<foo> reference, look up the indicated file.
1305  ///
1306  /// Returns null on failure.  \p isAngled indicates whether the file
1307  /// reference is for system \#include's or not (i.e. using <> instead of "").
1308  const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
1309                              bool isAngled, const DirectoryLookup *FromDir,
1310                              const DirectoryLookup *&CurDir,
1311                              SmallVectorImpl<char> *SearchPath,
1312                              SmallVectorImpl<char> *RelativePath,
1313                              ModuleMap::KnownHeader *SuggestedModule,
1314                              bool SkipCache = false);
1315
1316  /// \brief Get the DirectoryLookup structure used to find the current
1317  /// FileEntry, if CurLexer is non-null and if applicable.
1318  ///
1319  /// This allows us to implement \#include_next and find directory-specific
1320  /// properties.
1321  const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
1322
1323  /// \brief Return true if we're in the top-level file, not in a \#include.
1324  bool isInPrimaryFile() const;
1325
1326  /// \brief Handle cases where the \#include name is expanded
1327  /// from a macro as multiple tokens, which need to be glued together.
1328  ///
1329  /// This occurs for code like:
1330  /// \code
1331  ///    \#define FOO <x/y.h>
1332  ///    \#include FOO
1333  /// \endcode
1334  /// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
1335  ///
1336  /// This code concatenates and consumes tokens up to the '>' token.  It
1337  /// returns false if the > was found, otherwise it returns true if it finds
1338  /// and consumes the EOD marker.
1339  bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
1340                              SourceLocation &End);
1341
1342  /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is
1343  /// followed by EOD.  Return true if the token is not a valid on-off-switch.
1344  bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
1345
  // NOTE(review): undocumented in this header. The parameters mirror
  // ReadMacroName(), so this presumably validates \p MacroNameTok as a macro
  // name for a \#define/\#undef; the meaning of the bool result should be
  // confirmed against the out-of-line definition.
  bool CheckMacroName(Token &MacroNameTok, char isDefineUndef);
1347
1348private:
1349
1350  void PushIncludeMacroStack() {
1351    IncludeMacroStack.push_back(IncludeStackInfo(
1352        CurLexerKind, CurSubmodule, std::move(CurLexer), std::move(CurPTHLexer),
1353        CurPPLexer, std::move(CurTokenLexer), CurDirLookup));
1354    CurPPLexer = nullptr;
1355  }
1356
1357  void PopIncludeMacroStack() {
1358    CurLexer = std::move(IncludeMacroStack.back().TheLexer);
1359    CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer);
1360    CurPPLexer = IncludeMacroStack.back().ThePPLexer;
1361    CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
1362    CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
1363    CurSubmodule = IncludeMacroStack.back().TheSubmodule;
1364    CurLexerKind = IncludeMacroStack.back().CurLexerKind;
1365    IncludeMacroStack.pop_back();
1366  }
1367
1368  void PropagateLineStartLeadingSpaceInfo(Token &Result);
1369
1370  /// \brief Allocate a new MacroInfo object.
1371  MacroInfo *AllocateMacroInfo();
1372
1373  DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
1374                                               SourceLocation Loc,
1375                                               bool isImported);
1376  UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
1377  VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
1378                                                             bool isPublic);
1379
1380  /// \brief Release the specified MacroInfo for re-use.
1381  ///
  /// This memory will be reused for allocating new MacroInfo objects.
1383  void ReleaseMacroInfo(MacroInfo* MI);
1384
1385  /// \brief Lex and validate a macro name, which occurs after a
1386  /// \#define or \#undef.
1387  ///
1388  /// This emits a diagnostic, sets the token kind to eod,
1389  /// and discards the rest of the macro line if the macro name is invalid.
1390  void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
1391
1392  /// The ( starting an argument list of a macro definition has just been read.
1393  /// Lex the rest of the arguments and the closing ), updating \p MI with
1394  /// what we learn and saving in \p LastTok the last token read.
1395  /// Return true if an error occurs parsing the arg list.
1396  bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok);
1397
1398  /// We just read a \#if or related directive and decided that the
1399  /// subsequent tokens are in the \#if'd out portion of the
1400  /// file.  Lex the rest of the file, until we see an \#endif.  If \p
1401  /// FoundNonSkipPortion is true, then we have already emitted code for part of
1402  /// this \#if directive, so \#else/\#elif blocks should never be entered. If
1403  /// \p FoundElse is false, then \#else directives are ok, if not, then we have
1404  /// already seen one so a \#else directive is a duplicate.  When this returns,
1405  /// the caller can lex the first valid token.
1406  void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
1407                                    bool FoundNonSkipPortion, bool FoundElse,
1408                                    SourceLocation ElseLoc = SourceLocation());
1409
1410  /// \brief A fast PTH version of SkipExcludedConditionalBlock.
1411  void PTHSkipExcludedConditionalBlock();
1412
1413  /// \brief Evaluate an integer constant expression that may occur after a
1414  /// \#if or \#elif directive and return it as a bool.
1415  ///
1416  /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
1417  bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
1418
1419  /// \brief Install the standard preprocessor pragmas:
1420  /// \#pragma GCC poison/system_header/dependency and \#pragma once.
1421  void RegisterBuiltinPragmas();
1422
1423  /// \brief Register builtin macros such as __LINE__ with the identifier table.
1424  void RegisterBuiltinMacros();
1425
1426  /// If an identifier token is read that is to be expanded as a macro, handle
1427  /// it and return the next token as 'Tok'.  If we lexed a token, return true;
1428  /// otherwise the caller should lex again.
1429  bool HandleMacroExpandedIdentifier(Token &Tok, MacroDirective *MD);
1430
1431  /// \brief Cache macro expanded tokens for TokenLexers.
1432  //
1433  /// Works like a stack; a TokenLexer adds the macro expanded tokens that is
1434  /// going to lex in the cache and when it finishes the tokens are removed
1435  /// from the end of the cache.
1436  Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
1437                                  ArrayRef<Token> tokens);
1438  void removeCachedMacroExpandedTokensOfLastLexer();
1439  friend void TokenLexer::ExpandFunctionArguments();
1440
1441  /// Determine whether the next preprocessor token to be
1442  /// lexed is a '('.  If so, consume the token and return true, if not, this
1443  /// method should have no observable side-effect on the lexed tokens.
1444  bool isNextPPTokenLParen();
1445
1446  /// After reading "MACRO(", this method is invoked to read all of the formal
1447  /// arguments specified for the macro invocation.  Returns null on error.
1448  MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
1449                                       SourceLocation &ExpansionEnd);
1450
1451  /// \brief If an identifier token is read that is to be expanded
1452  /// as a builtin macro, handle it and return the next token as 'Tok'.
1453  void ExpandBuiltinMacro(Token &Tok);
1454
1455  /// \brief Read a \c _Pragma directive, slice it up, process it, then
1456  /// return the first token after the directive.
1457  /// This assumes that the \c _Pragma token has just been read into \p Tok.
1458  void Handle_Pragma(Token &Tok);
1459
1460  /// \brief Like Handle_Pragma(), except that the pragma text is not enclosed
1461  /// within a string literal.
1462  void HandleMicrosoft__pragma(Token &Tok);
1463
1464  /// \brief Add the lexer \p TheLexer to the top of the include stack and
1465  /// start lexing tokens from it instead of the current buffer.
1466  void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
1467
1468  /// \brief Add the PTH lexer \p PL to the top of the include stack and
1469  /// start getting tokens from it using the PTH cache.
1470  void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
1471
1472  /// \brief Set the FileID for the preprocessor predefines to \p FID.
1473  void setPredefinesFileID(FileID FID) {
1474    assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
1475    PredefinesFileID = FID; // may only be set while still invalid (see assert)
1476  }
1477
1478  /// \brief Returns true if we are lexing from a file and not a pragma or a
1479  /// macro: a non-pragma \p L, or, when \p L is null, any non-null \p P.
1480  static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
1481    return L ? !L->isPragmaLexer() : P != nullptr;
1482  }
1483
1484  static bool IsFileLexer(const IncludeStackInfo& I) {
1485    return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); // test entry I's lexers
1486  }
1487
1488  bool IsFileLexer() const {
1489    return IsFileLexer(CurLexer.get(), CurPPLexer); // test the current lexers
1490  }
1491
1492  //===--------------------------------------------------------------------===//
1493  // Token caching (CachingLex mode).
1494  void CachingLex(Token &Result);
1495  bool InCachingLexMode() const {
1496    // If the lexer pointers are all null and IncludeMacroStack is empty, it
1497    // means that we are past EOF, not that we are in CachingLex mode.
1498    return !CurPPLexer && !CurTokenLexer && !CurPTHLexer &&
1499           !IncludeMacroStack.empty();
1500  }
1501  void EnterCachingLexMode();
1502  void ExitCachingLexMode() {
1503    if (InCachingLexMode()) // no-op unless currently in CachingLex mode
1504      RemoveTopOfLexerStack();
1505  }
1506  const Token &PeekAhead(unsigned N);
1507  void AnnotatePreviousCachedTokens(const Token &Tok);
1508
1509  //===--------------------------------------------------------------------===//
1510  /// Handle*Directive - Implement the various preprocessor directives.  These
1511  /// should update the state of the current preprocessor object so that the
1512  /// next call to Lex() returns the appropriate token.
1513  void HandleLineDirective(Token &Tok);
1514  void HandleDigitDirective(Token &Tok);
1515  void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
1516  void HandleIdentSCCSDirective(Token &Tok);
1517  void HandleMacroPublicDirective(Token &Tok);
1518  void HandleMacroPrivateDirective(Token &Tok);
1519
1520  // File inclusion directives.
1521  void HandleIncludeDirective(SourceLocation HashLoc,
1522                              Token &Tok,
1523                              const DirectoryLookup *LookupFrom = nullptr,
1524                              bool isImport = false);
1525  void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
1526  void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
1527  void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
1528  void HandleMicrosoftImportDirective(Token &Tok);
1529
1530  // Module inclusion testing: map an include location to its module.
1531  /// \brief Find the module for the source or header file that \p FilenameLoc
1532  /// points to.
1533  Module *getModuleForLocation(SourceLocation FilenameLoc);
1534
1535  // Macro handling: the define and undef directives.
1536  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef);
1537  void HandleUndefDirective(Token &Tok);
1538
1539  // Conditional inclusion: the ifdef/ifndef, if, endif, else and elif directives.
1540  void HandleIfdefDirective(Token &Tok, bool isIfndef,
1541                            bool ReadAnyTokensBeforeDirective);
1542  void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
1543  void HandleEndifDirective(Token &Tok);
1544  void HandleElseDirective(Token &Tok);
1545  void HandleElifDirective(Token &Tok);
1546
1547  // Pragmas.  Note that the declarations following 'public:' below are public.
1548  void HandlePragmaDirective(SourceLocation IntroducerLoc,
1549                             PragmaIntroducerKind Introducer);
1550public:
1551  void HandlePragmaOnce(Token &OnceTok);
1552  void HandlePragmaMark();
1553  void HandlePragmaPoison(Token &PoisonTok);
1554  void HandlePragmaSystemHeader(Token &SysHeaderTok);
1555  void HandlePragmaDependency(Token &DependencyTok);
1556  void HandlePragmaPushMacro(Token &Tok);
1557  void HandlePragmaPopMacro(Token &Tok);
1558  void HandlePragmaIncludeAlias(Token &Tok);
1559  IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
1560
1561  /// \returns true, storing the first token, only if some CommentHandler
1562  /// has inserted tokens and getCommentRetentionState() is false.
1563  bool HandleComment(Token &Token, SourceRange Comment);
1564
1565  /// \brief Record that the macro \p MI is used, updating the information
1566  /// about macros that need unused warnings.
1567  void markMacroAsUsed(MacroInfo *MI);
1568};
1569
1570/// \brief Abstract base class that describes a handler that will receive
1571/// source ranges for each of the comments encountered in the source file.
1572class CommentHandler {
1573public:
1574  virtual ~CommentHandler();
1575
1576  /// \returns true if the handler has pushed any tokens to be read, e.g.
1577  /// using EnterToken or EnterTokenStream.
1578  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
1579};
1580
1581}  // end namespace clang
1582
1583#endif
1584