1//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Defines the clang::Preprocessor interface.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
16#define LLVM_CLANG_LEX_PREPROCESSOR_H
17
18#include "clang/Basic/Builtins.h"
19#include "clang/Basic/Diagnostic.h"
20#include "clang/Basic/IdentifierTable.h"
21#include "clang/Basic/SourceLocation.h"
22#include "clang/Lex/Lexer.h"
23#include "clang/Lex/MacroInfo.h"
24#include "clang/Lex/ModuleMap.h"
25#include "clang/Lex/PPCallbacks.h"
26#include "clang/Lex/PTHLexer.h"
27#include "clang/Lex/TokenLexer.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/IntrusiveRefCntPtr.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallVector.h"
33#include "llvm/ADT/TinyPtrVector.h"
34#include "llvm/Support/Allocator.h"
35#include "llvm/Support/Registry.h"
36#include <memory>
37#include <vector>
38
39namespace llvm {
40  template<unsigned InternalLen> class SmallString;
41}
42
43namespace clang {
44
45class SourceManager;
46class ExternalPreprocessorSource;
47class FileManager;
48class FileEntry;
49class HeaderSearch;
50class MemoryBufferCache;
51class PragmaNamespace;
52class PragmaHandler;
53class CommentHandler;
54class ScratchBuffer;
55class TargetInfo;
56class PPCallbacks;
57class CodeCompletionHandler;
58class DirectoryLookup;
59class PreprocessingRecord;
60class ModuleLoader;
61class PTHManager;
62class PreprocessorOptions;
63
64/// \brief Stores token information for comparing actual tokens with
65/// predefined values.  Only handles simple tokens and identifiers.
66class TokenValue {
67  tok::TokenKind Kind;
68  IdentifierInfo *II;
69
70public:
71  TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
72    assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
73    assert(Kind != tok::identifier &&
74           "Identifiers should be created by TokenValue(IdentifierInfo *)");
75    assert(!tok::isLiteral(Kind) && "Literals are not supported.");
76    assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
77  }
78  TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
79  bool operator==(const Token &Tok) const {
80    return Tok.getKind() == Kind &&
81        (!II || II == Tok.getIdentifierInfo());
82  }
83};
84
85/// \brief Context in which macro name is used.
enum MacroUse {
  /// Any use other than in \#define or \#undef.
  MU_Other = 0,
  /// The macro name is specified in \#define.
  MU_Define = 1,
  /// The macro name is specified in \#undef.
  MU_Undef = 2
};
91
92/// \brief Engages in a tight little dance with the lexer to efficiently
93/// preprocess tokens.
94///
95/// Lexers know only about tokens within a single source file, and don't
96/// know anything about preprocessor-level issues like the \#include stack,
97/// token expansion, etc.
98class Preprocessor {
99  friend class VariadicMacroScopeGuard;
100  friend class VAOptDefinitionContext;
101  std::shared_ptr<PreprocessorOptions> PPOpts;
102  DiagnosticsEngine        *Diags;
103  LangOptions       &LangOpts;
104  const TargetInfo  *Target;
105  const TargetInfo  *AuxTarget;
106  FileManager       &FileMgr;
107  SourceManager     &SourceMgr;
108  MemoryBufferCache &PCMCache;
109  std::unique_ptr<ScratchBuffer> ScratchBuf;
110  HeaderSearch      &HeaderInfo;
111  ModuleLoader      &TheModuleLoader;
112
113  /// \brief External source of macros.
114  ExternalPreprocessorSource *ExternalSource;
115
116
117  /// An optional PTHManager object used for getting tokens from
118  /// a token cache rather than lexing the original source file.
119  std::unique_ptr<PTHManager> PTH;
120
121  /// A BumpPtrAllocator object used to quickly allocate and release
122  /// objects internal to the Preprocessor.
123  llvm::BumpPtrAllocator BP;
124
125  /// Identifiers for builtin macros and other builtins.
126  IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
127  IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
128  IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
129  IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
130  IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
131  IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
132  IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
133  IdentifierInfo *Ident__identifier;               // __identifier
134  IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
135  IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
136  IdentifierInfo *Ident__has_feature;              // __has_feature
137  IdentifierInfo *Ident__has_extension;            // __has_extension
138  IdentifierInfo *Ident__has_builtin;              // __has_builtin
139  IdentifierInfo *Ident__has_attribute;            // __has_attribute
140  IdentifierInfo *Ident__has_include;              // __has_include
141  IdentifierInfo *Ident__has_include_next;         // __has_include_next
142  IdentifierInfo *Ident__has_warning;              // __has_warning
143  IdentifierInfo *Ident__is_identifier;            // __is_identifier
144  IdentifierInfo *Ident__building_module;          // __building_module
145  IdentifierInfo *Ident__MODULE__;                 // __MODULE__
146  IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
147  IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
148
149  SourceLocation DATELoc, TIMELoc;
150  unsigned CounterValue;  // Next __COUNTER__ value.
151
152  enum {
153    /// \brief Maximum depth of \#includes.
154    MaxAllowedIncludeStackDepth = 200
155  };
156
157  // State that is set before the preprocessor begins.
158  bool KeepComments : 1;
159  bool KeepMacroComments : 1;
160  bool SuppressIncludeNotFoundError : 1;
161
162  // State that changes while the preprocessor runs:
163  bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
164
165  /// Whether the preprocessor owns the header search object.
166  bool OwnsHeaderSearch : 1;
167
168  /// True if macro expansion is disabled.
169  bool DisableMacroExpansion : 1;
170
171  /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
172  /// when parsing preprocessor directives.
173  bool MacroExpansionInDirectivesOverride : 1;
174
175  class ResetMacroExpansionHelper;
176
177  /// \brief Whether we have already loaded macros from the external source.
178  mutable bool ReadMacrosFromExternalSource : 1;
179
180  /// \brief True if pragmas are enabled.
181  bool PragmasEnabled : 1;
182
183  /// \brief True if the current build action is a preprocessing action.
184  bool PreprocessedOutput : 1;
185
  /// \brief True if we are currently preprocessing a \#if or \#elif
  /// directive.
187  bool ParsingIfOrElifDirective;
188
189  /// \brief True if we are pre-expanding macro arguments.
190  bool InMacroArgPreExpansion;
191
192  /// \brief Mapping/lookup information for all identifiers in
193  /// the program, including program keywords.
194  mutable IdentifierTable Identifiers;
195
196  /// \brief This table contains all the selectors in the program.
197  ///
  /// Unlike IdentifierTable above, this table *isn't* populated by the
  /// preprocessor. It is declared/expanded here because its role/lifetime is
  /// conceptually similar to that of the IdentifierTable. In addition, the
  /// current control flow (in clang::ParseAST()) makes it convenient to put it
  /// here.
202  ///
203  /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
204  /// the lifetime of the preprocessor.
205  SelectorTable Selectors;
206
207  /// \brief Information about builtins.
208  Builtin::Context BuiltinInfo;
209
210  /// \brief Tracks all of the pragmas that the client registered
211  /// with this preprocessor.
212  std::unique_ptr<PragmaNamespace> PragmaHandlers;
213
  /// \brief Pragma handlers of the original source are stored here during the
  /// parsing of a model file.
216  std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
217
218  /// \brief Tracks all of the comment handlers that the client registered
219  /// with this preprocessor.
220  std::vector<CommentHandler *> CommentHandlers;
221
222  /// \brief True if we want to ignore EOF token and continue later on (thus
223  /// avoid tearing the Lexer and etc. down).
224  bool IncrementalProcessing;
225
226  /// The kind of translation unit we are processing.
227  TranslationUnitKind TUKind;
228
229  /// \brief The code-completion handler.
230  CodeCompletionHandler *CodeComplete;
231
232  /// \brief The file that we're performing code-completion for, if any.
233  const FileEntry *CodeCompletionFile;
234
235  /// \brief The offset in file for the code-completion point.
236  unsigned CodeCompletionOffset;
237
238  /// \brief The location for the code-completion point. This gets instantiated
239  /// when the CodeCompletionFile gets \#include'ed for preprocessing.
240  SourceLocation CodeCompletionLoc;
241
242  /// \brief The start location for the file of the code-completion point.
243  ///
244  /// This gets instantiated when the CodeCompletionFile gets \#include'ed
245  /// for preprocessing.
246  SourceLocation CodeCompletionFileLoc;
247
248  /// \brief The source location of the \c import contextual keyword we just
249  /// lexed, if any.
250  SourceLocation ModuleImportLoc;
251
252  /// \brief The module import path that we're currently processing.
253  SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
254
255  /// \brief Whether the last token we lexed was an '@'.
256  bool LastTokenWasAt;
257
258  /// \brief Whether the module import expects an identifier next. Otherwise,
259  /// it expects a '.' or ';'.
260  bool ModuleImportExpectsIdentifier;
261
262  /// \brief The source location of the currently-active
263  /// \#pragma clang arc_cf_code_audited begin.
264  SourceLocation PragmaARCCFCodeAuditedLoc;
265
266  /// \brief The source location of the currently-active
267  /// \#pragma clang assume_nonnull begin.
268  SourceLocation PragmaAssumeNonNullLoc;
269
270  /// \brief True if we hit the code-completion point.
271  bool CodeCompletionReached;
272
273  /// \brief The code completion token containing the information
274  /// on the stem that is to be code completed.
275  IdentifierInfo *CodeCompletionII;
276
277  /// \brief The directory that the main file should be considered to occupy,
278  /// if it does not correspond to a real file (as happens when building a
279  /// module).
280  const DirectoryEntry *MainFileDir;
281
282  /// \brief The number of bytes that we will initially skip when entering the
283  /// main file, along with a flag that indicates whether skipping this number
284  /// of bytes will place the lexer at the start of a line.
285  ///
286  /// This is used when loading a precompiled preamble.
287  std::pair<int, bool> SkipMainFilePreamble;
288
289  class PreambleConditionalStackStore {
290    enum State {
291      Off = 0,
292      Recording = 1,
293      Replaying = 2,
294    };
295
296  public:
297    PreambleConditionalStackStore() : ConditionalStackState(Off) {}
298
299    void startRecording() { ConditionalStackState = Recording; }
300    void startReplaying() { ConditionalStackState = Replaying; }
301    bool isRecording() const { return ConditionalStackState == Recording; }
302    bool isReplaying() const { return ConditionalStackState == Replaying; }
303
304    ArrayRef<PPConditionalInfo> getStack() const {
305      return ConditionalStack;
306    }
307
308    void doneReplaying() {
309      ConditionalStack.clear();
310      ConditionalStackState = Off;
311    }
312
313    void setStack(ArrayRef<PPConditionalInfo> s) {
314      if (!isRecording() && !isReplaying())
315        return;
316      ConditionalStack.clear();
317      ConditionalStack.append(s.begin(), s.end());
318    }
319
320    bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
321
322  private:
323    SmallVector<PPConditionalInfo, 4> ConditionalStack;
324    State ConditionalStackState;
325  } PreambleConditionalStack;
326
327  /// \brief The current top of the stack that we're lexing from if
328  /// not expanding a macro and we are lexing directly from source code.
329  ///
330  /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
331  std::unique_ptr<Lexer> CurLexer;
332
333  /// \brief The current top of stack that we're lexing from if
334  /// not expanding from a macro and we are lexing from a PTH cache.
335  ///
336  /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
337  std::unique_ptr<PTHLexer> CurPTHLexer;
338
  /// \brief The current top of the stack that we're lexing from
  /// if not expanding a macro.
  ///
  /// This is an alias for either CurLexer or CurPTHLexer.
343  PreprocessorLexer *CurPPLexer;
344
345  /// \brief Used to find the current FileEntry, if CurLexer is non-null
346  /// and if applicable.
347  ///
348  /// This allows us to implement \#include_next and find directory-specific
349  /// properties.
350  const DirectoryLookup *CurDirLookup;
351
352  /// \brief The current macro we are expanding, if we are expanding a macro.
353  ///
354  /// One of CurLexer and CurTokenLexer must be null.
355  std::unique_ptr<TokenLexer> CurTokenLexer;
356
357  /// \brief The kind of lexer we're currently working with.
358  enum CurLexerKind {
359    CLK_Lexer,
360    CLK_PTHLexer,
361    CLK_TokenLexer,
362    CLK_CachingLexer,
363    CLK_LexAfterModuleImport
364  } CurLexerKind;
365
366  /// \brief If the current lexer is for a submodule that is being built, this
367  /// is that submodule.
368  Module *CurLexerSubmodule;
369
370  /// \brief Keeps track of the stack of files currently
371  /// \#included, and macros currently being expanded from, not counting
372  /// CurLexer/CurTokenLexer.
373  struct IncludeStackInfo {
374    enum CurLexerKind           CurLexerKind;
375    Module                     *TheSubmodule;
376    std::unique_ptr<Lexer>      TheLexer;
377    std::unique_ptr<PTHLexer>   ThePTHLexer;
378    PreprocessorLexer          *ThePPLexer;
379    std::unique_ptr<TokenLexer> TheTokenLexer;
380    const DirectoryLookup      *TheDirLookup;
381
382    // The following constructors are completely useless copies of the default
383    // versions, only needed to pacify MSVC.
384    IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
385                     std::unique_ptr<Lexer> &&TheLexer,
386                     std::unique_ptr<PTHLexer> &&ThePTHLexer,
387                     PreprocessorLexer *ThePPLexer,
388                     std::unique_ptr<TokenLexer> &&TheTokenLexer,
389                     const DirectoryLookup *TheDirLookup)
390        : CurLexerKind(std::move(CurLexerKind)),
391          TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
392          ThePTHLexer(std::move(ThePTHLexer)),
393          ThePPLexer(std::move(ThePPLexer)),
394          TheTokenLexer(std::move(TheTokenLexer)),
395          TheDirLookup(std::move(TheDirLookup)) {}
396  };
397  std::vector<IncludeStackInfo> IncludeMacroStack;
398
399  /// \brief Actions invoked when some preprocessor activity is
400  /// encountered (e.g. a file is \#included, etc).
401  std::unique_ptr<PPCallbacks> Callbacks;
402
403  struct MacroExpandsInfo {
404    Token Tok;
405    MacroDefinition MD;
406    SourceRange Range;
407    MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
408      : Tok(Tok), MD(MD), Range(Range) { }
409  };
410  SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
411
412  /// Information about a name that has been used to define a module macro.
413  struct ModuleMacroInfo {
414    ModuleMacroInfo(MacroDirective *MD)
415        : MD(MD), ActiveModuleMacrosGeneration(0), IsAmbiguous(false) {}
416
417    /// The most recent macro directive for this identifier.
418    MacroDirective *MD;
419    /// The active module macros for this identifier.
420    llvm::TinyPtrVector<ModuleMacro*> ActiveModuleMacros;
421    /// The generation number at which we last updated ActiveModuleMacros.
422    /// \see Preprocessor::VisibleModules.
423    unsigned ActiveModuleMacrosGeneration;
424    /// Whether this macro name is ambiguous.
425    bool IsAmbiguous;
426    /// The module macros that are overridden by this macro.
427    llvm::TinyPtrVector<ModuleMacro*> OverriddenMacros;
428  };
429
430  /// The state of a macro for an identifier.
431  class MacroState {
432    mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
433
434    ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
435                                   const IdentifierInfo *II) const {
436      if (II->isOutOfDate())
437        PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
438      // FIXME: Find a spare bit on IdentifierInfo and store a
439      //        HasModuleMacros flag.
440      if (!II->hasMacroDefinition() ||
441          (!PP.getLangOpts().Modules &&
442           !PP.getLangOpts().ModulesLocalVisibility) ||
443          !PP.CurSubmoduleState->VisibleModules.getGeneration())
444        return nullptr;
445
446      auto *Info = State.dyn_cast<ModuleMacroInfo*>();
447      if (!Info) {
448        Info = new (PP.getPreprocessorAllocator())
449            ModuleMacroInfo(State.get<MacroDirective *>());
450        State = Info;
451      }
452
453      if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
454          Info->ActiveModuleMacrosGeneration)
455        PP.updateModuleMacroInfo(II, *Info);
456      return Info;
457    }
458
459  public:
460    MacroState() : MacroState(nullptr) {}
461    MacroState(MacroDirective *MD) : State(MD) {}
462    MacroState(MacroState &&O) noexcept : State(O.State) {
463      O.State = (MacroDirective *)nullptr;
464    }
465    MacroState &operator=(MacroState &&O) noexcept {
466      auto S = O.State;
467      O.State = (MacroDirective *)nullptr;
468      State = S;
469      return *this;
470    }
471    ~MacroState() {
472      if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
473        Info->~ModuleMacroInfo();
474    }
475
476    MacroDirective *getLatest() const {
477      if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
478        return Info->MD;
479      return State.get<MacroDirective*>();
480    }
481    void setLatest(MacroDirective *MD) {
482      if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
483        Info->MD = MD;
484      else
485        State = MD;
486    }
487
488    bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
489      auto *Info = getModuleInfo(PP, II);
490      return Info ? Info->IsAmbiguous : false;
491    }
492    ArrayRef<ModuleMacro *>
493    getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
494      if (auto *Info = getModuleInfo(PP, II))
495        return Info->ActiveModuleMacros;
496      return None;
497    }
498
499    MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
500                                               SourceManager &SourceMgr) const {
501      // FIXME: Incorporate module macros into the result of this.
502      if (auto *Latest = getLatest())
503        return Latest->findDirectiveAtLoc(Loc, SourceMgr);
504      return MacroDirective::DefInfo();
505    }
506
507    void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
508      if (auto *Info = getModuleInfo(PP, II)) {
509        Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
510                                      Info->ActiveModuleMacros.begin(),
511                                      Info->ActiveModuleMacros.end());
512        Info->ActiveModuleMacros.clear();
513        Info->IsAmbiguous = false;
514      }
515    }
516    ArrayRef<ModuleMacro*> getOverriddenMacros() const {
517      if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
518        return Info->OverriddenMacros;
519      return None;
520    }
521    void setOverriddenMacros(Preprocessor &PP,
522                             ArrayRef<ModuleMacro *> Overrides) {
523      auto *Info = State.dyn_cast<ModuleMacroInfo*>();
524      if (!Info) {
525        if (Overrides.empty())
526          return;
527        Info = new (PP.getPreprocessorAllocator())
528            ModuleMacroInfo(State.get<MacroDirective *>());
529        State = Info;
530      }
531      Info->OverriddenMacros.clear();
532      Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
533                                    Overrides.begin(), Overrides.end());
534      Info->ActiveModuleMacrosGeneration = 0;
535    }
536  };
537
  /// For each IdentifierInfo that was associated with a macro, we
  /// keep a mapping to the history of all macro definitions and \#undefs in
  /// reverse order (the latest one is at the head of the list).
  ///
  /// This mapping lives within the \p CurSubmoduleState.
543  typedef llvm::DenseMap<const IdentifierInfo *, MacroState> MacroMap;
544
545  friend class ASTReader;
546
547  struct SubmoduleState;
548
549  /// \brief Information about a submodule that we're currently building.
550  struct BuildingSubmoduleInfo {
551    BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
552                          SubmoduleState *OuterSubmoduleState,
553                          unsigned OuterPendingModuleMacroNames)
554        : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
555          OuterSubmoduleState(OuterSubmoduleState),
556          OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
557
558    /// The module that we are building.
559    Module *M;
560    /// The location at which the module was included.
561    SourceLocation ImportLoc;
562    /// Whether we entered this submodule via a pragma.
563    bool IsPragma;
564    /// The previous SubmoduleState.
565    SubmoduleState *OuterSubmoduleState;
566    /// The number of pending module macro names when we started building this.
567    unsigned OuterPendingModuleMacroNames;
568  };
569  SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
570
571  /// \brief Information about a submodule's preprocessor state.
572  struct SubmoduleState {
573    /// The macros for the submodule.
574    MacroMap Macros;
575    /// The set of modules that are visible within the submodule.
576    VisibleModuleSet VisibleModules;
577    // FIXME: CounterValue?
578    // FIXME: PragmaPushMacroInfo?
579  };
580  std::map<Module*, SubmoduleState> Submodules;
581
582  /// The preprocessor state for preprocessing outside of any submodule.
583  SubmoduleState NullSubmoduleState;
584
585  /// The current submodule state. Will be \p NullSubmoduleState if we're not
586  /// in a submodule.
587  SubmoduleState *CurSubmoduleState;
588
589  /// The set of known macros exported from modules.
590  llvm::FoldingSet<ModuleMacro> ModuleMacros;
591
592  /// The names of potential module macros that we've not yet processed.
593  llvm::SmallVector<const IdentifierInfo*, 32> PendingModuleMacroNames;
594
595  /// The list of module macros, for each identifier, that are not overridden by
596  /// any other module macro.
597  llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro*>>
598      LeafModuleMacros;
599
  /// \brief Macros that we want to warn about because they are not used at the
  /// end of the translation unit.
602  ///
603  /// We store just their SourceLocations instead of
604  /// something like MacroInfo*. The benefit of this is that when we are
605  /// deserializing from PCH, we don't need to deserialize identifier & macros
606  /// just so that we can report that they are unused, we just warn using
607  /// the SourceLocations of this set (that will be filled by the ASTReader).
608  /// We are using SmallPtrSet instead of a vector for faster removal.
609  typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
610  WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
611
612  /// \brief A "freelist" of MacroArg objects that can be
613  /// reused for quick allocation.
614  MacroArgs *MacroArgCache;
615  friend class MacroArgs;
616
617  /// For each IdentifierInfo used in a \#pragma push_macro directive,
618  /// we keep a MacroInfo stack used to restore the previous macro value.
619  llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
620
621  // Various statistics we track for performance analysis.
622  unsigned NumDirectives, NumDefined, NumUndefined, NumPragma;
623  unsigned NumIf, NumElse, NumEndif;
624  unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
625  unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
626  unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
627  unsigned NumSkipped;
628
629  /// \brief The predefined macros that preprocessor should use from the
630  /// command line etc.
631  std::string Predefines;
632
633  /// \brief The file ID for the preprocessor predefines.
634  FileID PredefinesFileID;
635
636  /// \{
637  /// \brief Cache of macro expanders to reduce malloc traffic.
638  enum { TokenLexerCacheSize = 8 };
639  unsigned NumCachedTokenLexers;
640  std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
641  /// \}
642
  /// \brief Keeps macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
648  SmallVector<Token, 16> MacroExpandedTokens;
649  std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
650
651  /// \brief A record of the macro definitions and expansions that
652  /// occurred during preprocessing.
653  ///
654  /// This is an optional side structure that can be enabled with
655  /// \c createPreprocessingRecord() prior to preprocessing.
656  PreprocessingRecord *Record;
657
658  /// Cached tokens state.
659  typedef SmallVector<Token, 1> CachedTokensTy;
660
661  /// \brief Cached tokens are stored here when we do backtracking or
662  /// lookahead. They are "lexed" by the CachingLex() method.
663  CachedTokensTy CachedTokens;
664
665  /// \brief The position of the cached token that CachingLex() should
666  /// "lex" next.
667  ///
668  /// If it points beyond the CachedTokens vector, it means that a normal
669  /// Lex() should be invoked.
670  CachedTokensTy::size_type CachedLexPos;
671
672  /// \brief Stack of backtrack positions, allowing nested backtracks.
673  ///
674  /// The EnableBacktrackAtThisPos() method pushes a position to
675  /// indicate where CachedLexPos should be set when the BackTrack() method is
676  /// invoked (at which point the last position is popped).
677  std::vector<CachedTokensTy::size_type> BacktrackPositions;
678
679  struct MacroInfoChain {
680    MacroInfo MI;
681    MacroInfoChain *Next;
682  };
683
684  /// MacroInfos are managed as a chain for easy disposal.  This is the head
685  /// of that list.
686  MacroInfoChain *MIChainHead;
687
688  void updateOutOfDateIdentifier(IdentifierInfo &II) const;
689
690public:
691  Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
692               DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
693               MemoryBufferCache &PCMCache,
694               HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
695               IdentifierInfoLookup *IILookup = nullptr,
696               bool OwnsHeaderSearch = false,
697               TranslationUnitKind TUKind = TU_Complete);
698
699  ~Preprocessor();
700
701  /// \brief Initialize the preprocessor using information about the target.
702  ///
703  /// \param Target is owned by the caller and must remain valid for the
704  /// lifetime of the preprocessor.
705  /// \param AuxTarget is owned by the caller and must remain valid for
706  /// the lifetime of the preprocessor.
707  void Initialize(const TargetInfo &Target,
708                  const TargetInfo *AuxTarget = nullptr);
709
  /// \brief Initialize the preprocessor to parse a model file.
  ///
  /// To parse model files the preprocessor of the original source is reused to
  /// preserve the identifier table. However, to avoid some duplicate
  /// information in the preprocessor, some cleanup is needed before it is used
  /// to parse model files. This method does that cleanup.
716  void InitializeForModelFile();
717
  /// \brief Clean up after model file parsing.
719  void FinalizeForModelFile();
720
721  /// \brief Retrieve the preprocessor options used to initialize this
722  /// preprocessor.
723  PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
724
725  DiagnosticsEngine &getDiagnostics() const { return *Diags; }
726  void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
727
728  const LangOptions &getLangOpts() const { return LangOpts; }
729  const TargetInfo &getTargetInfo() const { return *Target; }
730  const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
731  FileManager &getFileManager() const { return FileMgr; }
732  SourceManager &getSourceManager() const { return SourceMgr; }
733  MemoryBufferCache &getPCMCache() const { return PCMCache; }
734  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
735
736  IdentifierTable &getIdentifierTable() { return Identifiers; }
737  const IdentifierTable &getIdentifierTable() const { return Identifiers; }
738  SelectorTable &getSelectorTable() { return Selectors; }
739  Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
740  llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
741
742  void setPTHManager(PTHManager* pm);
743
744  PTHManager *getPTHManager() { return PTH.get(); }
745
746  void setExternalSource(ExternalPreprocessorSource *Source) {
747    ExternalSource = Source;
748  }
749
750  ExternalPreprocessorSource *getExternalSource() const {
751    return ExternalSource;
752  }
753
754  /// \brief Retrieve the module loader associated with this preprocessor.
755  ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
756
757  bool hadModuleLoaderFatalFailure() const {
758    return TheModuleLoader.HadFatalFailure;
759  }
760
  /// \brief True if we are currently preprocessing a \#if or \#elif
  /// directive.
762  bool isParsingIfOrElifDirective() const {
763    return ParsingIfOrElifDirective;
764  }
765
766  /// \brief Control whether the preprocessor retains comments in output.
767  void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
768    this->KeepComments = KeepComments | KeepMacroComments;
769    this->KeepMacroComments = KeepMacroComments;
770  }
771
772  bool getCommentRetentionState() const { return KeepComments; }
773
774  void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
775  bool getPragmasEnabled() const { return PragmasEnabled; }
776
777  void SetSuppressIncludeNotFoundError(bool Suppress) {
778    SuppressIncludeNotFoundError = Suppress;
779  }
780
781  bool GetSuppressIncludeNotFoundError() {
782    return SuppressIncludeNotFoundError;
783  }
784
785  /// Sets whether the preprocessor is responsible for producing output or if
786  /// it is producing tokens to be consumed by Parse and Sema.
787  void setPreprocessedOutput(bool IsPreprocessedOutput) {
788    PreprocessedOutput = IsPreprocessedOutput;
789  }
790
791  /// Returns true if the preprocessor is responsible for generating output,
792  /// false if it is producing tokens to be consumed by Parse and Sema.
793  bool isPreprocessedOutput() const { return PreprocessedOutput; }
794
795  /// \brief Return true if we are lexing directly from the specified lexer.
796  bool isCurrentLexer(const PreprocessorLexer *L) const {
797    return CurPPLexer == L;
798  }
799
800  /// \brief Return the current lexer being lexed from.
801  ///
802  /// Note that this ignores any potentially active macro expansions and _Pragma
803  /// expansions going on at the time.
804  PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
805
  /// \brief Return the current file lexer being lexed from.
  ///
  /// Note that this ignores any potentially active macro expansions and _Pragma
  /// expansions going on at the time.
  ///
  /// NOTE(review): defined out of line; exactly how the result differs from
  /// getCurrentLexer() (which simply returns \c CurPPLexer) is not visible
  /// from this header -- see the definition.
  PreprocessorLexer *getCurrentFileLexer() const;
811
812  /// \brief Return the submodule owning the file being lexed. This may not be
813  /// the current module if we have changed modules since entering the file.
814  Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
815
816  /// \brief Returns the FileID for the preprocessor predefines.
817  FileID getPredefinesFileID() const { return PredefinesFileID; }
818
819  /// \{
820  /// \brief Accessors for preprocessor callbacks.
821  ///
822  /// Note that this class takes ownership of any PPCallbacks object given to
823  /// it.
824  PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
825  void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
826    if (Callbacks)
827      C = llvm::make_unique<PPChainedCallbacks>(std::move(C),
828                                                std::move(Callbacks));
829    Callbacks = std::move(C);
830  }
831  /// \}
832
833  bool isMacroDefined(StringRef Id) {
834    return isMacroDefined(&Identifiers.get(Id));
835  }
836  bool isMacroDefined(const IdentifierInfo *II) {
837    return II->hasMacroDefinition() &&
838           (!getLangOpts().Modules || (bool)getMacroDefinition(II));
839  }
840
841  /// \brief Determine whether II is defined as a macro within the module M,
842  /// if that is a module that we've already preprocessed. Does not check for
843  /// macros imported into M.
844  bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
845    if (!II->hasMacroDefinition())
846      return false;
847    auto I = Submodules.find(M);
848    if (I == Submodules.end())
849      return false;
850    auto J = I->second.Macros.find(II);
851    if (J == I->second.Macros.end())
852      return false;
853    auto *MD = J->second.getLatest();
854    return MD && MD->isDefined();
855  }
856
857  MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
858    if (!II->hasMacroDefinition())
859      return MacroDefinition();
860
861    MacroState &S = CurSubmoduleState->Macros[II];
862    auto *MD = S.getLatest();
863    while (MD && isa<VisibilityMacroDirective>(MD))
864      MD = MD->getPrevious();
865    return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
866                           S.getActiveModuleMacros(*this, II),
867                           S.isAmbiguous(*this, II));
868  }
869
870  MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
871                                          SourceLocation Loc) {
872    if (!II->hadMacroDefinition())
873      return MacroDefinition();
874
875    MacroState &S = CurSubmoduleState->Macros[II];
876    MacroDirective::DefInfo DI;
877    if (auto *MD = S.getLatest())
878      DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
879    // FIXME: Compute the set of active module macros at the specified location.
880    return MacroDefinition(DI.getDirective(),
881                           S.getActiveModuleMacros(*this, II),
882                           S.isAmbiguous(*this, II));
883  }
884
885  /// \brief Given an identifier, return its latest non-imported MacroDirective
886  /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
887  MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
888    if (!II->hasMacroDefinition())
889      return nullptr;
890
891    auto *MD = getLocalMacroDirectiveHistory(II);
892    if (!MD || MD->getDefinition().isUndefined())
893      return nullptr;
894
895    return MD;
896  }
897
898  const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
899    return const_cast<Preprocessor*>(this)->getMacroInfo(II);
900  }
901
902  MacroInfo *getMacroInfo(const IdentifierInfo *II) {
903    if (!II->hasMacroDefinition())
904      return nullptr;
905    if (auto MD = getMacroDefinition(II))
906      return MD.getMacroInfo();
907    return nullptr;
908  }
909
  /// \brief Given an identifier, return the latest non-imported macro
  /// directive for that identifier.
  ///
  /// One can iterate over all previous macro directives from the most recent
  /// one (e.g. with MacroDirective::getPrevious()).
  ///
  /// The result may be null; getLocalMacroDirective() checks for that.
  MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
916
917  /// \brief Add a directive to the macro directive history for this identifier.
918  void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
919  DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
920                                             SourceLocation Loc) {
921    DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
922    appendMacroDirective(II, MD);
923    return MD;
924  }
925  DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
926                                             MacroInfo *MI) {
927    return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
928  }
  /// \brief Set a MacroDirective that was loaded from a PCH file.
  ///
  /// NOTE(review): the respective roles of \p ED and \p MD are not documented
  /// in this header -- see the out-of-line definition.
  void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
                               MacroDirective *MD);

  /// \brief Register an exported macro for a module and identifier.
  ///
  /// \param Overrides module macros associated with the new one; presumably
  /// the macros it overrides -- confirm in the definition.
  /// \param IsNew output parameter; presumably set to whether a new
  /// ModuleMacro was created -- confirm in the definition.
  ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
                              ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
  /// \brief Look up the module macro for \p II in \p Mod.
  ///
  /// NOTE(review): presumably returns null when there is none -- confirm.
  ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II);
937
938  /// \brief Get the list of leaf (non-overridden) module macros for a name.
939  ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
940    if (II->isOutOfDate())
941      updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
942    auto I = LeafModuleMacros.find(II);
943    if (I != LeafModuleMacros.end())
944      return I->second;
945    return None;
946  }
947
948  /// \{
949  /// Iterators for the macro history table. Currently defined macros have
950  /// IdentifierInfo::hasMacroDefinition() set and an empty
951  /// MacroInfo::getUndefLoc() at the head of the list.
952  typedef MacroMap::const_iterator macro_iterator;
953  macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
954  macro_iterator macro_end(bool IncludeExternalMacros = true) const;
955  llvm::iterator_range<macro_iterator>
956  macros(bool IncludeExternalMacros = true) const {
957    return llvm::make_range(macro_begin(IncludeExternalMacros),
958                            macro_end(IncludeExternalMacros));
959  }
960  /// \}
961
  /// \brief Return the name of the macro defined before \p Loc that has
  /// spelling \p Tokens.  If there are multiple macros with the same spelling,
  /// return the last one defined.
  ///
  /// NOTE(review): the result when no macro matches is not visible from this
  /// declaration (presumably an empty StringRef) -- confirm in the definition.
  StringRef getLastMacroWithSpelling(SourceLocation Loc,
                                     ArrayRef<TokenValue> Tokens) const;
967
968  const std::string &getPredefines() const { return Predefines; }
969  /// \brief Set the predefines for this Preprocessor.
970  ///
971  /// These predefines are automatically injected when parsing the main file.
972  void setPredefines(const char *P) { Predefines = P; }
973  void setPredefines(StringRef P) { Predefines = P; }
974
975  /// Return information about the specified preprocessor
976  /// identifier token.
977  IdentifierInfo *getIdentifierInfo(StringRef Name) const {
978    return &Identifiers.get(Name);
979  }
980
981  /// \brief Add the specified pragma handler to this preprocessor.
982  ///
983  /// If \p Namespace is non-null, then it is a token required to exist on the
984  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
985  void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
986  void AddPragmaHandler(PragmaHandler *Handler) {
987    AddPragmaHandler(StringRef(), Handler);
988  }
989
990  /// \brief Remove the specific pragma handler from this preprocessor.
991  ///
992  /// If \p Namespace is non-null, then it should be the namespace that
993  /// \p Handler was added to. It is an error to remove a handler that
994  /// has not been registered.
995  void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
996  void RemovePragmaHandler(PragmaHandler *Handler) {
997    RemovePragmaHandler(StringRef(), Handler);
998  }
999
  /// \brief Install empty handlers for all pragmas (making them ignored).
  void IgnorePragmas();

  /// \brief Add the specified comment handler to the preprocessor.
  ///
  /// NOTE(review): \p Handler is passed as a raw pointer; presumably the
  /// caller keeps ownership -- confirm in the definition.
  void addCommentHandler(CommentHandler *Handler);

  /// \brief Remove the specified comment handler.
  ///
  /// It is an error to remove a handler that has not been registered.
  void removeCommentHandler(CommentHandler *Handler);
1010
1011  /// \brief Set the code completion handler to the given object.
1012  void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1013    CodeComplete = &Handler;
1014  }
1015
1016  /// \brief Retrieve the current code-completion handler.
1017  CodeCompletionHandler *getCodeCompletionHandler() const {
1018    return CodeComplete;
1019  }
1020
1021  /// \brief Clear out the code completion handler.
1022  void clearCodeCompletionHandler() {
1023    CodeComplete = nullptr;
1024  }
1025
  /// \brief Hook used by the lexer to invoke the "natural language" code
  /// completion point.
  ///
  /// NOTE(review): defined out of line; presumably forwards to the installed
  /// code-completion handler, if any -- confirm in the definition.
  void CodeCompleteNaturalLanguage();
1029
1030  /// \brief Set the code completion token for filtering purposes.
1031  void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1032    CodeCompletionII = Filter;
1033  }
1034
1035  /// \brief Get the code completion token for filtering purposes.
1036  StringRef getCodeCompletionFilter() {
1037    if (CodeCompletionII)
1038      return CodeCompletionII->getName();
1039    return {};
1040  }
1041
1042  /// \brief Retrieve the preprocessing record, or NULL if there is no
1043  /// preprocessing record.
1044  PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1045
  /// \brief Create a new preprocessing record, which will keep track of
  /// all macro expansions, macro definitions, etc.
  ///
  /// NOTE(review): presumably the record is afterwards available through
  /// getPreprocessingRecord() -- confirm in the definition.
  void createPreprocessingRecord();

  /// \brief Enter the main source file, which implicitly adds the builtin
  /// defines etc.
  ///
  /// NOTE(review): no FileID is passed in; presumably the main file recorded
  /// in the SourceManager is used -- confirm in the definition.
  void EnterMainSourceFile();

  /// \brief Inform the preprocessor callbacks that processing is complete.
  void EndSourceFile();

  /// \brief Add a source file to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.
  ///
  /// \returns true on error, in which case a diagnostic has been emitted and
  /// the file has not been entered.
  bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
                       SourceLocation Loc);

  /// \brief Add a Macro to the top of the include stack and start lexing
  /// tokens from it instead of the current buffer.
  ///
  /// \param Identifier presumably the macro-name token being expanded --
  /// confirm in the definition.
  /// \param Args specifies the tokens input to a function-like macro.
  /// \param ILEnd specifies the location of the ')' for a function-like macro
  /// or the identifier for an object-like macro.
  void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro,
                  MacroArgs *Args);
1072
1073  /// \brief Add a "macro" context to the top of the include stack,
1074  /// which will cause the lexer to start returning the specified tokens.
1075  ///
1076  /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1077  /// will not be subject to further macro expansion. Otherwise, these tokens
1078  /// will be re-macro-expanded when/if expansion is enabled.
1079  ///
1080  /// If \p OwnsTokens is false, this method assumes that the specified stream
1081  /// of tokens has a permanent owner somewhere, so they do not need to be
1082  /// copied. If it is true, it assumes the array of tokens is allocated with
1083  /// \c new[] and the Preprocessor will delete[] it.
1084private:
1085  void EnterTokenStream(const Token *Toks, unsigned NumToks,
1086                        bool DisableMacroExpansion, bool OwnsTokens);
1087
1088public:
1089  void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1090                        bool DisableMacroExpansion) {
1091    EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true);
1092  }
1093  void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) {
1094    EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false);
1095  }
1096
  /// \brief Pop the current lexer/macro expansion off the top of the lexer
  /// stack.
  ///
  /// This should only be used in situations where the current state of the
  /// top-of-stack lexer is known.
  void RemoveTopOfLexerStack();

  /// \brief Start recording lexed tokens so that they can be re-lexed.
  ///
  /// From the point that this method is called, and until
  /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
  /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
  /// make the Preprocessor re-lex the same tokens.
  ///
  /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
  /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
  /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
  ///
  /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
  /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
  /// tokens will continue indefinitely.
  ///
  void EnableBacktrackAtThisPos();

  /// \brief Disable the last EnableBacktrackAtThisPos call.
  void CommitBacktrackedTokens();
1120
1121  struct CachedTokensRange {
1122    CachedTokensTy::size_type Begin, End;
1123  };
1124
1125private:
  /// \brief A range of cached tokens that should be erased after lexing
  /// when backtracking requires the erasure of such cached tokens.
  ///
  /// Held in an Optional; presumably empty when no erasure is pending --
  /// confirm against the code that sets and consumes it.
  Optional<CachedTokensRange> CachedTokenRangeToErase;
1129
1130public:
  /// \brief Returns the range of cached tokens that were lexed since
  /// EnableBacktrackAtThisPos() was previously called.
  CachedTokensRange LastCachedTokenRange();

  /// \brief Erase the range of cached tokens that were lexed since
  /// EnableBacktrackAtThisPos() was previously called.
  ///
  /// \param TokenRange the range to erase, e.g. one obtained from
  /// LastCachedTokenRange().
  void EraseCachedTokens(CachedTokensRange TokenRange);

  /// \brief Make Preprocessor re-lex the tokens that were lexed since
  /// EnableBacktrackAtThisPos() was previously called.
  void Backtrack();
1142
1143  /// \brief True if EnableBacktrackAtThisPos() was called and
1144  /// caching of tokens is on.
1145  bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1146
  /// \brief Lex the next token for this preprocessor.
  ///
  /// \param Result receives the lexed token.
  void Lex(Token &Result);

  /// NOTE(review): undocumented in this header. Judging by the name, this
  /// lexes a token following a module import -- confirm in the definition.
  void LexAfterModuleImport(Token &Result);

  /// NOTE(review): undocumented in this header. Presumably records \p M as
  /// visible as of \p Loc (getModuleImportLoc() later reads an import
  /// location per module) -- confirm in the definition.
  void makeModuleVisible(Module *M, SourceLocation Loc);
1153
1154  SourceLocation getModuleImportLoc(Module *M) const {
1155    return CurSubmoduleState->VisibleModules.getImportLoc(M);
1156  }
1157
1158  /// \brief Lex a string literal, which may be the concatenation of multiple
1159  /// string literals and may even come from macro expansion.
1160  /// \returns true on success, false if a error diagnostic has been generated.
1161  bool LexStringLiteral(Token &Result, std::string &String,
1162                        const char *DiagnosticTag, bool AllowMacroExpansion) {
1163    if (AllowMacroExpansion)
1164      Lex(Result);
1165    else
1166      LexUnexpandedToken(Result);
1167    return FinishLexStringLiteral(Result, String, DiagnosticTag,
1168                                  AllowMacroExpansion);
1169  }
1170
  /// \brief Complete the lexing of a string literal where the first token has
  /// already been lexed (see LexStringLiteral).
  ///
  /// \param Result the already-lexed first token on entry.
  /// \returns the value LexStringLiteral() passes through to its caller (true
  /// on success, per that function's documentation).
  bool FinishLexStringLiteral(Token &Result, std::string &String,
                              const char *DiagnosticTag,
                              bool AllowMacroExpansion);
1176
1177  /// \brief Lex a token.  If it's a comment, keep lexing until we get
1178  /// something not a comment.
1179  ///
1180  /// This is useful in -E -C mode where comments would foul up preprocessor
1181  /// directive handling.
1182  void LexNonComment(Token &Result) {
1183    do
1184      Lex(Result);
1185    while (Result.getKind() == tok::comment);
1186  }
1187
1188  /// \brief Just like Lex, but disables macro expansion of identifier tokens.
1189  void LexUnexpandedToken(Token &Result) {
1190    // Disable macro expansion.
1191    bool OldVal = DisableMacroExpansion;
1192    DisableMacroExpansion = true;
1193    // Lex the token.
1194    Lex(Result);
1195
1196    // Reenable it.
1197    DisableMacroExpansion = OldVal;
1198  }
1199
1200  /// \brief Like LexNonComment, but this disables macro expansion of
1201  /// identifier tokens.
1202  void LexUnexpandedNonComment(Token &Result) {
1203    do
1204      LexUnexpandedToken(Result);
1205    while (Result.getKind() == tok::comment);
1206  }
1207
  /// \brief Parses a simple integer literal to get its numeric value.  Floating
  /// point literals and user defined literals are rejected.  Used primarily to
  /// handle pragmas that accept integer arguments.
  ///
  /// \param Value receives the parsed value.
  /// NOTE(review): the meaning of the bool result (success vs. failure) is
  /// not stated here -- confirm in the definition.
  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1212
1213  /// Disables macro expansion everywhere except for preprocessor directives.
1214  void SetMacroExpansionOnlyInDirectives() {
1215    DisableMacroExpansion = true;
1216    MacroExpansionInDirectivesOverride = true;
1217  }
1218
1219  /// \brief Peeks ahead N tokens and returns that token without consuming any
1220  /// tokens.
1221  ///
1222  /// LookAhead(0) returns the next token that would be returned by Lex(),
1223  /// LookAhead(1) returns the token after it, etc.  This returns normal
1224  /// tokens after phase 5.  As such, it is equivalent to using
1225  /// 'Lex', not 'LexUnexpandedToken'.
1226  const Token &LookAhead(unsigned N) {
1227    if (CachedLexPos + N < CachedTokens.size())
1228      return CachedTokens[CachedLexPos+N];
1229    else
1230      return PeekAhead(N+1);
1231  }
1232
  /// \brief When backtracking is enabled and tokens are cached,
  /// this allows reverting a specific number of tokens.
  ///
  /// Note that the number of tokens being reverted should be up to the last
  /// backtrack position, not more.
  ///
  /// \param N the number of tokens to move the cached lex position back by.
  void RevertCachedTokens(unsigned N) {
    assert(isBacktrackEnabled() &&
           "Should only be called when tokens are cached for backtracking");
    // The new position may not fall before the innermost backtrack position.
    assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
         && "Should revert tokens up to the last backtrack position, not more");
    // ... nor before the start of the cache.
    assert(signed(CachedLexPos) - signed(N) >= 0 &&
           "Corrupted backtrack positions ?");
    CachedLexPos -= N;
  }
1247
1248  /// \brief Enters a token in the token stream to be lexed next.
1249  ///
1250  /// If BackTrack() is called afterwards, the token will remain at the
1251  /// insertion point.
1252  void EnterToken(const Token &Tok) {
1253    EnterCachingLexMode();
1254    CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1255  }
1256
1257  /// We notify the Preprocessor that if it is caching tokens (because
1258  /// backtrack is enabled) it should replace the most recent cached tokens
1259  /// with the given annotation token. This function has no effect if
1260  /// backtracking is not enabled.
1261  ///
1262  /// Note that the use of this function is just for optimization, so that the
1263  /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
1264  /// invoked.
1265  void AnnotateCachedTokens(const Token &Tok) {
1266    assert(Tok.isAnnotation() && "Expected annotation token");
1267    if (CachedLexPos != 0 && isBacktrackEnabled())
1268      AnnotatePreviousCachedTokens(Tok);
1269  }
1270
1271  /// Get the location of the last cached token, suitable for setting the end
1272  /// location of an annotation token.
1273  SourceLocation getLastCachedTokenLocation() const {
1274    assert(CachedLexPos != 0);
1275    return CachedTokens[CachedLexPos-1].getLastLoc();
1276  }
1277
  /// \brief Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
  /// CachedTokens.
  bool IsPreviousCachedToken(const Token &Tok) const;

  /// \brief Replace the token at `CachedLexPos - 1` in CachedTokens by the
  /// tokens in \p NewToks.
  ///
  /// Useful when a token needs to be split into smaller ones and the most
  /// recent token in CachedTokens must be updated to reflect that.
  void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1288
1289  /// \brief Replace the last token with an annotation token.
1290  ///
1291  /// Like AnnotateCachedTokens(), this routine replaces an
1292  /// already-parsed (and resolved) token with an annotation
1293  /// token. However, this routine only replaces the last token with
1294  /// the annotation token; it does not affect any other cached
1295  /// tokens. This function has no effect if backtracking is not
1296  /// enabled.
1297  void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1298    assert(Tok.isAnnotation() && "Expected annotation token");
1299    if (CachedLexPos != 0 && isBacktrackEnabled())
1300      CachedTokens[CachedLexPos-1] = Tok;
1301  }
1302
  /// \brief Enter an annotation token into the token stream.
  ///
  /// \param Range presumably the source range the annotation covers --
  /// confirm in the definition.
  /// \param Kind the token kind of the annotation.
  /// \param AnnotationVal opaque value associated with the annotation.
  void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
                            void *AnnotationVal);
1306
1307  /// Update the current token to represent the provided
1308  /// identifier, in order to cache an action performed by typo correction.
1309  void TypoCorrectToken(const Token &Tok) {
1310    assert(Tok.getIdentifierInfo() && "Expected identifier token");
1311    if (CachedLexPos != 0 && isBacktrackEnabled())
1312      CachedTokens[CachedLexPos-1] = Tok;
1313  }
1314
  /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
  /// CurTokenLexer pointers.
  ///
  /// Defined out of line.
  void recomputeCurLexerKind();
1318
1319  /// \brief Returns true if incremental processing is enabled
1320  bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1321
1322  /// \brief Enables the incremental processing
1323  void enableIncrementalProcessing(bool value = true) {
1324    IncrementalProcessing = value;
1325  }
1326
  /// \brief Specify the point at which code-completion will be performed.
  ///
  /// \param File the file in which code completion should occur. If
  /// this file is included multiple times, code-completion will
  /// perform completion the first time it is included. If NULL, this
  /// function clears out the code-completion point.
  ///
  /// \param Line the line at which code completion should occur
  /// (1-based).
  ///
  /// \param Column the column at which code completion should occur
  /// (1-based).
  ///
  /// \returns true if an error occurred, false otherwise.
  ///
  /// Defined out of line.
  bool SetCodeCompletionPoint(const FileEntry *File,
                              unsigned Line, unsigned Column);
1343
1344  /// \brief Determine if we are performing code completion.
1345  bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1346
1347  /// \brief Returns the location of the code-completion point.
1348  ///
1349  /// Returns an invalid location if code-completion is not enabled or the file
1350  /// containing the code-completion point has not been lexed yet.
1351  SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1352
1353  /// \brief Returns the start location of the file of code-completion point.
1354  ///
1355  /// Returns an invalid location if code-completion is not enabled or the file
1356  /// containing the code-completion point has not been lexed yet.
1357  SourceLocation getCodeCompletionFileLoc() const {
1358    return CodeCompletionFileLoc;
1359  }
1360
1361  /// \brief Returns true if code-completion is enabled and we have hit the
1362  /// code-completion point.
1363  bool isCodeCompletionReached() const { return CodeCompletionReached; }
1364
1365  /// \brief Note that we hit the code-completion point.
1366  void setCodeCompletionReached() {
1367    assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1368    CodeCompletionReached = true;
1369    // Silence any diagnostics that occur after we hit the code-completion.
1370    getDiagnostics().setSuppressAllDiagnostics(true);
1371  }
1372
1373  /// \brief The location of the currently-active \#pragma clang
1374  /// arc_cf_code_audited begin.
1375  ///
1376  /// Returns an invalid location if there is no such pragma active.
1377  SourceLocation getPragmaARCCFCodeAuditedLoc() const {
1378    return PragmaARCCFCodeAuditedLoc;
1379  }
1380
1381  /// \brief Set the location of the currently-active \#pragma clang
1382  /// arc_cf_code_audited begin.  An invalid location ends the pragma.
1383  void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
1384    PragmaARCCFCodeAuditedLoc = Loc;
1385  }
1386
1387  /// \brief The location of the currently-active \#pragma clang
1388  /// assume_nonnull begin.
1389  ///
1390  /// Returns an invalid location if there is no such pragma active.
1391  SourceLocation getPragmaAssumeNonNullLoc() const {
1392    return PragmaAssumeNonNullLoc;
1393  }
1394
1395  /// \brief Set the location of the currently-active \#pragma clang
1396  /// assume_nonnull begin.  An invalid location ends the pragma.
1397  void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1398    PragmaAssumeNonNullLoc = Loc;
1399  }
1400
1401  /// \brief Set the directory in which the main file should be considered
1402  /// to have been found, if it is not a real file.
1403  void setMainFileDir(const DirectoryEntry *Dir) {
1404    MainFileDir = Dir;
1405  }
1406
1407  /// \brief Instruct the preprocessor to skip part of the main source file.
1408  ///
1409  /// \param Bytes The number of bytes in the preamble to skip.
1410  ///
1411  /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1412  /// start of a line.
1413  void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1414    SkipMainFilePreamble.first = Bytes;
1415    SkipMainFilePreamble.second = StartOfLine;
1416  }
1417
1418  /// Forwarding function for diagnostics.  This emits a diagnostic at
1419  /// the specified Token's location, translating the token's start
1420  /// position in the current buffer into a SourcePosition object for rendering.
1421  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1422    return Diags->Report(Loc, DiagID);
1423  }
1424
1425  DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1426    return Diags->Report(Tok.getLocation(), DiagID);
1427  }
1428
1429  /// Return the 'spelling' of the token at the given
1430  /// location; does not go up to the spelling location or down to the
1431  /// expansion location.
1432  ///
1433  /// \param buffer A buffer which will be used only if the token requires
1434  ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
1435  /// \param invalid If non-null, will be set \c true if an error occurs.
1436  StringRef getSpelling(SourceLocation loc,
1437                        SmallVectorImpl<char> &buffer,
1438                        bool *invalid = nullptr) const {
1439    return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1440  }
1441
1442  /// \brief Return the 'spelling' of the Tok token.
1443  ///
1444  /// The spelling of a token is the characters used to represent the token in
1445  /// the source file after trigraph expansion and escaped-newline folding.  In
1446  /// particular, this wants to get the true, uncanonicalized, spelling of
1447  /// things like digraphs, UCNs, etc.
1448  ///
1449  /// \param Invalid If non-null, will be set \c true if an error occurs.
1450  std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1451    return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1452  }
1453
1454  /// \brief Get the spelling of a token into a preallocated buffer, instead
1455  /// of as an std::string.
1456  ///
1457  /// The caller is required to allocate enough space for the token, which is
1458  /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1459  /// actual result is returned.
1460  ///
1461  /// Note that this method may do two possible things: it may either fill in
1462  /// the buffer specified with characters, or it may *change the input pointer*
1463  /// to point to a constant buffer with the data already in it (avoiding a
1464  /// copy).  The caller is not allowed to modify the returned buffer pointer
1465  /// if an internal buffer is returned.
1466  unsigned getSpelling(const Token &Tok, const char *&Buffer,
1467                       bool *Invalid = nullptr) const {
1468    return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1469  }
1470
  /// \brief Get the spelling of a token into a SmallVector.
  ///
  /// Note that the returned StringRef may not point to the
  /// supplied buffer if a copy can be avoided.
  ///
  /// \param Invalid presumably set to true on error when non-null, as for the
  /// other getSpelling() overloads -- confirm in the definition.
  StringRef getSpelling(const Token &Tok,
                        SmallVectorImpl<char> &Buffer,
                        bool *Invalid = nullptr) const;
1478
1479  /// \brief Relex the token at the specified location.
1480  /// \returns true if there was a failure, false on success.
1481  bool getRawToken(SourceLocation Loc, Token &Result,
1482                   bool IgnoreWhiteSpace = false) {
1483    return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1484  }
1485
1486  /// \brief Given a Token \p Tok that is a numeric constant with length 1,
1487  /// return the character.
1488  char
1489  getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1490                                              bool *Invalid = nullptr) const {
1491    assert(Tok.is(tok::numeric_constant) &&
1492           Tok.getLength() == 1 && "Called on unsupported token");
1493    assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1494
1495    // If the token is carrying a literal data pointer, just use it.
1496    if (const char *D = Tok.getLiteralData())
1497      return *D;
1498
1499    // Otherwise, fall back on getCharacterData, which is slower, but always
1500    // works.
1501    return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1502  }
1503
1504  /// \brief Retrieve the name of the immediate macro expansion.
1505  ///
1506  /// This routine starts from a source location, and finds the name of the
1507  /// macro responsible for its immediate expansion. It looks through any
1508  /// intervening macro argument expansions to compute this. It returns a
1509  /// StringRef that refers to the SourceManager-owned buffer of the source
1510  /// where that macro name is spelled. Thus, the result shouldn't out-live
1511  /// the SourceManager.
1512  StringRef getImmediateMacroName(SourceLocation Loc) {
1513    return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1514  }
1515
  /// \brief Plop the specified string into a scratch buffer and set the
  /// specified token's location and length to it.
  ///
  /// If specified, the source locations provide the start and end of the
  /// expansion point of the token; both default to a default-constructed
  /// SourceLocation.
  void CreateString(StringRef Str, Token &Tok,
                    SourceLocation ExpansionLocStart = SourceLocation(),
                    SourceLocation ExpansionLocEnd = SourceLocation());
1524
1525  /// \brief Computes the source location just past the end of the
1526  /// token at this source location.
1527  ///
1528  /// This routine can be used to produce a source location that
1529  /// points just past the end of the token referenced by \p Loc, and
1530  /// is generally used when a diagnostic needs to point just after a
1531  /// token where it expected something different that it received. If
1532  /// the returned source location would not be meaningful (e.g., if
1533  /// it points into a macro), this routine returns an invalid
1534  /// source location.
1535  ///
1536  /// \param Offset an offset from the end of the token, where the source
1537  /// location should refer to. The default offset (0) produces a source
1538  /// location pointing just past the end of the token; an offset of 1 produces
1539  /// a source location pointing to the last character in the token, etc.
1540  SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1541    return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1542  }
1543
  /// \brief Returns true if the given MacroID location points at the first
  /// token of the macro expansion.
  ///
  /// Forwards to Lexer::isAtStartOfMacroExpansion with this preprocessor's
  /// source manager and language options.
  ///
  /// \param loc The MacroID location to test.
  /// \param MacroBegin If non-null and function returns true, it is set to
  /// begin location of the macro.
  bool isAtStartOfMacroExpansion(SourceLocation loc,
                                 SourceLocation *MacroBegin = nullptr) const {
    return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
                                            MacroBegin);
  }
1554
  /// \brief Returns true if the given MacroID location points at the last
  /// token of the macro expansion.
  ///
  /// Forwards to Lexer::isAtEndOfMacroExpansion with this preprocessor's
  /// source manager and language options.
  ///
  /// \param loc The MacroID location to test.
  /// \param MacroEnd If non-null and function returns true, it is set to
  /// end location of the macro.
  bool isAtEndOfMacroExpansion(SourceLocation loc,
                               SourceLocation *MacroEnd = nullptr) const {
    return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
  }
1564
  /// \brief Print the token to stderr, used for debugging.
  ///
  /// \param Tok The token to print.
  /// \param DumpFlags NOTE(review): presumably also prints the token's flags
  /// when true -- confirm against the definition.
  void DumpToken(const Token &Tok, bool DumpFlags = false) const;
  // NOTE(review): the three helpers below are undocumented in this header; by
  // name they look like debugging dumps of a source location, a macro
  // definition and an identifier's macro state -- confirm against their
  // definitions before relying on that.
  void DumpLocation(SourceLocation Loc) const;
  void DumpMacro(const MacroInfo &MI) const;
  void dumpMacroInfo(const IdentifierInfo *II);
1570
  /// \brief Given a location that specifies the start of a
  /// token, return a new location that specifies a character within the token.
  ///
  /// Forwards to Lexer::AdvanceToTokenCharacter with this preprocessor's
  /// source manager and language options.
  ///
  /// \param TokStart Location of the start of the token.
  /// \param Char Which character within the token the result should specify;
  /// passed through unchanged to the Lexer.
  SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
                                         unsigned Char) const {
    return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
  }
1577
1578  /// \brief Increment the counters for the number of token paste operations
1579  /// performed.
1580  ///
1581  /// If fast was specified, this is a 'fast paste' case we handled.
1582  void IncrementPasteCounter(bool isFast) {
1583    if (isFast)
1584      ++NumFastTokenPaste;
1585    else
1586      ++NumTokenPaste;
1587  }
1588
  // NOTE(review): undocumented here; presumably prints the preprocessor's
  // statistics counters (such as the token paste counts) -- confirm against
  // the definition.
  void PrintStats();

  // NOTE(review): undocumented here; presumably reports memory used by this
  // preprocessor. Units and what is included are not visible in this header
  // -- confirm against the definition.
  size_t getTotalMemory() const;

  /// When the macro expander pastes together a comment (/##/) in Microsoft
  /// mode, this method handles updating the current state, returning the
  /// token on the next source line.
  ///
  /// \param Tok Receives the token on the next source line.
  void HandleMicrosoftCommentPaste(Token &Tok);
1597
1598  //===--------------------------------------------------------------------===//
1599  // Preprocessor callback methods.  These are invoked by a lexer as various
1600  // directives and events are found.
1601
1602  /// Given a tok::raw_identifier token, look up the
1603  /// identifier information for the token and install it into the token,
1604  /// updating the token kind accordingly.
1605  IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1606
private:
  // Maps an identifier to an unsigned value. NOTE(review): presumably the
  // diagnostic ID registered through SetPoisonReason -- confirm in the
  // definition.
  llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;

public:

  /// \brief Specifies the reason for poisoning an identifier.
  ///
  /// If that identifier is accessed while poisoned, then this reason will be
  /// used instead of the default "poisoned" diagnostic.
  ///
  /// \param II The identifier the reason applies to.
  /// \param DiagID The diagnostic to use as the reason.
  void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);

  /// \brief Display reason for poisoned identifier.
  ///
  /// \param Tok The token that refers to the poisoned identifier.
  void HandlePoisonedIdentifier(Token & Tok);
1620
1621  void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1622    if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1623      if(II->isPoisoned()) {
1624        HandlePoisonedIdentifier(Identifier);
1625      }
1626    }
1627  }
1628
private:
  /// Identifiers used for SEH handling in Borland. These are only
  /// allowed in particular circumstances.
  ///
  /// Each group below holds three spellings of one identifier; the comment on
  /// each group names the construct it belongs to.
  // __except block
  IdentifierInfo *Ident__exception_code,
                 *Ident___exception_code,
                 *Ident_GetExceptionCode;
  // __except filter expression
  IdentifierInfo *Ident__exception_info,
                 *Ident___exception_info,
                 *Ident_GetExceptionInfo;
  // __finally
  IdentifierInfo *Ident__abnormal_termination,
                 *Ident___abnormal_termination,
                 *Ident_AbnormalTermination;

  // NOTE(review): undocumented here; presumably the end position of the
  // current lexer's buffer -- confirm against the definition.
  const char *getCurLexerEndPos();
  // NOTE(review): undocumented here; by name, emits a diagnostic about a
  // header in \p Mod's umbrella directory that is missing -- confirm against
  // the definition.
  void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
1647
1648public:
  // Borland SEH support. NOTE(review): presumably sets (or, with
  // Poison == false, clears) the poisoned state of the SEH identifiers
  // declared in this class -- confirm against the definition.
  void PoisonSEHIdentifiers(bool Poison = true); // Borland

  /// \brief Callback invoked when the lexer reads an identifier and has
  /// filled in the tokens IdentifierInfo member.
  ///
  /// This callback potentially macro expands it or turns it into a named
  /// token (like 'for').
  ///
  /// \param Identifier The identifier token that was just lexed.
  /// \returns true if we actually computed a token, false if we need to
  /// lex again.
  bool HandleIdentifier(Token &Identifier);


  /// \brief Callback invoked when the lexer hits the end of the current file.
  ///
  /// This either returns the EOF token and returns true, or
  /// pops a level off the include stack and returns false, at which point the
  /// client should call lex again.
  ///
  /// \param Result Receives the EOF token when true is returned.
  /// \param isEndOfMacro Not documented in this header; see the definition.
  bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);

  /// \brief Callback invoked when the current TokenLexer hits the end of its
  /// token stream.
  ///
  /// NOTE(review): the meaning of the return value is not documented here; it
  /// presumably mirrors HandleEndOfFile -- confirm against the definition.
  bool HandleEndOfTokenLexer(Token &Result);

  /// \brief Callback invoked when the lexer sees a # token at the start of a
  /// line.
  ///
  /// This consumes the directive, modifies the lexer/preprocessor state, and
  /// advances the lexer(s) so that the next token read is the correct one.
  void HandleDirective(Token &Result);

  /// \brief Ensure that the next token is a tok::eod token.
  ///
  /// If not, emit a diagnostic and consume up until the eod.
  /// If \p EnableMacros is true, then we consider macros that expand to zero
  /// tokens as being ok.
  ///
  /// \param Directive Name of the directive being checked. NOTE(review):
  /// presumably used in the diagnostic text -- confirm.
  void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);

  /// \brief Read and discard all tokens remaining on the current line until
  /// the tok::eod token is found.
  void DiscardUntilEndOfDirective();
1690
1691  /// \brief Returns true if the preprocessor has seen a use of
1692  /// __DATE__ or __TIME__ in the file so far.
1693  bool SawDateOrTime() const {
1694    return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1695  }
  /// \brief Read the stored CounterValue.
  unsigned getCounterValue() const { return CounterValue; }
  /// \brief Overwrite the stored CounterValue with \p V.
  void setCounterValue(unsigned V) { CounterValue = V; }
1698
  /// \brief Retrieves the module that we're currently building, if any.
  Module *getCurrentModule();

  /// \brief Allocate a new MacroInfo object with the provided SourceLocation.
  ///
  /// \param L The source location to give the new MacroInfo.
  MacroInfo *AllocateMacroInfo(SourceLocation L);

  /// \brief Turn the specified lexer token into a fully checked and spelled
  /// filename, e.g. as an operand of \#include.
  ///
  /// The caller is expected to provide a buffer that is large enough to hold
  /// the spelling of the filename, but is also expected to handle the case
  /// when this method decides to use a different buffer.
  ///
  /// NOTE(review): the signature takes a StringRef rather than a buffer, so
  /// the paragraph above presumably refers to the storage \p Filename points
  /// into -- confirm against the definition.
  ///
  /// \returns true if the input filename was in <>'s or false if it was
  /// in ""'s.
  bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);

  /// \brief Given a "foo" or \<foo> reference, look up the indicated file.
  ///
  /// Returns null on failure.  \p isAngled indicates whether the file
  /// reference is for system \#include's or not (i.e. using <> instead of "").
  ///
  /// The remaining parameters are not documented in this header; see the
  /// definition for their contracts.
  const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
                              bool isAngled, const DirectoryLookup *FromDir,
                              const FileEntry *FromFile,
                              const DirectoryLookup *&CurDir,
                              SmallVectorImpl<char> *SearchPath,
                              SmallVectorImpl<char> *RelativePath,
                              ModuleMap::KnownHeader *SuggestedModule,
                              bool *IsMapped, bool SkipCache = false);
1728
1729  /// \brief Get the DirectoryLookup structure used to find the current
1730  /// FileEntry, if CurLexer is non-null and if applicable.
1731  ///
1732  /// This allows us to implement \#include_next and find directory-specific
1733  /// properties.
1734  const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
1735
  /// \brief Return true if we're in the top-level file, not in a \#include.
  bool isInPrimaryFile() const;

  /// \brief Handle cases where the \#include name is expanded
  /// from a macro as multiple tokens, which need to be glued together.
  ///
  /// This occurs for code like:
  /// \code
  ///    \#define FOO <x/y.h>
  ///    \#include FOO
  /// \endcode
  /// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
  ///
  /// This code concatenates and consumes tokens up to the '>' token.  It
  /// returns false if the > was found, otherwise it returns true if it finds
  /// and consumes the EOD marker.
  ///
  /// \param FilenameBuffer Buffer used for the concatenated name.
  /// \param End NOTE(review): presumably set to the location where the name
  /// ends -- confirm against the definition.
  bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
                              SourceLocation &End);

  /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is
  /// followed by EOD.  Return true if the token is not a valid on-off-switch.
  ///
  /// \param OOS NOTE(review): presumably receives the parsed switch value --
  /// confirm against the definition.
  bool LexOnOffSwitch(tok::OnOffSwitch &OOS);

  // NOTE(review): undocumented here. The parameters match ReadMacroName's, so
  // this presumably validates a macro name token without lexing it; the
  // meaning of the return value is not visible in this header -- confirm.
  bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
                      bool *ShadowFlag = nullptr);

  // NOTE(review): undocumented here; by name these bracket the processing of
  // a submodule -- confirm semantics against the definitions.
  void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
  Module *LeaveSubmodule(bool ForPragma);
1764
1765private:
  /// Save the current lexer state as a new entry at the back of
  /// IncludeMacroStack.
  ///
  /// CurLexer, CurPTHLexer and CurTokenLexer are moved into the new entry;
  /// CurLexerKind, CurLexerSubmodule, CurPPLexer and CurDirLookup are copied.
  /// Must not be called while the current lexer kind is CLK_CachingLexer
  /// (asserted).
  void PushIncludeMacroStack() {
    assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
    IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
                                   std::move(CurLexer), std::move(CurPTHLexer),
                                   CurPPLexer, std::move(CurTokenLexer),
                                   CurDirLookup);
    // CurPPLexer was only copied into the entry above, so clear it here
    // explicitly.
    CurPPLexer = nullptr;
  }
1774
1775  void PopIncludeMacroStack() {
1776    CurLexer = std::move(IncludeMacroStack.back().TheLexer);
1777    CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer);
1778    CurPPLexer = IncludeMacroStack.back().ThePPLexer;
1779    CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
1780    CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
1781    CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
1782    CurLexerKind = IncludeMacroStack.back().CurLexerKind;
1783    IncludeMacroStack.pop_back();
1784  }
1785
  // NOTE(review): undocumented here; by name, carries start-of-line /
  // leading-space information over to \p Result -- confirm against the
  // definition.
  void PropagateLineStartLeadingSpaceInfo(Token &Result);

  /// Determine whether we need to create module macros for #defines in the
  /// current context.
  bool needModuleMacros() const;

  /// Update the set of active module macros and ambiguity flag for a module
  /// macro name.
  ///
  /// \param II The macro name.
  /// \param Info The module macro information to update.
  void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);

  // Allocation helpers for the macro directive kinds. NOTE(review): ownership
  // and lifetime of the returned objects are not visible in this header --
  // confirm against the definitions.
  DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
                                               SourceLocation Loc);
  UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
  VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
                                                             bool isPublic);
1801
  /// \brief Lex and validate a macro name, which occurs after a
  /// \#define or \#undef.
  ///
  /// \param MacroNameTok Token that represents the name defined or undefined.
  /// \param IsDefineUndef Kind of preprocessor directive.
  /// \param ShadowFlag Points to flag that is set if macro name shadows
  ///                   a keyword.
  ///
  /// This emits a diagnostic, sets the token kind to eod,
  /// and discards the rest of the macro line if the macro name is invalid.
  void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
                     bool *ShadowFlag = nullptr);

  /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
  /// entire line) of the macro's tokens and adds them to MacroInfo, and while
  /// doing so performs certain validity checks including (but not limited to):
  ///   - # (stringization) is followed by a macro parameter
  /// \param MacroNameTok - Token that represents the macro name
  /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
  ///
  ///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
  ///  returns a nullptr if an invalid sequence of tokens is encountered.

  MacroInfo *ReadOptionalMacroParameterListAndBody(
      const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);

  /// The ( starting an argument list of a macro definition has just been read.
  /// Lex the rest of the parameters and the closing ), updating \p MI with
  /// what we learn and saving in \p LastTok the last token read.
  /// Return true if an error occurs parsing the arg list.
  bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
1833
  /// We just read a \#if or related directive and decided that the
  /// subsequent tokens are in the \#if'd out portion of the
  /// file.  Lex the rest of the file, until we see an \#endif.  If \p
  /// FoundNonSkipPortion is true, then we have already emitted code for part of
  /// this \#if directive, so \#else/\#elif blocks should never be entered. If
  /// \p FoundElse is false, then \#else directives are ok, if not, then we have
  /// already seen one so a \#else directive is a duplicate.  When this returns,
  /// the caller can lex the first valid token.
  ///
  /// \p HashToken, \p IfTokenLoc and \p ElseLoc are not documented in this
  /// header. NOTE(review): presumably the '#' token of the directive, the
  /// location of the \#if token and the location of an already-seen \#else --
  /// confirm against the definition.
  void SkipExcludedConditionalBlock(const Token &HashToken,
                                    SourceLocation IfTokenLoc,
                                    bool FoundNonSkipPortion, bool FoundElse,
                                    SourceLocation ElseLoc = SourceLocation());

  /// \brief A fast PTH version of SkipExcludedConditionalBlock.
  void PTHSkipExcludedConditionalBlock();
1849
  /// Information about the result for evaluating an expression for a
  /// preprocessor directive.
  ///
  /// This is the return type of EvaluateDirectiveExpression.
  struct DirectiveEvalResult {
    /// Whether the expression was evaluated as true or not.
    bool Conditional;
    /// True if the expression contained identifiers that were undefined.
    bool IncludedUndefinedIds;
  };
1858
  /// \brief Evaluate an integer constant expression that may occur after a
  /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
  ///
  /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
  ///
  /// \param IfNDefMacro Output: receives X in the "!defined(X)" case
  /// described above.
  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);

  /// \brief Install the standard preprocessor pragmas:
  /// \#pragma GCC poison/system_header/dependency and \#pragma once.
  void RegisterBuiltinPragmas();

  /// \brief Register builtin macros such as __LINE__ with the identifier table.
  void RegisterBuiltinMacros();

  /// If an identifier token is read that is to be expanded as a macro, handle
  /// it and return the next token as 'Tok'.  If we lexed a token, return true;
  /// otherwise the caller should lex again.
  ///
  /// \param Tok On entry the identifier token; receives the next token.
  /// \param MD The macro definition to expand.
  bool HandleMacroExpandedIdentifier(Token &Tok, const MacroDefinition &MD);
1876
  /// \brief Cache macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
  ///
  /// \param tokLexer The TokenLexer the tokens are cached for.
  /// \param tokens The macro expanded tokens to add.
  /// \returns NOTE(review): presumably a pointer to the cached copy of
  /// \p tokens -- confirm against the definition.
  Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
                                  ArrayRef<Token> tokens);
  // Removal half of the stack-like cache described above.
  void removeCachedMacroExpandedTokensOfLastLexer();
  // Grants TokenLexer::ExpandFunctionArguments access to the private members
  // of this class.
  friend void TokenLexer::ExpandFunctionArguments();
1886
  /// Determine whether the next preprocessor token to be
  /// lexed is a '('.  If so, consume the token and return true, if not, this
  /// method should have no observable side-effect on the lexed tokens.
  bool isNextPPTokenLParen();

  /// After reading "MACRO(", this method is invoked to read all of the formal
  /// arguments specified for the macro invocation.  Returns null on error.
  ///
  /// \param MacroName The token naming the macro being invoked.
  /// \param MI The macro being invoked.
  /// \param ExpansionEnd NOTE(review): presumably receives the location of
  /// the end of the invocation -- confirm against the definition.
  MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
                                       SourceLocation &ExpansionEnd);

  /// \brief If an identifier token is read that is to be expanded
  /// as a builtin macro, handle it and return the next token as 'Tok'.
  void ExpandBuiltinMacro(Token &Tok);

  /// \brief Read a \c _Pragma directive, slice it up, process it, then
  /// return the first token after the directive.
  /// This assumes that the \c _Pragma token has just been read into \p Tok.
  void Handle_Pragma(Token &Tok);

  /// \brief Like Handle_Pragma except the pragma text is not enclosed within
  /// a string literal.
  void HandleMicrosoft__pragma(Token &Tok);

  /// \brief Add a lexer to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.
  ///
  /// \param TheLexer The lexer to add. NOTE(review): whether ownership of the
  /// raw pointer is taken is not visible in this header -- confirm.
  /// \param Dir Directory lookup associated with the file being entered.
  void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);

  /// \brief Add a lexer to the top of the include stack and
  /// start getting tokens from it using the PTH cache.
  ///
  /// \param PL The PTH lexer to add. NOTE(review): whether ownership of the
  /// raw pointer is taken is not visible in this header -- confirm.
  /// \param Dir Directory lookup associated with the file being entered.
  void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
1917
  /// \brief Set the FileID for the preprocessor predefines.
  ///
  /// May only be called while PredefinesFileID is still invalid, i.e. the ID
  /// has not been set before (asserted).
  ///
  /// \param FID The FileID to store.
  void setPredefinesFileID(FileID FID) {
    assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
    PredefinesFileID = FID;
  }
1923
1924  /// \brief Returns true if we are lexing from a file and not a
1925  /// pragma or a macro.
1926  static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
1927    return L ? !L->isPragmaLexer() : P != nullptr;
1928  }
1929
  /// Overload that applies the two-pointer IsFileLexer check to the lexers
  /// stored in an include stack entry.
  static bool IsFileLexer(const IncludeStackInfo& I) {
    return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
  }
1933
  /// Overload that applies the two-pointer IsFileLexer check to the current
  /// lexers (CurLexer and CurPPLexer).
  bool IsFileLexer() const {
    return IsFileLexer(CurLexer.get(), CurPPLexer);
  }
1937
1938  //===--------------------------------------------------------------------===//
1939  // Caching stuff.
1940  void CachingLex(Token &Result);
1941  bool InCachingLexMode() const {
1942    // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
1943    // that we are past EOF, not that we are in CachingLex mode.
1944    return !CurPPLexer && !CurTokenLexer && !CurPTHLexer &&
1945           !IncludeMacroStack.empty();
1946  }
  // NOTE(review): undocumented here; presumably the counterpart of
  // ExitCachingLexMode that switches the preprocessor into CachingLex mode --
  // confirm against the definition.
  void EnterCachingLexMode();
1948  void ExitCachingLexMode() {
1949    if (InCachingLexMode())
1950      RemoveTopOfLexerStack();
1951  }
  // NOTE(review): undocumented here; presumably returns the token \p N
  // positions ahead without consuming it -- confirm the exact indexing
  // against the definition.
  const Token &PeekAhead(unsigned N);
  // NOTE(review): undocumented here; by name, replaces previously cached
  // tokens with the annotation token \p Tok -- confirm against the
  // definition.
  void AnnotatePreviousCachedTokens(const Token &Tok);
1954
1955  //===--------------------------------------------------------------------===//
1956  /// Handle*Directive - implement the various preprocessor directives.  These
1957  /// should side-effect the current preprocessor object so that the next call
1958  /// to Lex() will return the appropriate token next.
1959  void HandleLineDirective();
1960  void HandleDigitDirective(Token &Tok);
1961  void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
1962  void HandleIdentSCCSDirective(Token &Tok);
1963  void HandleMacroPublicDirective(Token &Tok);
1964  void HandleMacroPrivateDirective();
1965
1966  // File inclusion.
1967  void HandleIncludeDirective(SourceLocation HashLoc,
1968                              Token &Tok,
1969                              const DirectoryLookup *LookupFrom = nullptr,
1970                              const FileEntry *LookupFromFile = nullptr,
1971                              bool isImport = false);
1972  void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
1973  void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
1974  void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
1975  void HandleMicrosoftImportDirective(Token &Tok);
1976
1977public:
  /// Check that the given module is available, producing a diagnostic if not.
  ///
  /// \param LangOpts Language options used for the check.
  /// \param TargetInfo Target information used for the check.
  /// \param Diags Engine through which the diagnostic is produced.
  /// \param M The module to check.
  /// \return \c true if the check failed (because the module is not available).
  ///         \c false if the module appears to be usable.
  static bool checkModuleIsAvailable(const LangOptions &LangOpts,
                                     const TargetInfo &TargetInfo,
                                     DiagnosticsEngine &Diags, Module *M);

  // Module inclusion testing.
  /// \brief Find the module that owns the source or header file that
  /// \p Loc points to. If the location is in a file that was included
  /// into a module, or is outside any module, returns nullptr.
  Module *getModuleForLocation(SourceLocation Loc);

  /// \brief We want to produce a diagnostic at location IncLoc concerning a
  /// missing module import.
  ///
  /// \param IncLoc The location at which the missing import was detected.
  /// \param M The desired module.
  /// \param MLoc A location within the desired module at which some desired
  ///        effect occurred (e.g., where a desired entity was declared).
  ///
  /// \return A file that can be #included to import a module containing MLoc.
  ///         Null if no such file could be determined or if a #include is not
  ///         appropriate.
  const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                                          Module *M,
                                                          SourceLocation MLoc);
2005
  /// Forwards to PreambleConditionalStack.isRecording().
  bool isRecordingPreamble() const {
    return PreambleConditionalStack.isRecording();
  }
2009
  /// Forwards to PreambleConditionalStack.hasRecordedPreamble().
  bool hasRecordedPreamble() const {
    return PreambleConditionalStack.hasRecordedPreamble();
  }
2013
2014  ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2015      return PreambleConditionalStack.getStack();
2016  }
2017
  /// Store \p s as the stack held by PreambleConditionalStack.
  void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
    PreambleConditionalStack.setStack(s);
  }
2021
  /// Put PreambleConditionalStack into its replaying state and then store
  /// \p s as its stack.
  void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
    PreambleConditionalStack.startReplaying();
    PreambleConditionalStack.setStack(s);
  }
2026
2027private:
  /// \brief After processing predefined file, initialize the conditional stack
  /// from the preamble.
  void replayPreambleConditionalStack();

  // Macro handling.
  //
  // ImmediatelyAfterTopLevelIfndef is not documented in this header.
  // NOTE(review): it is presumably what ReadOptionalMacroParameterListAndBody
  // receives as ImmediatelyAfterHeaderGuard -- confirm against the definition.
  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef);
  void HandleUndefDirective();

  // Conditional Inclusion.
  //
  // HashToken and ReadAnyTokensBeforeDirective are not documented in this
  // header; see the definitions. NOTE(review): HashToken is presumably the
  // '#' token that introduced the directive -- confirm.
  void HandleIfdefDirective(Token &Tok, const Token &HashToken,
                            bool isIfndef, bool ReadAnyTokensBeforeDirective);
  void HandleIfDirective(Token &Tok, const Token &HashToken,
                         bool ReadAnyTokensBeforeDirective);
  void HandleEndifDirective(Token &Tok);
  void HandleElseDirective(Token &Tok, const Token &HashToken);
  void HandleElifDirective(Token &Tok, const Token &HashToken);

  // Pragmas.
  void HandlePragmaDirective(SourceLocation IntroducerLoc,
                             PragmaIntroducerKind Introducer);
2048public:
  // Handlers for individual pragmas. These are public, unlike
  // HandlePragmaDirective. NOTE(review): presumably so that pragma handler
  // classes outside Preprocessor can call them -- confirm.
  void HandlePragmaOnce(Token &OnceTok);
  void HandlePragmaMark();
  void HandlePragmaPoison();
  void HandlePragmaSystemHeader(Token &SysHeaderTok);
  void HandlePragmaDependency(Token &DependencyTok);
  void HandlePragmaPushMacro(Token &Tok);
  void HandlePragmaPopMacro(Token &Tok);
  void HandlePragmaIncludeAlias(Token &Tok);
  void HandlePragmaModuleBuild(Token &Tok);
  // NOTE(review): undocumented here; presumably the shared parsing step of
  // the push_macro / pop_macro handlers above, returning the macro's
  // identifier -- confirm against the definition.
  IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);

  // Return true and store the first token only if any CommentHandler
  // has inserted some tokens and getCommentRetentionState() is false.
  //
  // Token receives that first token; Comment is the source range of the
  // comment being handled.
  bool HandleComment(Token &Token, SourceRange Comment);

  /// \brief A macro is used, update information about macros that need unused
  /// warnings.
  ///
  /// \param MI The macro that was used.
  void markMacroAsUsed(MacroInfo *MI);
2067};
2068
/// \brief Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
///
/// Subclasses implement HandleComment; the class cannot be instantiated
/// directly because that method is pure virtual.
class CommentHandler {
public:
  /// Virtual so that handlers can be destroyed through a CommentHandler
  /// pointer; declared here and defined out of line.
  virtual ~CommentHandler();

  // The handler shall return true if it has pushed any tokens
  // to be read using e.g. EnterToken or EnterTokenStream.
  //
  // PP is the preprocessor that encountered the comment and Comment is the
  // comment's source range.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
2079
2080/// \brief Registry of pragma handlers added by plugins
2081typedef llvm::Registry<PragmaHandler> PragmaHandlerRegistry;
2082
2083}  // end namespace clang
2084
2085#endif
2086