1//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief Defines the clang::Preprocessor interface. 12/// 13//===----------------------------------------------------------------------===// 14 15#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 16#define LLVM_CLANG_LEX_PREPROCESSOR_H 17 18#include "clang/Basic/Builtins.h" 19#include "clang/Basic/Diagnostic.h" 20#include "clang/Basic/IdentifierTable.h" 21#include "clang/Basic/SourceLocation.h" 22#include "clang/Lex/Lexer.h" 23#include "clang/Lex/MacroInfo.h" 24#include "clang/Lex/ModuleMap.h" 25#include "clang/Lex/PPCallbacks.h" 26#include "clang/Lex/PTHLexer.h" 27#include "clang/Lex/TokenLexer.h" 28#include "llvm/ADT/ArrayRef.h" 29#include "llvm/ADT/DenseMap.h" 30#include "llvm/ADT/IntrusiveRefCntPtr.h" 31#include "llvm/ADT/SmallPtrSet.h" 32#include "llvm/ADT/SmallVector.h" 33#include "llvm/ADT/TinyPtrVector.h" 34#include "llvm/Support/Allocator.h" 35#include "llvm/Support/Registry.h" 36#include <memory> 37#include <vector> 38 39namespace llvm { 40 template<unsigned InternalLen> class SmallString; 41} 42 43namespace clang { 44 45class SourceManager; 46class ExternalPreprocessorSource; 47class FileManager; 48class FileEntry; 49class HeaderSearch; 50class MemoryBufferCache; 51class PragmaNamespace; 52class PragmaHandler; 53class CommentHandler; 54class ScratchBuffer; 55class TargetInfo; 56class PPCallbacks; 57class CodeCompletionHandler; 58class DirectoryLookup; 59class PreprocessingRecord; 60class ModuleLoader; 61class PTHManager; 62class PreprocessorOptions; 63 64/// \brief Stores token information for comparing actual tokens with 65/// predefined values. Only handles simple tokens and identifiers. 
66class TokenValue { 67 tok::TokenKind Kind; 68 IdentifierInfo *II; 69 70public: 71 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 72 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 73 assert(Kind != tok::identifier && 74 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 75 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 76 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 77 } 78 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 79 bool operator==(const Token &Tok) const { 80 return Tok.getKind() == Kind && 81 (!II || II == Tok.getIdentifierInfo()); 82 } 83}; 84 85/// \brief Context in which macro name is used. 86enum MacroUse { 87 MU_Other = 0, // other than #define or #undef 88 MU_Define = 1, // macro name specified in #define 89 MU_Undef = 2 // macro name specified in #undef 90}; 91 92/// \brief Engages in a tight little dance with the lexer to efficiently 93/// preprocess tokens. 94/// 95/// Lexers know only about tokens within a single source file, and don't 96/// know anything about preprocessor-level issues like the \#include stack, 97/// token expansion, etc. 98class Preprocessor { 99 friend class VariadicMacroScopeGuard; 100 friend class VAOptDefinitionContext; 101 std::shared_ptr<PreprocessorOptions> PPOpts; 102 DiagnosticsEngine *Diags; 103 LangOptions &LangOpts; 104 const TargetInfo *Target; 105 const TargetInfo *AuxTarget; 106 FileManager &FileMgr; 107 SourceManager &SourceMgr; 108 MemoryBufferCache &PCMCache; 109 std::unique_ptr<ScratchBuffer> ScratchBuf; 110 HeaderSearch &HeaderInfo; 111 ModuleLoader &TheModuleLoader; 112 113 /// \brief External source of macros. 114 ExternalPreprocessorSource *ExternalSource; 115 116 117 /// An optional PTHManager object used for getting tokens from 118 /// a token cache rather than lexing the original source file. 
std::unique_ptr<PTHManager> PTH;

/// A BumpPtrAllocator object used to quickly allocate and release
/// objects internal to the Preprocessor.
llvm::BumpPtrAllocator BP;

/// Identifiers for builtin macros and other builtins. The trailing comment
/// on each line gives the source spelling the member stands for.
IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
IdentifierInfo *Ident__identifier;               // __identifier
IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
IdentifierInfo *Ident__has_feature;              // __has_feature
IdentifierInfo *Ident__has_extension;            // __has_extension
IdentifierInfo *Ident__has_builtin;              // __has_builtin
IdentifierInfo *Ident__has_attribute;            // __has_attribute
IdentifierInfo *Ident__has_include;              // __has_include
IdentifierInfo *Ident__has_include_next;         // __has_include_next
IdentifierInfo *Ident__has_warning;              // __has_warning
IdentifierInfo *Ident__is_identifier;            // __is_identifier
IdentifierInfo *Ident__building_module;          // __building_module
IdentifierInfo *Ident__MODULE__;                 // __MODULE__
IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute

// NOTE(review): undocumented in the original; presumably source locations
// associated with __DATE__ / __TIME__ -- confirm against the implementation.
SourceLocation DATELoc, TIMELoc;
unsigned CounterValue;  // Next __COUNTER__ value.

enum {
  /// \brief Maximum depth of \#includes.
  MaxAllowedIncludeStackDepth = 200
};

// State that is set before the preprocessor begins.
bool KeepComments : 1;
bool KeepMacroComments : 1;
bool SuppressIncludeNotFoundError : 1;

// State that changes while the preprocessor runs:
bool InMacroArgs : 1;            // True if parsing fn macro invocation args.

/// Whether the preprocessor owns the header search object.
bool OwnsHeaderSearch : 1;

/// True if macro expansion is disabled.
bool DisableMacroExpansion : 1;

/// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
/// when parsing preprocessor directives.
bool MacroExpansionInDirectivesOverride : 1;

class ResetMacroExpansionHelper;

/// \brief Whether we have already loaded macros from the external source.
///
/// Declared mutable, so it can be updated from const member functions.
mutable bool ReadMacrosFromExternalSource : 1;

/// \brief True if pragmas are enabled.
bool PragmasEnabled : 1;

/// \brief True if the current build action is a preprocessing action.
bool PreprocessedOutput : 1;

/// \brief True if we are currently preprocessing a \#if or \#elif directive.
bool ParsingIfOrElifDirective;

/// \brief True if we are pre-expanding macro arguments.
bool InMacroArgPreExpansion;

/// \brief Mapping/lookup information for all identifiers in
/// the program, including program keywords.
mutable IdentifierTable Identifiers;

/// \brief This table contains all the selectors in the program.
///
/// Unlike IdentifierTable above, this table *isn't* populated by the
/// preprocessor. It is declared/expanded here because its role/lifetime is
/// conceptually similar to the IdentifierTable. In addition, the current
/// control flow (in clang::ParseAST()) makes it convenient to put here.
///
/// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
/// the lifetime of the preprocessor.
SelectorTable Selectors;

/// \brief Information about builtins.
Builtin::Context BuiltinInfo;

/// \brief Tracks all of the pragmas that the client registered
/// with this preprocessor.
std::unique_ptr<PragmaNamespace> PragmaHandlers;

/// \brief Pragma handlers of the original source are stored here during the
/// parsing of a model file.
std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;

/// \brief Tracks all of the comment handlers that the client registered
/// with this preprocessor.
std::vector<CommentHandler *> CommentHandlers;

/// \brief True if we want to ignore EOF token and continue later on (thus
/// avoid tearing the Lexer and etc. down).
bool IncrementalProcessing;

/// The kind of translation unit we are processing.
TranslationUnitKind TUKind;

/// \brief The code-completion handler.
CodeCompletionHandler *CodeComplete;

/// \brief The file that we're performing code-completion for, if any.
const FileEntry *CodeCompletionFile;

/// \brief The offset in file for the code-completion point.
unsigned CodeCompletionOffset;

/// \brief The location for the code-completion point. This gets instantiated
/// when the CodeCompletionFile gets \#include'ed for preprocessing.
SourceLocation CodeCompletionLoc;

/// \brief The start location for the file of the code-completion point.
///
/// This gets instantiated when the CodeCompletionFile gets \#include'ed
/// for preprocessing.
SourceLocation CodeCompletionFileLoc;

/// \brief The source location of the \c import contextual keyword we just
/// lexed, if any.
SourceLocation ModuleImportLoc;

/// \brief The module import path that we're currently processing.
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;

/// \brief Whether the last token we lexed was an '@'.
bool LastTokenWasAt;

/// \brief Whether the module import expects an identifier next. Otherwise,
/// it expects a '.' or ';'.
bool ModuleImportExpectsIdentifier;

/// \brief The source location of the currently-active
/// \#pragma clang arc_cf_code_audited begin.
SourceLocation PragmaARCCFCodeAuditedLoc;

/// \brief The source location of the currently-active
/// \#pragma clang assume_nonnull begin.
SourceLocation PragmaAssumeNonNullLoc;

/// \brief True if we hit the code-completion point.
bool CodeCompletionReached;

/// \brief The code completion token containing the information
/// on the stem that is to be code completed.
IdentifierInfo *CodeCompletionII;

/// \brief The directory that the main file should be considered to occupy,
/// if it does not correspond to a real file (as happens when building a
/// module).
const DirectoryEntry *MainFileDir;

/// \brief The number of bytes that we will initially skip when entering the
/// main file, along with a flag that indicates whether skipping this number
/// of bytes will place the lexer at the start of a line.
///
/// This is used when loading a precompiled preamble.
287 std::pair<int, bool> SkipMainFilePreamble; 288 289 class PreambleConditionalStackStore { 290 enum State { 291 Off = 0, 292 Recording = 1, 293 Replaying = 2, 294 }; 295 296 public: 297 PreambleConditionalStackStore() : ConditionalStackState(Off) {} 298 299 void startRecording() { ConditionalStackState = Recording; } 300 void startReplaying() { ConditionalStackState = Replaying; } 301 bool isRecording() const { return ConditionalStackState == Recording; } 302 bool isReplaying() const { return ConditionalStackState == Replaying; } 303 304 ArrayRef<PPConditionalInfo> getStack() const { 305 return ConditionalStack; 306 } 307 308 void doneReplaying() { 309 ConditionalStack.clear(); 310 ConditionalStackState = Off; 311 } 312 313 void setStack(ArrayRef<PPConditionalInfo> s) { 314 if (!isRecording() && !isReplaying()) 315 return; 316 ConditionalStack.clear(); 317 ConditionalStack.append(s.begin(), s.end()); 318 } 319 320 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 321 322 private: 323 SmallVector<PPConditionalInfo, 4> ConditionalStack; 324 State ConditionalStackState; 325 } PreambleConditionalStack; 326 327 /// \brief The current top of the stack that we're lexing from if 328 /// not expanding a macro and we are lexing directly from source code. 329 /// 330 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 331 std::unique_ptr<Lexer> CurLexer; 332 333 /// \brief The current top of stack that we're lexing from if 334 /// not expanding from a macro and we are lexing from a PTH cache. 335 /// 336 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 337 std::unique_ptr<PTHLexer> CurPTHLexer; 338 339 /// \brief The current top of the stack what we're lexing from 340 /// if not expanding a macro. 341 /// 342 /// This is an alias for either CurLexer or CurPTHLexer. 343 PreprocessorLexer *CurPPLexer; 344 345 /// \brief Used to find the current FileEntry, if CurLexer is non-null 346 /// and if applicable. 
347 /// 348 /// This allows us to implement \#include_next and find directory-specific 349 /// properties. 350 const DirectoryLookup *CurDirLookup; 351 352 /// \brief The current macro we are expanding, if we are expanding a macro. 353 /// 354 /// One of CurLexer and CurTokenLexer must be null. 355 std::unique_ptr<TokenLexer> CurTokenLexer; 356 357 /// \brief The kind of lexer we're currently working with. 358 enum CurLexerKind { 359 CLK_Lexer, 360 CLK_PTHLexer, 361 CLK_TokenLexer, 362 CLK_CachingLexer, 363 CLK_LexAfterModuleImport 364 } CurLexerKind; 365 366 /// \brief If the current lexer is for a submodule that is being built, this 367 /// is that submodule. 368 Module *CurLexerSubmodule; 369 370 /// \brief Keeps track of the stack of files currently 371 /// \#included, and macros currently being expanded from, not counting 372 /// CurLexer/CurTokenLexer. 373 struct IncludeStackInfo { 374 enum CurLexerKind CurLexerKind; 375 Module *TheSubmodule; 376 std::unique_ptr<Lexer> TheLexer; 377 std::unique_ptr<PTHLexer> ThePTHLexer; 378 PreprocessorLexer *ThePPLexer; 379 std::unique_ptr<TokenLexer> TheTokenLexer; 380 const DirectoryLookup *TheDirLookup; 381 382 // The following constructors are completely useless copies of the default 383 // versions, only needed to pacify MSVC. 
384 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, 385 std::unique_ptr<Lexer> &&TheLexer, 386 std::unique_ptr<PTHLexer> &&ThePTHLexer, 387 PreprocessorLexer *ThePPLexer, 388 std::unique_ptr<TokenLexer> &&TheTokenLexer, 389 const DirectoryLookup *TheDirLookup) 390 : CurLexerKind(std::move(CurLexerKind)), 391 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 392 ThePTHLexer(std::move(ThePTHLexer)), 393 ThePPLexer(std::move(ThePPLexer)), 394 TheTokenLexer(std::move(TheTokenLexer)), 395 TheDirLookup(std::move(TheDirLookup)) {} 396 }; 397 std::vector<IncludeStackInfo> IncludeMacroStack; 398 399 /// \brief Actions invoked when some preprocessor activity is 400 /// encountered (e.g. a file is \#included, etc). 401 std::unique_ptr<PPCallbacks> Callbacks; 402 403 struct MacroExpandsInfo { 404 Token Tok; 405 MacroDefinition MD; 406 SourceRange Range; 407 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 408 : Tok(Tok), MD(MD), Range(Range) { } 409 }; 410 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 411 412 /// Information about a name that has been used to define a module macro. 413 struct ModuleMacroInfo { 414 ModuleMacroInfo(MacroDirective *MD) 415 : MD(MD), ActiveModuleMacrosGeneration(0), IsAmbiguous(false) {} 416 417 /// The most recent macro directive for this identifier. 418 MacroDirective *MD; 419 /// The active module macros for this identifier. 420 llvm::TinyPtrVector<ModuleMacro*> ActiveModuleMacros; 421 /// The generation number at which we last updated ActiveModuleMacros. 422 /// \see Preprocessor::VisibleModules. 423 unsigned ActiveModuleMacrosGeneration; 424 /// Whether this macro name is ambiguous. 425 bool IsAmbiguous; 426 /// The module macros that are overridden by this macro. 427 llvm::TinyPtrVector<ModuleMacro*> OverriddenMacros; 428 }; 429 430 /// The state of a macro for an identifier. 
431 class MacroState { 432 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 433 434 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 435 const IdentifierInfo *II) const { 436 if (II->isOutOfDate()) 437 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 438 // FIXME: Find a spare bit on IdentifierInfo and store a 439 // HasModuleMacros flag. 440 if (!II->hasMacroDefinition() || 441 (!PP.getLangOpts().Modules && 442 !PP.getLangOpts().ModulesLocalVisibility) || 443 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 444 return nullptr; 445 446 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 447 if (!Info) { 448 Info = new (PP.getPreprocessorAllocator()) 449 ModuleMacroInfo(State.get<MacroDirective *>()); 450 State = Info; 451 } 452 453 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 454 Info->ActiveModuleMacrosGeneration) 455 PP.updateModuleMacroInfo(II, *Info); 456 return Info; 457 } 458 459 public: 460 MacroState() : MacroState(nullptr) {} 461 MacroState(MacroDirective *MD) : State(MD) {} 462 MacroState(MacroState &&O) noexcept : State(O.State) { 463 O.State = (MacroDirective *)nullptr; 464 } 465 MacroState &operator=(MacroState &&O) noexcept { 466 auto S = O.State; 467 O.State = (MacroDirective *)nullptr; 468 State = S; 469 return *this; 470 } 471 ~MacroState() { 472 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 473 Info->~ModuleMacroInfo(); 474 } 475 476 MacroDirective *getLatest() const { 477 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 478 return Info->MD; 479 return State.get<MacroDirective*>(); 480 } 481 void setLatest(MacroDirective *MD) { 482 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 483 Info->MD = MD; 484 else 485 State = MD; 486 } 487 488 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 489 auto *Info = getModuleInfo(PP, II); 490 return Info ? 
Info->IsAmbiguous : false; 491 } 492 ArrayRef<ModuleMacro *> 493 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 494 if (auto *Info = getModuleInfo(PP, II)) 495 return Info->ActiveModuleMacros; 496 return None; 497 } 498 499 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 500 SourceManager &SourceMgr) const { 501 // FIXME: Incorporate module macros into the result of this. 502 if (auto *Latest = getLatest()) 503 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 504 return MacroDirective::DefInfo(); 505 } 506 507 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 508 if (auto *Info = getModuleInfo(PP, II)) { 509 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 510 Info->ActiveModuleMacros.begin(), 511 Info->ActiveModuleMacros.end()); 512 Info->ActiveModuleMacros.clear(); 513 Info->IsAmbiguous = false; 514 } 515 } 516 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 517 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 518 return Info->OverriddenMacros; 519 return None; 520 } 521 void setOverriddenMacros(Preprocessor &PP, 522 ArrayRef<ModuleMacro *> Overrides) { 523 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 524 if (!Info) { 525 if (Overrides.empty()) 526 return; 527 Info = new (PP.getPreprocessorAllocator()) 528 ModuleMacroInfo(State.get<MacroDirective *>()); 529 State = Info; 530 } 531 Info->OverriddenMacros.clear(); 532 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 533 Overrides.begin(), Overrides.end()); 534 Info->ActiveModuleMacrosGeneration = 0; 535 } 536 }; 537 538 /// For each IdentifierInfo that was associated with a macro, we 539 /// keep a mapping to the history of all macro definitions and #undefs in 540 /// the reverse order (the latest one is in the head of the list). 541 /// 542 /// This mapping lives within the \p CurSubmoduleState. 
543 typedef llvm::DenseMap<const IdentifierInfo *, MacroState> MacroMap; 544 545 friend class ASTReader; 546 547 struct SubmoduleState; 548 549 /// \brief Information about a submodule that we're currently building. 550 struct BuildingSubmoduleInfo { 551 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma, 552 SubmoduleState *OuterSubmoduleState, 553 unsigned OuterPendingModuleMacroNames) 554 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma), 555 OuterSubmoduleState(OuterSubmoduleState), 556 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {} 557 558 /// The module that we are building. 559 Module *M; 560 /// The location at which the module was included. 561 SourceLocation ImportLoc; 562 /// Whether we entered this submodule via a pragma. 563 bool IsPragma; 564 /// The previous SubmoduleState. 565 SubmoduleState *OuterSubmoduleState; 566 /// The number of pending module macro names when we started building this. 567 unsigned OuterPendingModuleMacroNames; 568 }; 569 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 570 571 /// \brief Information about a submodule's preprocessor state. 572 struct SubmoduleState { 573 /// The macros for the submodule. 574 MacroMap Macros; 575 /// The set of modules that are visible within the submodule. 576 VisibleModuleSet VisibleModules; 577 // FIXME: CounterValue? 578 // FIXME: PragmaPushMacroInfo? 579 }; 580 std::map<Module*, SubmoduleState> Submodules; 581 582 /// The preprocessor state for preprocessing outside of any submodule. 583 SubmoduleState NullSubmoduleState; 584 585 /// The current submodule state. Will be \p NullSubmoduleState if we're not 586 /// in a submodule. 587 SubmoduleState *CurSubmoduleState; 588 589 /// The set of known macros exported from modules. 590 llvm::FoldingSet<ModuleMacro> ModuleMacros; 591 592 /// The names of potential module macros that we've not yet processed. 
593 llvm::SmallVector<const IdentifierInfo*, 32> PendingModuleMacroNames; 594 595 /// The list of module macros, for each identifier, that are not overridden by 596 /// any other module macro. 597 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro*>> 598 LeafModuleMacros; 599 600 /// \brief Macros that we want to warn because they are not used at the end 601 /// of the translation unit. 602 /// 603 /// We store just their SourceLocations instead of 604 /// something like MacroInfo*. The benefit of this is that when we are 605 /// deserializing from PCH, we don't need to deserialize identifier & macros 606 /// just so that we can report that they are unused, we just warn using 607 /// the SourceLocations of this set (that will be filled by the ASTReader). 608 /// We are using SmallPtrSet instead of a vector for faster removal. 609 typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy; 610 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 611 612 /// \brief A "freelist" of MacroArg objects that can be 613 /// reused for quick allocation. 614 MacroArgs *MacroArgCache; 615 friend class MacroArgs; 616 617 /// For each IdentifierInfo used in a \#pragma push_macro directive, 618 /// we keep a MacroInfo stack used to restore the previous macro value. 619 llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo; 620 621 // Various statistics we track for performance analysis. 622 unsigned NumDirectives, NumDefined, NumUndefined, NumPragma; 623 unsigned NumIf, NumElse, NumEndif; 624 unsigned NumEnteredSourceFiles, MaxIncludeStackDepth; 625 unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded; 626 unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste; 627 unsigned NumSkipped; 628 629 /// \brief The predefined macros that preprocessor should use from the 630 /// command line etc. 631 std::string Predefines; 632 633 /// \brief The file ID for the preprocessor predefines. 
634 FileID PredefinesFileID; 635 636 /// \{ 637 /// \brief Cache of macro expanders to reduce malloc traffic. 638 enum { TokenLexerCacheSize = 8 }; 639 unsigned NumCachedTokenLexers; 640 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 641 /// \} 642 643 /// \brief Keeps macro expanded tokens for TokenLexers. 644 // 645 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 646 /// going to lex in the cache and when it finishes the tokens are removed 647 /// from the end of the cache. 648 SmallVector<Token, 16> MacroExpandedTokens; 649 std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack; 650 651 /// \brief A record of the macro definitions and expansions that 652 /// occurred during preprocessing. 653 /// 654 /// This is an optional side structure that can be enabled with 655 /// \c createPreprocessingRecord() prior to preprocessing. 656 PreprocessingRecord *Record; 657 658 /// Cached tokens state. 659 typedef SmallVector<Token, 1> CachedTokensTy; 660 661 /// \brief Cached tokens are stored here when we do backtracking or 662 /// lookahead. They are "lexed" by the CachingLex() method. 663 CachedTokensTy CachedTokens; 664 665 /// \brief The position of the cached token that CachingLex() should 666 /// "lex" next. 667 /// 668 /// If it points beyond the CachedTokens vector, it means that a normal 669 /// Lex() should be invoked. 670 CachedTokensTy::size_type CachedLexPos; 671 672 /// \brief Stack of backtrack positions, allowing nested backtracks. 673 /// 674 /// The EnableBacktrackAtThisPos() method pushes a position to 675 /// indicate where CachedLexPos should be set when the BackTrack() method is 676 /// invoked (at which point the last position is popped). 677 std::vector<CachedTokensTy::size_type> BacktrackPositions; 678 679 struct MacroInfoChain { 680 MacroInfo MI; 681 MacroInfoChain *Next; 682 }; 683 684 /// MacroInfos are managed as a chain for easy disposal. This is the head 685 /// of that list. 
MacroInfoChain *MIChainHead;

// Declared here, defined elsewhere; called from MacroState::getModuleInfo
// when an identifier reports isOutOfDate().
void updateOutOfDateIdentifier(IdentifierInfo &II) const;

public:
Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
             DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
             MemoryBufferCache &PCMCache,
             HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
             IdentifierInfoLookup *IILookup = nullptr,
             bool OwnsHeaderSearch = false,
             TranslationUnitKind TUKind = TU_Complete);

~Preprocessor();

/// \brief Initialize the preprocessor using information about the target.
///
/// \param Target is owned by the caller and must remain valid for the
/// lifetime of the preprocessor.
/// \param AuxTarget is owned by the caller and must remain valid for
/// the lifetime of the preprocessor.
void Initialize(const TargetInfo &Target,
                const TargetInfo *AuxTarget = nullptr);

/// \brief Initialize the preprocessor to parse a model file.
///
/// To parse model files the preprocessor of the original source is reused to
/// preserve the identifier table. However to avoid some duplicate
/// information in the preprocessor some cleanup is needed before it is used
/// to parse model files. This method does that cleanup.
void InitializeForModelFile();

/// \brief Cleanup after model file parsing.
void FinalizeForModelFile();

/// \brief Retrieve the preprocessor options used to initialize this
/// preprocessor.
PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }

/// Access or replace the diagnostics engine this preprocessor reports to.
DiagnosticsEngine &getDiagnostics() const { return *Diags; }
void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }

// Plain accessors for the objects this preprocessor holds references or
// pointers to. getTargetInfo() dereferences Target unconditionally, whereas
// getAuxTargetInfo() hands back the (possibly null) pointer as-is.
const LangOptions &getLangOpts() const { return LangOpts; }
const TargetInfo &getTargetInfo() const { return *Target; }
const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
FileManager &getFileManager() const { return FileMgr; }
SourceManager &getSourceManager() const { return SourceMgr; }
MemoryBufferCache &getPCMCache() const { return PCMCache; }
HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }

// Accessors for state held by value inside the preprocessor.
IdentifierTable &getIdentifierTable() { return Identifiers; }
const IdentifierTable &getIdentifierTable() const { return Identifiers; }
SelectorTable &getSelectorTable() { return Selectors; }
Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }

// NOTE(review): PTH is a std::unique_ptr<PTHManager>, so this presumably
// takes ownership of pm -- confirm against the out-of-line definition.
void setPTHManager(PTHManager* pm);

/// Returns the PTHManager, or null if none has been set.
PTHManager *getPTHManager() { return PTH.get(); }

/// Set the external source of macros (stored as a raw pointer).
void setExternalSource(ExternalPreprocessorSource *Source) {
  ExternalSource = Source;
}

ExternalPreprocessorSource *getExternalSource() const {
  return ExternalSource;
}

/// \brief Retrieve the module loader associated with this preprocessor.
ModuleLoader &getModuleLoader() const { return TheModuleLoader; }

/// Forwards the module loader's HadFatalFailure flag.
bool hadModuleLoaderFatalFailure() const {
  return TheModuleLoader.HadFatalFailure;
}

/// \brief True if we are currently preprocessing a \#if or \#elif directive.
bool isParsingIfOrElifDirective() const {
  return ParsingIfOrElifDirective;
}

/// \brief Control whether the preprocessor retains comments in output.
767 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 768 this->KeepComments = KeepComments | KeepMacroComments; 769 this->KeepMacroComments = KeepMacroComments; 770 } 771 772 bool getCommentRetentionState() const { return KeepComments; } 773 774 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } 775 bool getPragmasEnabled() const { return PragmasEnabled; } 776 777 void SetSuppressIncludeNotFoundError(bool Suppress) { 778 SuppressIncludeNotFoundError = Suppress; 779 } 780 781 bool GetSuppressIncludeNotFoundError() { 782 return SuppressIncludeNotFoundError; 783 } 784 785 /// Sets whether the preprocessor is responsible for producing output or if 786 /// it is producing tokens to be consumed by Parse and Sema. 787 void setPreprocessedOutput(bool IsPreprocessedOutput) { 788 PreprocessedOutput = IsPreprocessedOutput; 789 } 790 791 /// Returns true if the preprocessor is responsible for generating output, 792 /// false if it is producing tokens to be consumed by Parse and Sema. 793 bool isPreprocessedOutput() const { return PreprocessedOutput; } 794 795 /// \brief Return true if we are lexing directly from the specified lexer. 796 bool isCurrentLexer(const PreprocessorLexer *L) const { 797 return CurPPLexer == L; 798 } 799 800 /// \brief Return the current lexer being lexed from. 801 /// 802 /// Note that this ignores any potentially active macro expansions and _Pragma 803 /// expansions going on at the time. 804 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 805 806 /// \brief Return the current file lexer being lexed from. 807 /// 808 /// Note that this ignores any potentially active macro expansions and _Pragma 809 /// expansions going on at the time. 810 PreprocessorLexer *getCurrentFileLexer() const; 811 812 /// \brief Return the submodule owning the file being lexed. This may not be 813 /// the current module if we have changed modules since entering the file. 
814 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 815 816 /// \brief Returns the FileID for the preprocessor predefines. 817 FileID getPredefinesFileID() const { return PredefinesFileID; } 818 819 /// \{ 820 /// \brief Accessors for preprocessor callbacks. 821 /// 822 /// Note that this class takes ownership of any PPCallbacks object given to 823 /// it. 824 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } 825 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 826 if (Callbacks) 827 C = llvm::make_unique<PPChainedCallbacks>(std::move(C), 828 std::move(Callbacks)); 829 Callbacks = std::move(C); 830 } 831 /// \} 832 833 bool isMacroDefined(StringRef Id) { 834 return isMacroDefined(&Identifiers.get(Id)); 835 } 836 bool isMacroDefined(const IdentifierInfo *II) { 837 return II->hasMacroDefinition() && 838 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 839 } 840 841 /// \brief Determine whether II is defined as a macro within the module M, 842 /// if that is a module that we've already preprocessed. Does not check for 843 /// macros imported into M. 
844 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 845 if (!II->hasMacroDefinition()) 846 return false; 847 auto I = Submodules.find(M); 848 if (I == Submodules.end()) 849 return false; 850 auto J = I->second.Macros.find(II); 851 if (J == I->second.Macros.end()) 852 return false; 853 auto *MD = J->second.getLatest(); 854 return MD && MD->isDefined(); 855 } 856 857 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 858 if (!II->hasMacroDefinition()) 859 return MacroDefinition(); 860 861 MacroState &S = CurSubmoduleState->Macros[II]; 862 auto *MD = S.getLatest(); 863 while (MD && isa<VisibilityMacroDirective>(MD)) 864 MD = MD->getPrevious(); 865 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 866 S.getActiveModuleMacros(*this, II), 867 S.isAmbiguous(*this, II)); 868 } 869 870 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 871 SourceLocation Loc) { 872 if (!II->hadMacroDefinition()) 873 return MacroDefinition(); 874 875 MacroState &S = CurSubmoduleState->Macros[II]; 876 MacroDirective::DefInfo DI; 877 if (auto *MD = S.getLatest()) 878 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 879 // FIXME: Compute the set of active module macros at the specified location. 880 return MacroDefinition(DI.getDirective(), 881 S.getActiveModuleMacros(*this, II), 882 S.isAmbiguous(*this, II)); 883 } 884 885 /// \brief Given an identifier, return its latest non-imported MacroDirective 886 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. 
887 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 888 if (!II->hasMacroDefinition()) 889 return nullptr; 890 891 auto *MD = getLocalMacroDirectiveHistory(II); 892 if (!MD || MD->getDefinition().isUndefined()) 893 return nullptr; 894 895 return MD; 896 } 897 898 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 899 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 900 } 901 902 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 903 if (!II->hasMacroDefinition()) 904 return nullptr; 905 if (auto MD = getMacroDefinition(II)) 906 return MD.getMacroInfo(); 907 return nullptr; 908 } 909 910 /// \brief Given an identifier, return the latest non-imported macro 911 /// directive for that identifier. 912 /// 913 /// One can iterate over all previous macro directives from the most recent 914 /// one. 915 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 916 917 /// \brief Add a directive to the macro directive history for this identifier. 918 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); 919 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 920 SourceLocation Loc) { 921 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 922 appendMacroDirective(II, MD); 923 return MD; 924 } 925 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 926 MacroInfo *MI) { 927 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 928 } 929 /// \brief Set a MacroDirective that was loaded from a PCH file. 930 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 931 MacroDirective *MD); 932 933 /// \brief Register an exported macro for a module and identifier. 
934 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, 935 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 936 ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II); 937 938 /// \brief Get the list of leaf (non-overridden) module macros for a name. 939 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 940 if (II->isOutOfDate()) 941 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 942 auto I = LeafModuleMacros.find(II); 943 if (I != LeafModuleMacros.end()) 944 return I->second; 945 return None; 946 } 947 948 /// \{ 949 /// Iterators for the macro history table. Currently defined macros have 950 /// IdentifierInfo::hasMacroDefinition() set and an empty 951 /// MacroInfo::getUndefLoc() at the head of the list. 952 typedef MacroMap::const_iterator macro_iterator; 953 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 954 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 955 llvm::iterator_range<macro_iterator> 956 macros(bool IncludeExternalMacros = true) const { 957 return llvm::make_range(macro_begin(IncludeExternalMacros), 958 macro_end(IncludeExternalMacros)); 959 } 960 /// \} 961 962 /// \brief Return the name of the macro defined before \p Loc that has 963 /// spelling \p Tokens. If there are multiple macros with same spelling, 964 /// return the last one defined. 965 StringRef getLastMacroWithSpelling(SourceLocation Loc, 966 ArrayRef<TokenValue> Tokens) const; 967 968 const std::string &getPredefines() const { return Predefines; } 969 /// \brief Set the predefines for this Preprocessor. 970 /// 971 /// These predefines are automatically injected when parsing the main file. 972 void setPredefines(const char *P) { Predefines = P; } 973 void setPredefines(StringRef P) { Predefines = P; } 974 975 /// Return information about the specified preprocessor 976 /// identifier token. 
977 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 978 return &Identifiers.get(Name); 979 } 980 981 /// \brief Add the specified pragma handler to this preprocessor. 982 /// 983 /// If \p Namespace is non-null, then it is a token required to exist on the 984 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 985 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); 986 void AddPragmaHandler(PragmaHandler *Handler) { 987 AddPragmaHandler(StringRef(), Handler); 988 } 989 990 /// \brief Remove the specific pragma handler from this preprocessor. 991 /// 992 /// If \p Namespace is non-null, then it should be the namespace that 993 /// \p Handler was added to. It is an error to remove a handler that 994 /// has not been registered. 995 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); 996 void RemovePragmaHandler(PragmaHandler *Handler) { 997 RemovePragmaHandler(StringRef(), Handler); 998 } 999 1000 /// Install empty handlers for all pragmas (making them ignored). 1001 void IgnorePragmas(); 1002 1003 /// \brief Add the specified comment handler to the preprocessor. 1004 void addCommentHandler(CommentHandler *Handler); 1005 1006 /// \brief Remove the specified comment handler. 1007 /// 1008 /// It is an error to remove a handler that has not been registered. 1009 void removeCommentHandler(CommentHandler *Handler); 1010 1011 /// \brief Set the code completion handler to the given object. 1012 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1013 CodeComplete = &Handler; 1014 } 1015 1016 /// \brief Retrieve the current code-completion handler. 1017 CodeCompletionHandler *getCodeCompletionHandler() const { 1018 return CodeComplete; 1019 } 1020 1021 /// \brief Clear out the code completion handler. 1022 void clearCodeCompletionHandler() { 1023 CodeComplete = nullptr; 1024 } 1025 1026 /// \brief Hook used by the lexer to invoke the "natural language" code 1027 /// completion point. 
1028 void CodeCompleteNaturalLanguage(); 1029 1030 /// \brief Set the code completion token for filtering purposes. 1031 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1032 CodeCompletionII = Filter; 1033 } 1034 1035 /// \brief Get the code completion token for filtering purposes. 1036 StringRef getCodeCompletionFilter() { 1037 if (CodeCompletionII) 1038 return CodeCompletionII->getName(); 1039 return {}; 1040 } 1041 1042 /// \brief Retrieve the preprocessing record, or NULL if there is no 1043 /// preprocessing record. 1044 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1045 1046 /// \brief Create a new preprocessing record, which will keep track of 1047 /// all macro expansions, macro definitions, etc. 1048 void createPreprocessingRecord(); 1049 1050 /// \brief Enter the specified FileID as the main source file, 1051 /// which implicitly adds the builtin defines etc. 1052 void EnterMainSourceFile(); 1053 1054 /// \brief Inform the preprocessor callbacks that processing is complete. 1055 void EndSourceFile(); 1056 1057 /// \brief Add a source file to the top of the include stack and 1058 /// start lexing tokens from it instead of the current buffer. 1059 /// 1060 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 1061 bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir, 1062 SourceLocation Loc); 1063 1064 /// \brief Add a Macro to the top of the include stack and start lexing 1065 /// tokens from it instead of the current buffer. 1066 /// 1067 /// \param Args specifies the tokens input to a function-like macro. 1068 /// \param ILEnd specifies the location of the ')' for a function-like macro 1069 /// or the identifier for an object-like macro. 
1070 void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro, 1071 MacroArgs *Args); 1072 1073 /// \brief Add a "macro" context to the top of the include stack, 1074 /// which will cause the lexer to start returning the specified tokens. 1075 /// 1076 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1077 /// will not be subject to further macro expansion. Otherwise, these tokens 1078 /// will be re-macro-expanded when/if expansion is enabled. 1079 /// 1080 /// If \p OwnsTokens is false, this method assumes that the specified stream 1081 /// of tokens has a permanent owner somewhere, so they do not need to be 1082 /// copied. If it is true, it assumes the array of tokens is allocated with 1083 /// \c new[] and the Preprocessor will delete[] it. 1084private: 1085 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1086 bool DisableMacroExpansion, bool OwnsTokens); 1087 1088public: 1089 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1090 bool DisableMacroExpansion) { 1091 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true); 1092 } 1093 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) { 1094 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false); 1095 } 1096 1097 /// \brief Pop the current lexer/macro exp off the top of the lexer stack. 1098 /// 1099 /// This should only be used in situations where the current state of the 1100 /// top-of-stack lexer is known. 1101 void RemoveTopOfLexerStack(); 1102 1103 /// From the point that this method is called, and until 1104 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1105 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1106 /// make the Preprocessor re-lex the same tokens. 
1107 /// 1108 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1109 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1110 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1111 /// 1112 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1113 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1114 /// tokens will continue indefinitely. 1115 /// 1116 void EnableBacktrackAtThisPos(); 1117 1118 /// \brief Disable the last EnableBacktrackAtThisPos call. 1119 void CommitBacktrackedTokens(); 1120 1121 struct CachedTokensRange { 1122 CachedTokensTy::size_type Begin, End; 1123 }; 1124 1125private: 1126 /// \brief A range of cached tokens that should be erased after lexing 1127 /// when backtracking requires the erasure of such cached tokens. 1128 Optional<CachedTokensRange> CachedTokenRangeToErase; 1129 1130public: 1131 /// \brief Returns the range of cached tokens that were lexed since 1132 /// EnableBacktrackAtThisPos() was previously called. 1133 CachedTokensRange LastCachedTokenRange(); 1134 1135 /// \brief Erase the range of cached tokens that were lexed since 1136 /// EnableBacktrackAtThisPos() was previously called. 1137 void EraseCachedTokens(CachedTokensRange TokenRange); 1138 1139 /// \brief Make Preprocessor re-lex the tokens that were lexed since 1140 /// EnableBacktrackAtThisPos() was previously called. 1141 void Backtrack(); 1142 1143 /// \brief True if EnableBacktrackAtThisPos() was called and 1144 /// caching of tokens is on. 1145 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1146 1147 /// \brief Lex the next token for this preprocessor. 
1148 void Lex(Token &Result); 1149 1150 void LexAfterModuleImport(Token &Result); 1151 1152 void makeModuleVisible(Module *M, SourceLocation Loc); 1153 1154 SourceLocation getModuleImportLoc(Module *M) const { 1155 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1156 } 1157 1158 /// \brief Lex a string literal, which may be the concatenation of multiple 1159 /// string literals and may even come from macro expansion. 1160 /// \returns true on success, false if a error diagnostic has been generated. 1161 bool LexStringLiteral(Token &Result, std::string &String, 1162 const char *DiagnosticTag, bool AllowMacroExpansion) { 1163 if (AllowMacroExpansion) 1164 Lex(Result); 1165 else 1166 LexUnexpandedToken(Result); 1167 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1168 AllowMacroExpansion); 1169 } 1170 1171 /// \brief Complete the lexing of a string literal where the first token has 1172 /// already been lexed (see LexStringLiteral). 1173 bool FinishLexStringLiteral(Token &Result, std::string &String, 1174 const char *DiagnosticTag, 1175 bool AllowMacroExpansion); 1176 1177 /// \brief Lex a token. If it's a comment, keep lexing until we get 1178 /// something not a comment. 1179 /// 1180 /// This is useful in -E -C mode where comments would foul up preprocessor 1181 /// directive handling. 1182 void LexNonComment(Token &Result) { 1183 do 1184 Lex(Result); 1185 while (Result.getKind() == tok::comment); 1186 } 1187 1188 /// \brief Just like Lex, but disables macro expansion of identifier tokens. 1189 void LexUnexpandedToken(Token &Result) { 1190 // Disable macro expansion. 1191 bool OldVal = DisableMacroExpansion; 1192 DisableMacroExpansion = true; 1193 // Lex the token. 1194 Lex(Result); 1195 1196 // Reenable it. 1197 DisableMacroExpansion = OldVal; 1198 } 1199 1200 /// \brief Like LexNonComment, but this disables macro expansion of 1201 /// identifier tokens. 
1202 void LexUnexpandedNonComment(Token &Result) { 1203 do 1204 LexUnexpandedToken(Result); 1205 while (Result.getKind() == tok::comment); 1206 } 1207 1208 /// \brief Parses a simple integer literal to get its numeric value. Floating 1209 /// point literals and user defined literals are rejected. Used primarily to 1210 /// handle pragmas that accept integer arguments. 1211 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1212 1213 /// Disables macro expansion everywhere except for preprocessor directives. 1214 void SetMacroExpansionOnlyInDirectives() { 1215 DisableMacroExpansion = true; 1216 MacroExpansionInDirectivesOverride = true; 1217 } 1218 1219 /// \brief Peeks ahead N tokens and returns that token without consuming any 1220 /// tokens. 1221 /// 1222 /// LookAhead(0) returns the next token that would be returned by Lex(), 1223 /// LookAhead(1) returns the token after it, etc. This returns normal 1224 /// tokens after phase 5. As such, it is equivalent to using 1225 /// 'Lex', not 'LexUnexpandedToken'. 1226 const Token &LookAhead(unsigned N) { 1227 if (CachedLexPos + N < CachedTokens.size()) 1228 return CachedTokens[CachedLexPos+N]; 1229 else 1230 return PeekAhead(N+1); 1231 } 1232 1233 /// \brief When backtracking is enabled and tokens are cached, 1234 /// this allows to revert a specific number of tokens. 1235 /// 1236 /// Note that the number of tokens being reverted should be up to the last 1237 /// backtrack position, not more. 1238 void RevertCachedTokens(unsigned N) { 1239 assert(isBacktrackEnabled() && 1240 "Should only be called when tokens are cached for backtracking"); 1241 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1242 && "Should revert tokens up to the last backtrack position, not more"); 1243 assert(signed(CachedLexPos) - signed(N) >= 0 && 1244 "Corrupted backtrack positions ?"); 1245 CachedLexPos -= N; 1246 } 1247 1248 /// \brief Enters a token in the token stream to be lexed next. 
1249 /// 1250 /// If BackTrack() is called afterwards, the token will remain at the 1251 /// insertion point. 1252 void EnterToken(const Token &Tok) { 1253 EnterCachingLexMode(); 1254 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1255 } 1256 1257 /// We notify the Preprocessor that if it is caching tokens (because 1258 /// backtrack is enabled) it should replace the most recent cached tokens 1259 /// with the given annotation token. This function has no effect if 1260 /// backtracking is not enabled. 1261 /// 1262 /// Note that the use of this function is just for optimization, so that the 1263 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1264 /// invoked. 1265 void AnnotateCachedTokens(const Token &Tok) { 1266 assert(Tok.isAnnotation() && "Expected annotation token"); 1267 if (CachedLexPos != 0 && isBacktrackEnabled()) 1268 AnnotatePreviousCachedTokens(Tok); 1269 } 1270 1271 /// Get the location of the last cached token, suitable for setting the end 1272 /// location of an annotation token. 1273 SourceLocation getLastCachedTokenLocation() const { 1274 assert(CachedLexPos != 0); 1275 return CachedTokens[CachedLexPos-1].getLastLoc(); 1276 } 1277 1278 /// \brief Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1279 /// CachedTokens. 1280 bool IsPreviousCachedToken(const Token &Tok) const; 1281 1282 /// \brief Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1283 /// in \p NewToks. 1284 /// 1285 /// Useful when a token needs to be split in smaller ones and CachedTokens 1286 /// most recent token must to be updated to reflect that. 1287 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1288 1289 /// \brief Replace the last token with an annotation token. 1290 /// 1291 /// Like AnnotateCachedTokens(), this routine replaces an 1292 /// already-parsed (and resolved) token with an annotation 1293 /// token. 
However, this routine only replaces the last token with 1294 /// the annotation token; it does not affect any other cached 1295 /// tokens. This function has no effect if backtracking is not 1296 /// enabled. 1297 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1298 assert(Tok.isAnnotation() && "Expected annotation token"); 1299 if (CachedLexPos != 0 && isBacktrackEnabled()) 1300 CachedTokens[CachedLexPos-1] = Tok; 1301 } 1302 1303 /// Enter an annotation token into the token stream. 1304 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1305 void *AnnotationVal); 1306 1307 /// Update the current token to represent the provided 1308 /// identifier, in order to cache an action performed by typo correction. 1309 void TypoCorrectToken(const Token &Tok) { 1310 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1311 if (CachedLexPos != 0 && isBacktrackEnabled()) 1312 CachedTokens[CachedLexPos-1] = Tok; 1313 } 1314 1315 /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/ 1316 /// CurTokenLexer pointers. 1317 void recomputeCurLexerKind(); 1318 1319 /// \brief Returns true if incremental processing is enabled 1320 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1321 1322 /// \brief Enables the incremental processing 1323 void enableIncrementalProcessing(bool value = true) { 1324 IncrementalProcessing = value; 1325 } 1326 1327 /// \brief Specify the point at which code-completion will be performed. 1328 /// 1329 /// \param File the file in which code completion should occur. If 1330 /// this file is included multiple times, code-completion will 1331 /// perform completion the first time it is included. If NULL, this 1332 /// function clears out the code-completion point. 1333 /// 1334 /// \param Line the line at which code completion should occur 1335 /// (1-based). 1336 /// 1337 /// \param Column the column at which code completion should occur 1338 /// (1-based). 
1339 /// 1340 /// \returns true if an error occurred, false otherwise. 1341 bool SetCodeCompletionPoint(const FileEntry *File, 1342 unsigned Line, unsigned Column); 1343 1344 /// \brief Determine if we are performing code completion. 1345 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1346 1347 /// \brief Returns the location of the code-completion point. 1348 /// 1349 /// Returns an invalid location if code-completion is not enabled or the file 1350 /// containing the code-completion point has not been lexed yet. 1351 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 1352 1353 /// \brief Returns the start location of the file of code-completion point. 1354 /// 1355 /// Returns an invalid location if code-completion is not enabled or the file 1356 /// containing the code-completion point has not been lexed yet. 1357 SourceLocation getCodeCompletionFileLoc() const { 1358 return CodeCompletionFileLoc; 1359 } 1360 1361 /// \brief Returns true if code-completion is enabled and we have hit the 1362 /// code-completion point. 1363 bool isCodeCompletionReached() const { return CodeCompletionReached; } 1364 1365 /// \brief Note that we hit the code-completion point. 1366 void setCodeCompletionReached() { 1367 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 1368 CodeCompletionReached = true; 1369 // Silence any diagnostics that occur after we hit the code-completion. 1370 getDiagnostics().setSuppressAllDiagnostics(true); 1371 } 1372 1373 /// \brief The location of the currently-active \#pragma clang 1374 /// arc_cf_code_audited begin. 1375 /// 1376 /// Returns an invalid location if there is no such pragma active. 1377 SourceLocation getPragmaARCCFCodeAuditedLoc() const { 1378 return PragmaARCCFCodeAuditedLoc; 1379 } 1380 1381 /// \brief Set the location of the currently-active \#pragma clang 1382 /// arc_cf_code_audited begin. An invalid location ends the pragma. 
1383 void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) { 1384 PragmaARCCFCodeAuditedLoc = Loc; 1385 } 1386 1387 /// \brief The location of the currently-active \#pragma clang 1388 /// assume_nonnull begin. 1389 /// 1390 /// Returns an invalid location if there is no such pragma active. 1391 SourceLocation getPragmaAssumeNonNullLoc() const { 1392 return PragmaAssumeNonNullLoc; 1393 } 1394 1395 /// \brief Set the location of the currently-active \#pragma clang 1396 /// assume_nonnull begin. An invalid location ends the pragma. 1397 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 1398 PragmaAssumeNonNullLoc = Loc; 1399 } 1400 1401 /// \brief Set the directory in which the main file should be considered 1402 /// to have been found, if it is not a real file. 1403 void setMainFileDir(const DirectoryEntry *Dir) { 1404 MainFileDir = Dir; 1405 } 1406 1407 /// \brief Instruct the preprocessor to skip part of the main source file. 1408 /// 1409 /// \param Bytes The number of bytes in the preamble to skip. 1410 /// 1411 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 1412 /// start of a line. 1413 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 1414 SkipMainFilePreamble.first = Bytes; 1415 SkipMainFilePreamble.second = StartOfLine; 1416 } 1417 1418 /// Forwarding function for diagnostics. This emits a diagnostic at 1419 /// the specified Token's location, translating the token's start 1420 /// position in the current buffer into a SourcePosition object for rendering. 1421 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 1422 return Diags->Report(Loc, DiagID); 1423 } 1424 1425 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 1426 return Diags->Report(Tok.getLocation(), DiagID); 1427 } 1428 1429 /// Return the 'spelling' of the token at the given 1430 /// location; does not go up to the spelling location or down to the 1431 /// expansion location. 
1432 /// 1433 /// \param buffer A buffer which will be used only if the token requires 1434 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 1435 /// \param invalid If non-null, will be set \c true if an error occurs. 1436 StringRef getSpelling(SourceLocation loc, 1437 SmallVectorImpl<char> &buffer, 1438 bool *invalid = nullptr) const { 1439 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 1440 } 1441 1442 /// \brief Return the 'spelling' of the Tok token. 1443 /// 1444 /// The spelling of a token is the characters used to represent the token in 1445 /// the source file after trigraph expansion and escaped-newline folding. In 1446 /// particular, this wants to get the true, uncanonicalized, spelling of 1447 /// things like digraphs, UCNs, etc. 1448 /// 1449 /// \param Invalid If non-null, will be set \c true if an error occurs. 1450 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 1451 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 1452 } 1453 1454 /// \brief Get the spelling of a token into a preallocated buffer, instead 1455 /// of as an std::string. 1456 /// 1457 /// The caller is required to allocate enough space for the token, which is 1458 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 1459 /// actual result is returned. 1460 /// 1461 /// Note that this method may do two possible things: it may either fill in 1462 /// the buffer specified with characters, or it may *change the input pointer* 1463 /// to point to a constant buffer with the data already in it (avoiding a 1464 /// copy). The caller is not allowed to modify the returned buffer pointer 1465 /// if an internal buffer is returned. 1466 unsigned getSpelling(const Token &Tok, const char *&Buffer, 1467 bool *Invalid = nullptr) const { 1468 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 1469 } 1470 1471 /// \brief Get the spelling of a token into a SmallVector. 
1472 /// 1473 /// Note that the returned StringRef may not point to the 1474 /// supplied buffer if a copy can be avoided. 1475 StringRef getSpelling(const Token &Tok, 1476 SmallVectorImpl<char> &Buffer, 1477 bool *Invalid = nullptr) const; 1478 1479 /// \brief Relex the token at the specified location. 1480 /// \returns true if there was a failure, false on success. 1481 bool getRawToken(SourceLocation Loc, Token &Result, 1482 bool IgnoreWhiteSpace = false) { 1483 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 1484 } 1485 1486 /// \brief Given a Token \p Tok that is a numeric constant with length 1, 1487 /// return the character. 1488 char 1489 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 1490 bool *Invalid = nullptr) const { 1491 assert(Tok.is(tok::numeric_constant) && 1492 Tok.getLength() == 1 && "Called on unsupported token"); 1493 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 1494 1495 // If the token is carrying a literal data pointer, just use it. 1496 if (const char *D = Tok.getLiteralData()) 1497 return *D; 1498 1499 // Otherwise, fall back on getCharacterData, which is slower, but always 1500 // works. 1501 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 1502 } 1503 1504 /// \brief Retrieve the name of the immediate macro expansion. 1505 /// 1506 /// This routine starts from a source location, and finds the name of the 1507 /// macro responsible for its immediate expansion. It looks through any 1508 /// intervening macro argument expansions to compute this. It returns a 1509 /// StringRef that refers to the SourceManager-owned buffer of the source 1510 /// where that macro name is spelled. Thus, the result shouldn't out-live 1511 /// the SourceManager. 
1512 StringRef getImmediateMacroName(SourceLocation Loc) { 1513 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 1514 } 1515 1516 /// \brief Plop the specified string into a scratch buffer and set the 1517 /// specified token's location and length to it. 1518 /// 1519 /// If specified, the source location provides a location of the expansion 1520 /// point of the token. 1521 void CreateString(StringRef Str, Token &Tok, 1522 SourceLocation ExpansionLocStart = SourceLocation(), 1523 SourceLocation ExpansionLocEnd = SourceLocation()); 1524 1525 /// \brief Computes the source location just past the end of the 1526 /// token at this source location. 1527 /// 1528 /// This routine can be used to produce a source location that 1529 /// points just past the end of the token referenced by \p Loc, and 1530 /// is generally used when a diagnostic needs to point just after a 1531 /// token where it expected something different that it received. If 1532 /// the returned source location would not be meaningful (e.g., if 1533 /// it points into a macro), this routine returns an invalid 1534 /// source location. 1535 /// 1536 /// \param Offset an offset from the end of the token, where the source 1537 /// location should refer to. The default offset (0) produces a source 1538 /// location pointing just past the end of the token; an offset of 1 produces 1539 /// a source location pointing to the last character in the token, etc. 1540 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 1541 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 1542 } 1543 1544 /// \brief Returns true if the given MacroID location points at the first 1545 /// token of the macro expansion. 1546 /// 1547 /// \param MacroBegin If non-null and function returns true, it is set to 1548 /// begin location of the macro. 
1549 bool isAtStartOfMacroExpansion(SourceLocation loc, 1550 SourceLocation *MacroBegin = nullptr) const { 1551 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 1552 MacroBegin); 1553 } 1554 1555 /// \brief Returns true if the given MacroID location points at the last 1556 /// token of the macro expansion. 1557 /// 1558 /// \param MacroEnd If non-null and function returns true, it is set to 1559 /// end location of the macro. 1560 bool isAtEndOfMacroExpansion(SourceLocation loc, 1561 SourceLocation *MacroEnd = nullptr) const { 1562 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 1563 } 1564 1565 /// \brief Print the token to stderr, used for debugging. 1566 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 1567 void DumpLocation(SourceLocation Loc) const; 1568 void DumpMacro(const MacroInfo &MI) const; 1569 void dumpMacroInfo(const IdentifierInfo *II); 1570 1571 /// \brief Given a location that specifies the start of a 1572 /// token, return a new location that specifies a character within the token. 1573 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 1574 unsigned Char) const { 1575 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 1576 } 1577 1578 /// \brief Increment the counters for the number of token paste operations 1579 /// performed. 1580 /// 1581 /// If fast was specified, this is a 'fast paste' case we handled. 1582 void IncrementPasteCounter(bool isFast) { 1583 if (isFast) 1584 ++NumFastTokenPaste; 1585 else 1586 ++NumTokenPaste; 1587 } 1588 1589 void PrintStats(); 1590 1591 size_t getTotalMemory() const; 1592 1593 /// When the macro expander pastes together a comment (/##/) in Microsoft 1594 /// mode, this method handles updating the current state, returning the 1595 /// token on the next source line. 
1596 void HandleMicrosoftCommentPaste(Token &Tok); 1597 1598 //===--------------------------------------------------------------------===// 1599 // Preprocessor callback methods. These are invoked by a lexer as various 1600 // directives and events are found. 1601 1602 /// Given a tok::raw_identifier token, look up the 1603 /// identifier information for the token and install it into the token, 1604 /// updating the token kind accordingly. 1605 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 1606 1607private: 1608 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 1609 1610public: 1611 1612 /// \brief Specifies the reason for poisoning an identifier. 1613 /// 1614 /// If that identifier is accessed while poisoned, then this reason will be 1615 /// used instead of the default "poisoned" diagnostic. 1616 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 1617 1618 /// \brief Display reason for poisoned identifier. 1619 void HandlePoisonedIdentifier(Token & Tok); 1620 1621 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 1622 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 1623 if(II->isPoisoned()) { 1624 HandlePoisonedIdentifier(Identifier); 1625 } 1626 } 1627 } 1628 1629private: 1630 /// Identifiers used for SEH handling in Borland. 
/// These are only allowed in particular circumstances.
// __except block
IdentifierInfo *Ident__exception_code,
               *Ident___exception_code,
               *Ident_GetExceptionCode;
// __except filter expression
IdentifierInfo *Ident__exception_info,
               *Ident___exception_info,
               *Ident_GetExceptionInfo;
// __finally
IdentifierInfo *Ident__abnormal_termination,
               *Ident___abnormal_termination,
               *Ident_AbnormalTermination;

const char *getCurLexerEndPos();
void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);

public:
// NOTE(review): presumably poisons (or, with Poison == false, unpoisons) the
// SEH identifiers declared above -- confirm against the definition.
void PoisonSEHIdentifiers(bool Poison = true); // Borland

/// \brief Callback invoked when the lexer reads an identifier and has
/// filled in the token's IdentifierInfo member.
///
/// This callback potentially macro expands it or turns it into a named
/// token (like 'for').
///
/// \returns true if we actually computed a token, false if we need to
/// lex again.
bool HandleIdentifier(Token &Identifier);


/// \brief Callback invoked when the lexer hits the end of the current file.
///
/// This either returns the EOF token and returns true, or
/// pops a level off the include stack and returns false, at which point the
/// client should call lex again.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);

/// \brief Callback invoked when the current TokenLexer hits the end of its
/// token stream.
bool HandleEndOfTokenLexer(Token &Result);

/// \brief Callback invoked when the lexer sees a # token at the start of a
/// line.
///
/// This consumes the directive, modifies the lexer/preprocessor state, and
/// advances the lexer(s) so that the next token read is the correct one.
void HandleDirective(Token &Result);

/// \brief Ensure that the next token is a tok::eod token.
1681 /// 1682 /// If not, emit a diagnostic and consume up until the eod. 1683 /// If \p EnableMacros is true, then we consider macros that expand to zero 1684 /// tokens as being ok. 1685 void CheckEndOfDirective(const char *Directive, bool EnableMacros = false); 1686 1687 /// \brief Read and discard all tokens remaining on the current line until 1688 /// the tok::eod token is found. 1689 void DiscardUntilEndOfDirective(); 1690 1691 /// \brief Returns true if the preprocessor has seen a use of 1692 /// __DATE__ or __TIME__ in the file so far. 1693 bool SawDateOrTime() const { 1694 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 1695 } 1696 unsigned getCounterValue() const { return CounterValue; } 1697 void setCounterValue(unsigned V) { CounterValue = V; } 1698 1699 /// \brief Retrieves the module that we're currently building, if any. 1700 Module *getCurrentModule(); 1701 1702 /// \brief Allocate a new MacroInfo object with the provided SourceLocation. 1703 MacroInfo *AllocateMacroInfo(SourceLocation L); 1704 1705 /// \brief Turn the specified lexer token into a fully checked and spelled 1706 /// filename, e.g. as an operand of \#include. 1707 /// 1708 /// The caller is expected to provide a buffer that is large enough to hold 1709 /// the spelling of the filename, but is also expected to handle the case 1710 /// when this method decides to use a different buffer. 1711 /// 1712 /// \returns true if the input filename was in <>'s or false if it was 1713 /// in ""'s. 1714 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename); 1715 1716 /// \brief Given a "foo" or \<foo> reference, look up the indicated file. 1717 /// 1718 /// Returns null on failure. \p isAngled indicates whether the file 1719 /// reference is for system \#include's or not (i.e. using <> instead of ""). 
1720 const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename, 1721 bool isAngled, const DirectoryLookup *FromDir, 1722 const FileEntry *FromFile, 1723 const DirectoryLookup *&CurDir, 1724 SmallVectorImpl<char> *SearchPath, 1725 SmallVectorImpl<char> *RelativePath, 1726 ModuleMap::KnownHeader *SuggestedModule, 1727 bool *IsMapped, bool SkipCache = false); 1728 1729 /// \brief Get the DirectoryLookup structure used to find the current 1730 /// FileEntry, if CurLexer is non-null and if applicable. 1731 /// 1732 /// This allows us to implement \#include_next and find directory-specific 1733 /// properties. 1734 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; } 1735 1736 /// \brief Return true if we're in the top-level file, not in a \#include. 1737 bool isInPrimaryFile() const; 1738 1739 /// \brief Handle cases where the \#include name is expanded 1740 /// from a macro as multiple tokens, which need to be glued together. 1741 /// 1742 /// This occurs for code like: 1743 /// \code 1744 /// \#define FOO <x/y.h> 1745 /// \#include FOO 1746 /// \endcode 1747 /// because in this case, "<x/y.h>" is returned as 7 tokens, not one. 1748 /// 1749 /// This code concatenates and consumes tokens up to the '>' token. It 1750 /// returns false if the > was found, otherwise it returns true if it finds 1751 /// and consumes the EOD marker. 1752 bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer, 1753 SourceLocation &End); 1754 1755 /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is 1756 /// followed by EOD. Return true if the token is not a valid on-off-switch. 
1757 bool LexOnOffSwitch(tok::OnOffSwitch &OOS); 1758 1759 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 1760 bool *ShadowFlag = nullptr); 1761 1762 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 1763 Module *LeaveSubmodule(bool ForPragma); 1764 1765private: 1766 void PushIncludeMacroStack() { 1767 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); 1768 IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule, 1769 std::move(CurLexer), std::move(CurPTHLexer), 1770 CurPPLexer, std::move(CurTokenLexer), 1771 CurDirLookup); 1772 CurPPLexer = nullptr; 1773 } 1774 1775 void PopIncludeMacroStack() { 1776 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 1777 CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer); 1778 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 1779 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 1780 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 1781 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 1782 CurLexerKind = IncludeMacroStack.back().CurLexerKind; 1783 IncludeMacroStack.pop_back(); 1784 } 1785 1786 void PropagateLineStartLeadingSpaceInfo(Token &Result); 1787 1788 /// Determine whether we need to create module macros for #defines in the 1789 /// current context. 1790 bool needModuleMacros() const; 1791 1792 /// Update the set of active module macros and ambiguity flag for a module 1793 /// macro name. 1794 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 1795 1796 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 1797 SourceLocation Loc); 1798 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 1799 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 1800 bool isPublic); 1801 1802 /// \brief Lex and validate a macro name, which occurs after a 1803 /// \#define or \#undef. 
///
/// \param MacroNameTok Token that represents the name defined or undefined.
/// \param IsDefineUndef Kind of preprocessor directive.
/// \param ShadowFlag Points to flag that is set if macro name shadows
///                   a keyword.
///
/// This emits a diagnostic, sets the token kind to eod,
/// and discards the rest of the macro line if the macro name is invalid.
void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
                   bool *ShadowFlag = nullptr);

/// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
/// entire line) of the macro's tokens and adds them to MacroInfo, and while
/// doing so performs certain validity checks including (but not limited to):
///   - # (stringization) is followed by a macro parameter
/// \param MacroNameTok - Token that represents the macro name
/// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
///
/// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
/// returns a nullptr if an invalid sequence of tokens is encountered.
MacroInfo *ReadOptionalMacroParameterListAndBody(
    const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);

/// The ( starting an argument list of a macro definition has just been read.
/// Lex the rest of the parameters and the closing ), updating \p MI with
/// what we learn and saving in \p LastTok the last token read.
/// Return true if an error occurs parsing the arg list.
bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);

/// We just read a \#if or related directive and decided that the
/// subsequent tokens are in the \#if'd out portion of the
/// file.  Lex the rest of the file, until we see an \#endif.
/// If \p FoundNonSkipPortion is true, then we have already emitted code for
/// part of this \#if directive, so \#else/\#elif blocks should never be
/// entered.  If \p FoundElse is false, then \#else directives are ok; if
/// not, then we have already seen one, so a \#else directive is a duplicate.
/// When this returns, the caller can lex the first valid token.
void SkipExcludedConditionalBlock(const Token &HashToken,
                                  SourceLocation IfTokenLoc,
                                  bool FoundNonSkipPortion, bool FoundElse,
                                  SourceLocation ElseLoc = SourceLocation());

/// \brief A fast PTH version of SkipExcludedConditionalBlock.
void PTHSkipExcludedConditionalBlock();

/// Information about the result for evaluating an expression for a
/// preprocessor directive.
struct DirectiveEvalResult {
  /// Whether the expression was evaluated as true or not.
  bool Conditional;
  /// True if the expression contained identifiers that were undefined.
  bool IncludedUndefinedIds;
};

/// \brief Evaluate an integer constant expression that may occur after a
/// \#if or \#elif directive and return a \p DirectiveEvalResult object.
///
/// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);

/// \brief Install the standard preprocessor pragmas:
/// \#pragma GCC poison/system_header/dependency and \#pragma once.
void RegisterBuiltinPragmas();

/// \brief Register builtin macros such as __LINE__ with the identifier table.
void RegisterBuiltinMacros();

/// If an identifier token is read that is to be expanded as a macro, handle
/// it and return the next token as 'Tok'.  If we lexed a token, return true;
/// otherwise the caller should lex again.
bool HandleMacroExpandedIdentifier(Token &Tok, const MacroDefinition &MD);

/// \brief Cache macro expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
/// going to lex to the cache, and when it finishes the tokens are removed
/// from the end of the cache.
Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
                                ArrayRef<Token> tokens);
void removeCachedMacroExpandedTokensOfLastLexer();
friend void TokenLexer::ExpandFunctionArguments();

/// Determine whether the next preprocessor token to be
/// lexed is a '('.  If so, consume the token and return true, if not, this
/// method should have no observable side-effect on the lexed tokens.
bool isNextPPTokenLParen();

/// After reading "MACRO(", this method is invoked to read all of the formal
/// arguments specified for the macro invocation.  Returns null on error.
MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
                                     SourceLocation &ExpansionEnd);

/// \brief If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
void ExpandBuiltinMacro(Token &Tok);

/// \brief Read a \c _Pragma directive, slice it up, process it, then
/// return the first token after the directive.
/// This assumes that the \c _Pragma token has just been read into \p Tok.
void Handle_Pragma(Token &Tok);

/// \brief Like Handle_Pragma except the pragma text is not enclosed within
/// a string literal.
void HandleMicrosoft__pragma(Token &Tok);

/// \brief Add a lexer to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
1912 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir); 1913 1914 /// \brief Add a lexer to the top of the include stack and 1915 /// start getting tokens from it using the PTH cache. 1916 void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir); 1917 1918 /// \brief Set the FileID for the preprocessor predefines. 1919 void setPredefinesFileID(FileID FID) { 1920 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 1921 PredefinesFileID = FID; 1922 } 1923 1924 /// \brief Returns true if we are lexing from a file and not a 1925 /// pragma or a macro. 1926 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 1927 return L ? !L->isPragmaLexer() : P != nullptr; 1928 } 1929 1930 static bool IsFileLexer(const IncludeStackInfo& I) { 1931 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 1932 } 1933 1934 bool IsFileLexer() const { 1935 return IsFileLexer(CurLexer.get(), CurPPLexer); 1936 } 1937 1938 //===--------------------------------------------------------------------===// 1939 // Caching stuff. 1940 void CachingLex(Token &Result); 1941 bool InCachingLexMode() const { 1942 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 1943 // that we are past EOF, not that we are in CachingLex mode. 1944 return !CurPPLexer && !CurTokenLexer && !CurPTHLexer && 1945 !IncludeMacroStack.empty(); 1946 } 1947 void EnterCachingLexMode(); 1948 void ExitCachingLexMode() { 1949 if (InCachingLexMode()) 1950 RemoveTopOfLexerStack(); 1951 } 1952 const Token &PeekAhead(unsigned N); 1953 void AnnotatePreviousCachedTokens(const Token &Tok); 1954 1955 //===--------------------------------------------------------------------===// 1956 /// Handle*Directive - implement the various preprocessor directives. These 1957 /// should side-effect the current preprocessor object so that the next call 1958 /// to Lex() will return the appropriate token next. 
void HandleLineDirective();
void HandleDigitDirective(Token &Tok);
void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
void HandleIdentSCCSDirective(Token &Tok);
void HandleMacroPublicDirective(Token &Tok);
void HandleMacroPrivateDirective();

// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc,
                            Token &Tok,
                            const DirectoryLookup *LookupFrom = nullptr,
                            const FileEntry *LookupFromFile = nullptr,
                            bool isImport = false);
void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
void HandleMicrosoftImportDirective(Token &Tok);

public:
/// Check that the given module is available, producing a diagnostic if not.
/// \return \c true if the check failed (because the module is not available).
///         \c false if the module appears to be usable.
static bool checkModuleIsAvailable(const LangOptions &LangOpts,
                                   const TargetInfo &TargetInfo,
                                   DiagnosticsEngine &Diags, Module *M);

// Module inclusion testing.
/// \brief Find the module that owns the source or header file that
/// \p Loc points to. If the location is in a file that was included
/// into a module, or is outside any module, returns nullptr.
Module *getModuleForLocation(SourceLocation Loc);

/// \brief We want to produce a diagnostic at location IncLoc concerning a
/// missing module import.
///
/// \param IncLoc The location at which the missing import was detected.
/// \param M The desired module.
/// \param MLoc A location within the desired module at which some desired
///        effect occurred (e.g., where a desired entity was declared).
///
/// \return A file that can be #included to import a module containing MLoc.
///         Null if no such file could be determined or if a #include is not
///         appropriate.
const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                                        Module *M,
                                                        SourceLocation MLoc);

/// Forwards to PreambleConditionalStack.isRecording().
bool isRecordingPreamble() const {
  return PreambleConditionalStack.isRecording();
}

/// Forwards to PreambleConditionalStack.hasRecordedPreamble().
bool hasRecordedPreamble() const {
  return PreambleConditionalStack.hasRecordedPreamble();
}

/// Returns whatever PreambleConditionalStack.getStack() yields.
ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
  return PreambleConditionalStack.getStack();
}

/// Passes \p s to PreambleConditionalStack.setStack().
void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
  PreambleConditionalStack.setStack(s);
}

/// Calls PreambleConditionalStack.startReplaying() first and then passes
/// \p s to setStack(), in that order.
void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
  PreambleConditionalStack.startReplaying();
  PreambleConditionalStack.setStack(s);
}

private:
/// \brief After processing the predefined file, initialize the conditional
/// stack from the preamble.
void replayPreambleConditionalStack();

// Macro handling.
void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef);
void HandleUndefDirective();

// Conditional Inclusion.
void HandleIfdefDirective(Token &Tok, const Token &HashToken,
                          bool isIfndef, bool ReadAnyTokensBeforeDirective);
void HandleIfDirective(Token &Tok, const Token &HashToken,
                       bool ReadAnyTokensBeforeDirective);
void HandleEndifDirective(Token &Tok);
void HandleElseDirective(Token &Tok, const Token &HashToken);
void HandleElifDirective(Token &Tok, const Token &HashToken);

// Pragmas.
void HandlePragmaDirective(SourceLocation IntroducerLoc,
                           PragmaIntroducerKind Introducer);
public:
void HandlePragmaOnce(Token &OnceTok);
void HandlePragmaMark();
void HandlePragmaPoison();
void HandlePragmaSystemHeader(Token &SysHeaderTok);
void HandlePragmaDependency(Token &DependencyTok);
void HandlePragmaPushMacro(Token &Tok);
void HandlePragmaPopMacro(Token &Tok);
void HandlePragmaIncludeAlias(Token &Tok);
void HandlePragmaModuleBuild(Token &Tok);
IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);

// Return true and store the first token only if any CommentHandler
// has inserted some tokens and getCommentRetentionState() is false.
bool HandleComment(Token &Token, SourceRange Comment);

/// \brief A macro is used, update information about macros that need unused
/// warnings.
void markMacroAsUsed(MacroInfo *MI);
};

/// \brief Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
class CommentHandler {
public:
  virtual ~CommentHandler();

  // The handler shall return true if it has pushed any tokens
  // to be read using e.g. EnterToken or EnterTokenStream.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};

/// \brief Registry of pragma handlers added by plugins.
typedef llvm::Registry<PragmaHandler> PragmaHandlerRegistry;

}  // end namespace clang

#endif