Preprocessor.h revision d217773f106856a11879ec79dc468efefaf2ee75
1//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the Preprocessor interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15#define LLVM_CLANG_LEX_PREPROCESSOR_H
16
#include "clang/Lex/IdentifierTable.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/MacroExpander.h"
#include "clang/Basic/SourceLocation.h"
#include <cassert>
#include <cstring>
#include <string>
#include <vector>
21
22namespace clang {
23
// Forward declarations (alphabetical).  This header only names these types
// through pointers and references, so their full definitions are not needed.
class DirectoryLookup;
class FileEntry;
class FileManager;
class HeaderSearch;
class PPCallbacks;
class PragmaHandler;
class PragmaNamespace;
class ScratchBuffer;
class SourceManager;
class TargetInfo;
34
35/// Preprocessor - This object forms engages in a tight little dance to
36/// efficiently preprocess tokens.  Lexers know only about tokens within a
37/// single source file, and don't know anything about preprocessor-level issues
38/// like the #include stack, token expansion, etc.
39///
40class Preprocessor {
41  Diagnostic        &Diags;
42  const LangOptions &Features;
43  TargetInfo        &Target;
44  FileManager       &FileMgr;
45  SourceManager     &SourceMgr;
46  ScratchBuffer     *ScratchBuf;
47  HeaderSearch      &HeaderInfo;
48
49  /// Identifiers for builtin macros and other builtins.
50  IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
51  IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
52  IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
53  IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
54  IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
55  IdentifierInfo *Ident_Pragma, *Ident__VA_ARGS__; // _Pragma, __VA_ARGS__
56
57  SourceLocation DATELoc, TIMELoc;
58
59  enum {
60    /// MaxIncludeStackDepth - Maximum depth of #includes.
61    MaxAllowedIncludeStackDepth = 200
62  };
63
64  // State that is set before the preprocessor begins.
65  bool KeepComments : 1;
66  bool KeepMacroComments : 1;
67
68  // State that changes while the preprocessor runs:
69  bool DisableMacroExpansion : 1;  // True if macro expansion is disabled.
70  bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
71
72  /// Identifiers - This is mapping/lookup information for all identifiers in
73  /// the program, including program keywords.
74  IdentifierTable Identifiers;
75
76  /// PragmaHandlers - This tracks all of the pragmas that the client registered
77  /// with this preprocessor.
78  PragmaNamespace *PragmaHandlers;
79
80  /// CurLexer - This is the current top of the stack that we're lexing from if
81  /// not expanding a macro.  One of CurLexer and CurMacroExpander must be null.
82  Lexer *CurLexer;
83
84  /// CurLookup - The DirectoryLookup structure used to find the current
85  /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
86  /// implement #include_next and find directory-specific properties.
87  const DirectoryLookup *CurDirLookup;
88
89  /// CurMacroExpander - This is the current macro we are expanding, if we are
90  /// expanding a macro.  One of CurLexer and CurMacroExpander must be null.
91  MacroExpander *CurMacroExpander;
92
93  /// IncludeMacroStack - This keeps track of the stack of files currently
94  /// #included, and macros currently being expanded from, not counting
95  /// CurLexer/CurMacroExpander.
96  struct IncludeStackInfo {
97    Lexer *TheLexer;
98    const DirectoryLookup *TheDirLookup;
99    MacroExpander *TheMacroExpander;
100    IncludeStackInfo(Lexer *L, const DirectoryLookup *D, MacroExpander *M)
101      : TheLexer(L), TheDirLookup(D), TheMacroExpander(M) {
102    }
103  };
104  std::vector<IncludeStackInfo> IncludeMacroStack;
105
106  /// Callbacks - These are actions invoked when some preprocessor activity is
107  /// encountered (e.g. a file is #included, etc).
108  PPCallbacks *Callbacks;
109
110  // Various statistics we track for performance analysis.
111  unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
112  unsigned NumIf, NumElse, NumEndif;
113  unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
114  unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
115  unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
116  unsigned NumSkipped;
117
118  /// MacroExpanderCache - Cache macro expanders to reduce malloc traffic.
119  enum { MacroExpanderCacheSize = 8 };
120  unsigned NumCachedMacroExpanders;
121  MacroExpander *MacroExpanderCache[MacroExpanderCacheSize];
122public:
123  Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target,
124               SourceManager &SM, HeaderSearch &Headers);
125  ~Preprocessor();
126
127  Diagnostic &getDiagnostics() const { return Diags; }
128  const LangOptions &getLangOptions() const { return Features; }
129  TargetInfo &getTargetInfo() const { return Target; }
130  FileManager &getFileManager() const { return FileMgr; }
131  SourceManager &getSourceManager() const { return SourceMgr; }
132  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
133
134  IdentifierTable &getIdentifierTable() { return Identifiers; }
135
136  /// SetCommentRetentionState - Control whether or not the preprocessor retains
137  /// comments in output.
138  void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
139    this->KeepComments = KeepComments | KeepMacroComments;
140    this->KeepMacroComments = KeepMacroComments;
141  }
142
143  bool getCommentRetentionState() const { return KeepComments; }
144
145  /// isCurrentLexer - Return true if we are lexing directly from the specified
146  /// lexer.
147  bool isCurrentLexer(const Lexer *L) const {
148    return CurLexer == L;
149  }
150
151  /// isInPrimaryFile - Return true if we're in the top-level file, not in a
152  /// #include.
153  bool isInPrimaryFile() const;
154
155  /// getCurrentLexer - Return the current file lexer being lexed from.  Note
156  /// that this ignores any potentially active macro expansions and _Pragma
157  /// expansions going on at the time.
158  Lexer *getCurrentFileLexer() const;
159
160  /// getPPCallbacks/SetPPCallbacks - Accessors for preprocessor callbacks.
161  ///
162  PPCallbacks *getPPCallbacks() const { return Callbacks; }
163  void setPPCallbacks(PPCallbacks *C) {
164    Callbacks = C;
165  }
166
167  /// getIdentifierInfo - Return information about the specified preprocessor
168  /// identifier token.  The version of this method that takes two character
169  /// pointers is preferred unless the identifier is already available as a
170  /// string (this avoids allocation and copying of memory to construct an
171  /// std::string).
172  IdentifierInfo *getIdentifierInfo(const char *NameStart,
173                                    const char *NameEnd) {
174    return &Identifiers.get(NameStart, NameEnd);
175  }
176  IdentifierInfo *getIdentifierInfo(const char *NameStr) {
177    return getIdentifierInfo(NameStr, NameStr+strlen(NameStr));
178  }
179
180  /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
181  /// If 'Namespace' is non-null, then it is a token required to exist on the
182  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
183  void AddPragmaHandler(const char *Namespace, PragmaHandler *Handler);
184
185  /// EnterSourceFile - Add a source file to the top of the include stack and
186  /// start lexing tokens from it instead of the current buffer.  If isMainFile
187  /// is true, this is the main file for the translation unit.
188  void EnterSourceFile(unsigned CurFileID, const DirectoryLookup *Dir,
189                       bool isMainFile = false);
190
191  /// EnterMacro - Add a Macro to the top of the include stack and start lexing
192  /// tokens from it instead of the current buffer.  Args specifies the
193  /// tokens input to a function-like macro.
194  void EnterMacro(Token &Identifier, MacroArgs *Args);
195
196  /// EnterTokenStream - Add a "macro" context to the top of the include stack,
197  /// which will cause the lexer to start returning the specified tokens.  Note
198  /// that these tokens will be re-macro-expanded when/if expansion is enabled.
199  /// This method assumes that the specified stream of tokens has a permanent
200  /// owner somewhere, so they do not need to be copied.
201  void EnterTokenStream(const Token *Toks, unsigned NumToks);
202
203  /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
204  /// lexer stack.  This should only be used in situations where the current
205  /// state of the top-of-stack lexer is known.
206  void RemoveTopOfLexerStack();
207
208  /// Lex - To lex a token from the preprocessor, just pull a token from the
209  /// current lexer or macro object.
210  void Lex(Token &Result) {
211    if (CurLexer)
212      CurLexer->Lex(Result);
213    else
214      CurMacroExpander->Lex(Result);
215  }
216
217  /// LexNonComment - Lex a token.  If it's a comment, keep lexing until we get
218  /// something not a comment.  This is useful in -E -C mode where comments
219  /// would foul up preprocessor directive handling.
220  void LexNonComment(Token &Result) {
221    do
222      Lex(Result);
223    while (Result.getKind() == tok::comment);
224  }
225
226  /// LexUnexpandedToken - This is just like Lex, but this disables macro
227  /// expansion of identifier tokens.
228  void LexUnexpandedToken(Token &Result) {
229    // Disable macro expansion.
230    bool OldVal = DisableMacroExpansion;
231    DisableMacroExpansion = true;
232    // Lex the token.
233    Lex(Result);
234
235    // Reenable it.
236    DisableMacroExpansion = OldVal;
237  }
238
239  /// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
240  /// the specified Token's location, translating the token's start
241  /// position in the current buffer into a SourcePosition object for rendering.
242  void Diag(SourceLocation Loc, unsigned DiagID);
243  void Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg);
244  void Diag(const Token &Tok, unsigned DiagID) {
245    Diag(Tok.getLocation(), DiagID);
246  }
247  void Diag(const Token &Tok, unsigned DiagID, const std::string &Msg) {
248    Diag(Tok.getLocation(), DiagID, Msg);
249  }
250
251  /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
252  /// token is the characters used to represent the token in the source file
253  /// after trigraph expansion and escaped-newline folding.  In particular, this
254  /// wants to get the true, uncanonicalized, spelling of things like digraphs
255  /// UCNs, etc.
256  std::string getSpelling(const Token &Tok) const;
257
258  /// getSpelling - This method is used to get the spelling of a token into a
259  /// preallocated buffer, instead of as an std::string.  The caller is required
260  /// to allocate enough space for the token, which is guaranteed to be at least
261  /// Tok.getLength() bytes long.  The length of the actual result is returned.
262  ///
263  /// Note that this method may do two possible things: it may either fill in
264  /// the buffer specified with characters, or it may *change the input pointer*
265  /// to point to a constant buffer with the data already in it (avoiding a
266  /// copy).  The caller is not allowed to modify the returned buffer pointer
267  /// if an internal buffer is returned.
268  unsigned getSpelling(const Token &Tok, const char *&Buffer) const;
269
270
271  /// CreateString - Plop the specified string into a scratch buffer and return
272  /// a location for it.  If specified, the source location provides a source
273  /// location for the token.
274  SourceLocation CreateString(const char *Buf, unsigned Len,
275                              SourceLocation SourceLoc = SourceLocation());
276
277  /// DumpToken - Print the token to stderr, used for debugging.
278  ///
279  void DumpToken(const Token &Tok, bool DumpFlags = false) const;
280  void DumpMacro(const MacroInfo &MI) const;
281
282  /// AdvanceToTokenCharacter - Given a location that specifies the start of a
283  /// token, return a new location that specifies a character within the token.
284  SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char);
285
286  /// IncrementPasteCounter - Increment the counters for the number of token
287  /// paste operations performed.  If fast was specified, this is a 'fast paste'
288  /// case we handled.
289  ///
290  void IncrementPasteCounter(bool isFast) {
291    if (isFast)
292      ++NumFastTokenPaste;
293    else
294      ++NumTokenPaste;
295  }
296
297  void PrintStats();
298
299  //===--------------------------------------------------------------------===//
300  // Preprocessor callback methods.  These are invoked by a lexer as various
301  // directives and events are found.
302
303  /// LookUpIdentifierInfo - Given a tok::identifier token, look up the
304  /// identifier information for the token and install it into the token.
305  IdentifierInfo *LookUpIdentifierInfo(Token &Identifier,
306                                       const char *BufPtr = 0);
307
308  /// HandleIdentifier - This callback is invoked when the lexer reads an
309  /// identifier and has filled in the tokens IdentifierInfo member.  This
310  /// callback potentially macro expands it or turns it into a named token (like
311  /// 'for').
312  void HandleIdentifier(Token &Identifier);
313
314
315  /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
316  /// the current file.  This either returns the EOF token and returns true, or
317  /// pops a level off the include stack and returns false, at which point the
318  /// client should call lex again.
319  bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
320
321  /// HandleEndOfMacro - This callback is invoked when the lexer hits the end of
322  /// the current macro line.  It returns true if Result is filled in with a
323  /// token, or false if Lex should be called again.
324  bool HandleEndOfMacro(Token &Result);
325
326  /// HandleDirective - This callback is invoked when the lexer sees a # token
327  /// at the start of a line.  This consumes the directive, modifies the
328  /// lexer/preprocessor state, and advances the lexer(s) so that the next token
329  /// read is the correct one.
330  void HandleDirective(Token &Result);
331
332  /// CheckEndOfDirective - Ensure that the next token is a tok::eom token.  If
333  /// not, emit a diagnostic and consume up until the eom.
334  void CheckEndOfDirective(const char *Directive);
335private:
336
337  /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
338  /// current line until the tok::eom token is found.
339  void DiscardUntilEndOfDirective();
340
341  /// ReadMacroName - Lex and validate a macro name, which occurs after a
342  /// #define or #undef.  This emits a diagnostic, sets the token kind to eom,
343  /// and discards the rest of the macro line if the macro name is invalid.
344  void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
345
346  /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
347  /// definition has just been read.  Lex the rest of the arguments and the
348  /// closing ), updating MI with what we learn.  Return true if an error occurs
349  /// parsing the arg list.
350  bool ReadMacroDefinitionArgList(MacroInfo *MI);
351
352  /// SkipExcludedConditionalBlock - We just read a #if or related directive and
353  /// decided that the subsequent tokens are in the #if'd out portion of the
354  /// file.  Lex the rest of the file, until we see an #endif.  If
355  /// FoundNonSkipPortion is true, then we have already emitted code for part of
356  /// this #if directive, so #else/#elif blocks should never be entered. If
357  /// FoundElse is false, then #else directives are ok, if not, then we have
358  /// already seen one so a #else directive is a duplicate.  When this returns,
359  /// the caller can lex the first valid token.
360  void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
361                                    bool FoundNonSkipPortion, bool FoundElse);
362
363  /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
364  /// may occur after a #if or #elif directive and return it as a bool.  If the
365  /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
366  bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
367
368  /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
369  /// #pragma GCC poison/system_header/dependency and #pragma once.
370  void RegisterBuiltinPragmas();
371
372  /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
373  /// identifier table.
374  void RegisterBuiltinMacros();
375  IdentifierInfo *RegisterBuiltinMacro(const char *Name);
376
377  /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
378  /// be expanded as a macro, handle it and return the next token as 'Tok'.  If
379  /// the macro should not be expanded return true, otherwise return false.
380  bool HandleMacroExpandedIdentifier(Token &Tok, MacroInfo *MI);
381
382  /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
383  /// lexed is a '('.  If so, consume the token and return true, if not, this
384  /// method should have no observable side-effect on the lexed tokens.
385  bool isNextPPTokenLParen();
386
387  /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
388  /// invoked to read all of the formal arguments specified for the macro
389  /// invocation.  This returns null on error.
390  MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI);
391
392  /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
393  /// as a builtin macro, handle it and return the next token as 'Tok'.
394  void ExpandBuiltinMacro(Token &Tok);
395
396  /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
397  /// return the first token after the directive.  The _Pragma token has just
398  /// been read into 'Tok'.
399  void Handle_Pragma(Token &Tok);
400
401
402  /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
403  /// start lexing tokens from it instead of the current buffer.
404  void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
405
406  /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
407  /// checked and spelled filename, e.g. as an operand of #include. This returns
408  /// true if the input filename was in <>'s or false if it were in ""'s.  The
409  /// caller is expected to provide a buffer that is large enough to hold the
410  /// spelling of the filename, but is also expected to handle the case when
411  /// this method decides to use a different buffer.
412  bool GetIncludeFilenameSpelling(const Token &FNTok,
413                                  const char *&BufStart, const char *&BufEnd);
414
415  /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
416  /// return null on failure.  isAngled indicates whether the file reference is
417  /// for system #include's or not (i.e. using <> instead of "").
418  const FileEntry *LookupFile(const char *FilenameStart,const char *FilenameEnd,
419                              bool isAngled, const DirectoryLookup *FromDir,
420                              const DirectoryLookup *&CurDir);
421
422  //===--------------------------------------------------------------------===//
423  /// Handle*Directive - implement the various preprocessor directives.  These
424  /// should side-effect the current preprocessor object so that the next call
425  /// to Lex() will return the appropriate token next.
426
427  void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
428  void HandleIdentSCCSDirective(Token &Tok);
429
430  // File inclusion.
431  void HandleIncludeDirective(Token &Tok,
432                              const DirectoryLookup *LookupFrom = 0,
433                              bool isImport = false);
434  void HandleIncludeNextDirective(Token &Tok);
435  void HandleImportDirective(Token &Tok);
436
437  // Macro handling.
438  void HandleDefineDirective(Token &Tok, bool isTargetSpecific);
439  void HandleUndefDirective(Token &Tok);
440  void HandleDefineOtherTargetDirective(Token &Tok);
441  // HandleAssertDirective(Token &Tok);
442  // HandleUnassertDirective(Token &Tok);
443
444  // Conditional Inclusion.
445  void HandleIfdefDirective(Token &Tok, bool isIfndef,
446                            bool ReadAnyTokensBeforeDirective);
447  void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
448  void HandleEndifDirective(Token &Tok);
449  void HandleElseDirective(Token &Tok);
450  void HandleElifDirective(Token &Tok);
451
452  // Pragmas.
453  void HandlePragmaDirective();
454public:
455  void HandlePragmaOnce(Token &OnceTok);
456  void HandlePragmaPoison(Token &PoisonTok);
457  void HandlePragmaSystemHeader(Token &SysHeaderTok);
458  void HandlePragmaDependency(Token &DependencyTok);
459};
460
461}  // end namespace clang
462
463#endif
464