Preprocessor.h revision 9594acf32de2939b15eafa8fe818607bfc56bf66
1//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the Preprocessor interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15#define LLVM_CLANG_LEX_PREPROCESSOR_H
16
#include "clang/Lex/IdentifierTable.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/MacroExpander.h"
#include "clang/Basic/SourceLocation.h"
#include <cassert>
#include <cstring>
#include <string>
#include <vector>
21
22namespace clang {
23
// Forward declarations.  This header only refers to these types through
// pointers and references, so their full definitions are not required here.
class SourceManager;
class FileManager;
class FileEntry;
class HeaderSearch;
class PragmaNamespace;
class PragmaHandler;
class ScratchBuffer;
class TargetInfo;
class PPCallbacks;
class DirectoryLookup;
34
35/// Preprocessor - This object forms engages in a tight little dance to
36/// efficiently preprocess tokens.  Lexers know only about tokens within a
37/// single source file, and don't know anything about preprocessor-level issues
38/// like the #include stack, token expansion, etc.
39///
40class Preprocessor {
41  Diagnostic        &Diags;
42  const LangOptions &Features;
43  TargetInfo        &Target;
44  FileManager       &FileMgr;
45  SourceManager     &SourceMgr;
46  ScratchBuffer     *ScratchBuf;
47  HeaderSearch      &HeaderInfo;
48
49  /// Identifiers for builtin macros and other builtins.
50  IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
51  IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
52  IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
53  IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
54  IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
55  IdentifierInfo *Ident_Pragma, *Ident__VA_ARGS__; // _Pragma, __VA_ARGS__
56
57  SourceLocation DATELoc, TIMELoc;
58
59  enum {
60    /// MaxIncludeStackDepth - Maximum depth of #includes.
61    MaxAllowedIncludeStackDepth = 200
62  };
63
64  // State that is set before the preprocessor begins.
65  bool KeepComments : 1;
66  bool KeepMacroComments : 1;
67
68  // State that changes while the preprocessor runs:
69  bool DisableMacroExpansion : 1;  // True if macro expansion is disabled.
70  bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
71
72  /// Identifiers - This is mapping/lookup information for all identifiers in
73  /// the program, including program keywords.
74  IdentifierTable Identifiers;
75
76  /// PragmaHandlers - This tracks all of the pragmas that the client registered
77  /// with this preprocessor.
78  PragmaNamespace *PragmaHandlers;
79
80  /// CurLexer - This is the current top of the stack that we're lexing from if
81  /// not expanding a macro.  One of CurLexer and CurMacroExpander must be null.
82  Lexer *CurLexer;
83
84  /// CurLookup - The DirectoryLookup structure used to find the current
85  /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
86  /// implement #include_next and find directory-specific properties.
87  const DirectoryLookup *CurDirLookup;
88
89  /// CurMacroExpander - This is the current macro we are expanding, if we are
90  /// expanding a macro.  One of CurLexer and CurMacroExpander must be null.
91  MacroExpander *CurMacroExpander;
92
93  /// IncludeMacroStack - This keeps track of the stack of files currently
94  /// #included, and macros currently being expanded from, not counting
95  /// CurLexer/CurMacroExpander.
96  struct IncludeStackInfo {
97    Lexer *TheLexer;
98    const DirectoryLookup *TheDirLookup;
99    MacroExpander *TheMacroExpander;
100    IncludeStackInfo(Lexer *L, const DirectoryLookup *D, MacroExpander *M)
101      : TheLexer(L), TheDirLookup(D), TheMacroExpander(M) {
102    }
103  };
104  std::vector<IncludeStackInfo> IncludeMacroStack;
105
106  /// Callbacks - These are actions invoked when some preprocessor activity is
107  /// encountered (e.g. a file is #included, etc).
108  PPCallbacks *Callbacks;
109
110  // Various statistics we track for performance analysis.
111  unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
112  unsigned NumIf, NumElse, NumEndif;
113  unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
114  unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
115  unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
116  unsigned NumSkipped;
117
118  /// MacroExpanderCache - Cache macro expanders to reduce malloc traffic.
119  enum { MacroExpanderCacheSize = 8 };
120  unsigned NumCachedMacroExpanders;
121  MacroExpander *MacroExpanderCache[MacroExpanderCacheSize];
122public:
123  Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target,
124               SourceManager &SM, HeaderSearch &Headers);
125  ~Preprocessor();
126
127  Diagnostic &getDiagnostics() const { return Diags; }
128  const LangOptions &getLangOptions() const { return Features; }
129  TargetInfo &getTargetInfo() const { return Target; }
130  FileManager &getFileManager() const { return FileMgr; }
131  SourceManager &getSourceManager() const { return SourceMgr; }
132  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
133
134  IdentifierTable &getIdentifierTable() { return Identifiers; }
135
136  /// SetCommentRetentionState - Control whether or not the preprocessor retains
137  /// comments in output.
138  void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
139    this->KeepComments = KeepComments | KeepMacroComments;
140    this->KeepMacroComments = KeepMacroComments;
141  }
142
143  bool getCommentRetentionState() const { return KeepComments; }
144
145  /// isCurrentLexer - Return true if we are lexing directly from the specified
146  /// lexer.
147  bool isCurrentLexer(const Lexer *L) const {
148    return CurLexer == L;
149  }
150
151  /// isInPrimaryFile - Return true if we're in the top-level file, not in a
152  /// #include.
153  bool isInPrimaryFile() const;
154
155  /// getCurrentLexer - Return the current file lexer being lexed from.  Note
156  /// that this ignores any potentially active macro expansions and _Pragma
157  /// expansions going on at the time.
158  Lexer *getCurrentFileLexer() const;
159
160  /// getPPCallbacks/SetPPCallbacks - Accessors for preprocessor callbacks.
161  ///
162  PPCallbacks *getPPCallbacks() const { return Callbacks; }
163  void setPPCallbacks(PPCallbacks *C) {
164    Callbacks = C;
165  }
166
167  /// getIdentifierInfo - Return information about the specified preprocessor
168  /// identifier token.  The version of this method that takes two character
169  /// pointers is preferred unless the identifier is already available as a
170  /// string (this avoids allocation and copying of memory to construct an
171  /// std::string).
172  IdentifierInfo *getIdentifierInfo(const char *NameStart,
173                                    const char *NameEnd) {
174    return &Identifiers.get(NameStart, NameEnd);
175  }
176  IdentifierInfo *getIdentifierInfo(const char *NameStr) {
177    return getIdentifierInfo(NameStr, NameStr+strlen(NameStr));
178  }
179
180  /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
181  /// If 'Namespace' is non-null, then it is a token required to exist on the
182  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
183  void AddPragmaHandler(const char *Namespace, PragmaHandler *Handler);
184
185  /// EnterSourceFile - Add a source file to the top of the include stack and
186  /// start lexing tokens from it instead of the current buffer.  If isMainFile
187  /// is true, this is the main file for the translation unit.
188  void EnterSourceFile(unsigned CurFileID, const DirectoryLookup *Dir,
189                       bool isMainFile = false);
190
191  /// EnterMacro - Add a Macro to the top of the include stack and start lexing
192  /// tokens from it instead of the current buffer.  Args specifies the
193  /// tokens input to a function-like macro.
194  void EnterMacro(LexerToken &Identifier, MacroArgs *Args);
195
196  /// EnterTokenStream - Add a "macro" context to the top of the include stack,
197  /// which will cause the lexer to start returning the specified tokens.  Note
198  /// that these tokens will be re-macro-expanded when/if expansion is enabled.
199  /// This method assumes that the specified stream of tokens has a permanent
200  /// owner somewhere, so they do not need to be copied.
201  void EnterTokenStream(const LexerToken *Toks, unsigned NumToks);
202
203  /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
204  /// lexer stack.  This should only be used in situations where the current
205  /// state of the top-of-stack lexer is known.
206  void RemoveTopOfLexerStack();
207
208  /// Lex - To lex a token from the preprocessor, just pull a token from the
209  /// current lexer or macro object.
210  void Lex(LexerToken &Result) {
211    if (CurLexer)
212      CurLexer->Lex(Result);
213    else
214      CurMacroExpander->Lex(Result);
215  }
216
217  /// LexNonComment - Lex a token.  If it's a comment, keep lexing until we get
218  /// something not a comment.  This is useful in -E -C mode where comments
219  /// would foul up preprocessor directive handling.
220  void LexNonComment(LexerToken &Result) {
221    do
222      Lex(Result);
223    while (Result.getKind() == tok::comment);
224  }
225
226  /// LexUnexpandedToken - This is just like Lex, but this disables macro
227  /// expansion of identifier tokens.
228  void LexUnexpandedToken(LexerToken &Result) {
229    // Disable macro expansion.
230    bool OldVal = DisableMacroExpansion;
231    DisableMacroExpansion = true;
232    // Lex the token.
233    Lex(Result);
234
235    // Reenable it.
236    DisableMacroExpansion = OldVal;
237  }
238
239  /// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
240  /// the specified LexerToken's location, translating the token's start
241  /// position in the current buffer into a SourcePosition object for rendering.
242  void Diag(SourceLocation Loc, unsigned DiagID);
243  void Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg);
244  void Diag(const LexerToken &Tok, unsigned DiagID) {
245    Diag(Tok.getLocation(), DiagID);
246  }
247  void Diag(const LexerToken &Tok, unsigned DiagID, const std::string &Msg) {
248    Diag(Tok.getLocation(), DiagID, Msg);
249  }
250
251  /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
252  /// token is the characters used to represent the token in the source file
253  /// after trigraph expansion and escaped-newline folding.  In particular, this
254  /// wants to get the true, uncanonicalized, spelling of things like digraphs
255  /// UCNs, etc.
256  std::string getSpelling(const LexerToken &Tok) const;
257
258  /// getSpelling - This method is used to get the spelling of a token into a
259  /// preallocated buffer, instead of as an std::string.  The caller is required
260  /// to allocate enough space for the token, which is guaranteed to be at least
261  /// Tok.getLength() bytes long.  The length of the actual result is returned.
262  ///
263  /// Note that this method may do two possible things: it may either fill in
264  /// the buffer specified with characters, or it may *change the input pointer*
265  /// to point to a constant buffer with the data already in it (avoiding a
266  /// copy).  The caller is not allowed to modify the returned buffer pointer
267  /// if an internal buffer is returned.
268  unsigned getSpelling(const LexerToken &Tok, const char *&Buffer) const;
269
270
271  /// CreateString - Plop the specified string into a scratch buffer and return
272  /// a location for it.  If specified, the source location provides a source
273  /// location for the token.
274  SourceLocation CreateString(const char *Buf, unsigned Len,
275                              SourceLocation SourceLoc = SourceLocation());
276
277  /// DumpToken - Print the token to stderr, used for debugging.
278  ///
279  void DumpToken(const LexerToken &Tok, bool DumpFlags = false) const;
280  void DumpMacro(const MacroInfo &MI) const;
281
282  /// IncrementPasteCounter - Increment the counters for the number of token
283  /// paste operations performed.  If fast was specified, this is a 'fast paste'
284  /// case we handled.
285  ///
286  void IncrementPasteCounter(bool isFast) {
287    if (isFast)
288      ++NumFastTokenPaste;
289    else
290      ++NumTokenPaste;
291  }
292
293  void PrintStats();
294
295  //===--------------------------------------------------------------------===//
296  // Preprocessor callback methods.  These are invoked by a lexer as various
297  // directives and events are found.
298
299  /// LookUpIdentifierInfo - Given a tok::identifier token, look up the
300  /// identifier information for the token and install it into the token.
301  IdentifierInfo *LookUpIdentifierInfo(LexerToken &Identifier,
302                                       const char *BufPtr = 0);
303
304  /// HandleIdentifier - This callback is invoked when the lexer reads an
305  /// identifier and has filled in the tokens IdentifierInfo member.  This
306  /// callback potentially macro expands it or turns it into a named token (like
307  /// 'for').
308  void HandleIdentifier(LexerToken &Identifier);
309
310
311  /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
312  /// the current file.  This either returns the EOF token and returns true, or
313  /// pops a level off the include stack and returns false, at which point the
314  /// client should call lex again.
315  bool HandleEndOfFile(LexerToken &Result, bool isEndOfMacro = false);
316
317  /// HandleEndOfMacro - This callback is invoked when the lexer hits the end of
318  /// the current macro line.  It returns true if Result is filled in with a
319  /// token, or false if Lex should be called again.
320  bool HandleEndOfMacro(LexerToken &Result);
321
322  /// HandleDirective - This callback is invoked when the lexer sees a # token
323  /// at the start of a line.  This consumes the directive, modifies the
324  /// lexer/preprocessor state, and advances the lexer(s) so that the next token
325  /// read is the correct one.
326  void HandleDirective(LexerToken &Result);
327
328  /// CheckEndOfDirective - Ensure that the next token is a tok::eom token.  If
329  /// not, emit a diagnostic and consume up until the eom.
330  void CheckEndOfDirective(const char *Directive);
331private:
332
333  /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
334  /// current line until the tok::eom token is found.
335  void DiscardUntilEndOfDirective();
336
337  /// ReadMacroName - Lex and validate a macro name, which occurs after a
338  /// #define or #undef.  This emits a diagnostic, sets the token kind to eom,
339  /// and discards the rest of the macro line if the macro name is invalid.
340  void ReadMacroName(LexerToken &MacroNameTok, char isDefineUndef = 0);
341
342  /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
343  /// definition has just been read.  Lex the rest of the arguments and the
344  /// closing ), updating MI with what we learn.  Return true if an error occurs
345  /// parsing the arg list.
346  bool ReadMacroDefinitionArgList(MacroInfo *MI);
347
348  /// SkipExcludedConditionalBlock - We just read a #if or related directive and
349  /// decided that the subsequent tokens are in the #if'd out portion of the
350  /// file.  Lex the rest of the file, until we see an #endif.  If
351  /// FoundNonSkipPortion is true, then we have already emitted code for part of
352  /// this #if directive, so #else/#elif blocks should never be entered. If
353  /// FoundElse is false, then #else directives are ok, if not, then we have
354  /// already seen one so a #else directive is a duplicate.  When this returns,
355  /// the caller can lex the first valid token.
356  void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
357                                    bool FoundNonSkipPortion, bool FoundElse);
358
359  /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
360  /// may occur after a #if or #elif directive and return it as a bool.  If the
361  /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
362  bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
363
364  /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
365  /// #pragma GCC poison/system_header/dependency and #pragma once.
366  void RegisterBuiltinPragmas();
367
368  /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
369  /// identifier table.
370  void RegisterBuiltinMacros();
371  IdentifierInfo *RegisterBuiltinMacro(const char *Name);
372
373  /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
374  /// be expanded as a macro, handle it and return the next token as 'Tok'.  If
375  /// the macro should not be expanded return true, otherwise return false.
376  bool HandleMacroExpandedIdentifier(LexerToken &Tok, MacroInfo *MI);
377
378  /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
379  /// lexed is a '('.  If so, consume the token and return true, if not, this
380  /// method should have no observable side-effect on the lexed tokens.
381  bool isNextPPTokenLParen();
382
383  /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
384  /// invoked to read all of the formal arguments specified for the macro
385  /// invocation.  This returns null on error.
386  MacroArgs *ReadFunctionLikeMacroArgs(LexerToken &MacroName, MacroInfo *MI);
387
388  /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
389  /// as a builtin macro, handle it and return the next token as 'Tok'.
390  void ExpandBuiltinMacro(LexerToken &Tok);
391
392  /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
393  /// return the first token after the directive.  The _Pragma token has just
394  /// been read into 'Tok'.
395  void Handle_Pragma(LexerToken &Tok);
396
397
398  /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
399  /// start lexing tokens from it instead of the current buffer.
400  void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
401
402  /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
403  /// checked and spelled filename, e.g. as an operand of #include. This returns
404  /// true if the input filename was in <>'s or false if it were in ""'s.  The
405  /// caller is expected to provide a buffer that is large enough to hold the
406  /// spelling of the filename, but is also expected to handle the case when
407  /// this method decides to use a different buffer.
408  bool GetIncludeFilenameSpelling(const LexerToken &FNTok,
409                                  const char *&BufStart, const char *&BufEnd);
410
411  /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
412  /// return null on failure.  isAngled indicates whether the file reference is
413  /// for system #include's or not (i.e. using <> instead of "").
414  const FileEntry *LookupFile(const char *FilenameStart,const char *FilenameEnd,
415                              bool isAngled, const DirectoryLookup *FromDir,
416                              const DirectoryLookup *&CurDir);
417
418  //===--------------------------------------------------------------------===//
419  /// Handle*Directive - implement the various preprocessor directives.  These
420  /// should side-effect the current preprocessor object so that the next call
421  /// to Lex() will return the appropriate token next.
422
423  void HandleUserDiagnosticDirective(LexerToken &Tok, bool isWarning);
424  void HandleIdentSCCSDirective(LexerToken &Tok);
425
426  // File inclusion.
427  void HandleIncludeDirective(LexerToken &Tok,
428                              const DirectoryLookup *LookupFrom = 0,
429                              bool isImport = false);
430  void HandleIncludeNextDirective(LexerToken &Tok);
431  void HandleImportDirective(LexerToken &Tok);
432
433  // Macro handling.
434  void HandleDefineDirective(LexerToken &Tok, bool isTargetSpecific);
435  void HandleUndefDirective(LexerToken &Tok);
436  void HandleDefineOtherTargetDirective(LexerToken &Tok);
437  // HandleAssertDirective(LexerToken &Tok);
438  // HandleUnassertDirective(LexerToken &Tok);
439
440  // Conditional Inclusion.
441  void HandleIfdefDirective(LexerToken &Tok, bool isIfndef,
442                            bool ReadAnyTokensBeforeDirective);
443  void HandleIfDirective(LexerToken &Tok, bool ReadAnyTokensBeforeDirective);
444  void HandleEndifDirective(LexerToken &Tok);
445  void HandleElseDirective(LexerToken &Tok);
446  void HandleElifDirective(LexerToken &Tok);
447
448  // Pragmas.
449  void HandlePragmaDirective();
450public:
451  void HandlePragmaOnce(LexerToken &OnceTok);
452  void HandlePragmaPoison(LexerToken &PoisonTok);
453  void HandlePragmaSystemHeader(LexerToken &SysHeaderTok);
454  void HandlePragmaDependency(LexerToken &DependencyTok);
455};
456
457}  // end namespace clang
458
459#endif
460