PrintPreprocessedOutput.cpp revision b09f6e15c59b89d5820db8ef40598eb1d1323c1f
1//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code simply runs the preprocessor on the input file and prints out the
11// result.  This is the traditional behavior of the -E option.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Frontend/Utils.h"
16#include "clang/Lex/MacroInfo.h"
17#include "clang/Lex/PPCallbacks.h"
18#include "clang/Lex/Preprocessor.h"
19#include "clang/Lex/Pragma.h"
20#include "clang/Lex/TokenConcatenation.h"
21#include "clang/Basic/SourceManager.h"
22#include "clang/Basic/Diagnostic.h"
23#include "llvm/ADT/SmallString.h"
24#include "llvm/ADT/StringExtras.h"
25#include "llvm/System/Path.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Config/config.h"
28#include "llvm/Support/raw_ostream.h"
29#include <cstdio>
30using namespace clang;
31
32/// PrintMacroDefinition - Print a macro definition in a form that will be
33/// properly accepted back as a definition.
34static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
35                                 Preprocessor &PP, llvm::raw_ostream &OS) {
36  OS << "#define " << II.getName();
37
38  if (MI.isFunctionLike()) {
39    OS << '(';
40    if (MI.arg_empty())
41      ;
42    else if (MI.getNumArgs() == 1)
43      OS << (*MI.arg_begin())->getName();
44    else {
45      MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end();
46      OS << (*AI++)->getName();
47      while (AI != E)
48        OS << ',' << (*AI++)->getName();
49    }
50
51    if (MI.isVariadic()) {
52      if (!MI.arg_empty())
53        OS << ',';
54      OS << "...";
55    }
56    OS << ')';
57  }
58
59  // GCC always emits a space, even if the macro body is empty.  However, do not
60  // want to emit two spaces if the first token has a leading space.
61  if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
62    OS << ' ';
63
64  llvm::SmallVector<char, 128> SpellingBuffer;
65  for (MacroInfo::tokens_iterator I = MI.tokens_begin(), E = MI.tokens_end();
66       I != E; ++I) {
67    if (I->hasLeadingSpace())
68      OS << ' ';
69
70    // Make sure we have enough space in the spelling buffer.
71    if (I->getLength() < SpellingBuffer.size())
72      SpellingBuffer.resize(I->getLength());
73    const char *Buffer = &SpellingBuffer[0];
74    unsigned SpellingLen = PP.getSpelling(*I, Buffer);
75    OS.write(Buffer, SpellingLen);
76  }
77}
78
79//===----------------------------------------------------------------------===//
80// Preprocessed token printer
81//===----------------------------------------------------------------------===//
82
83namespace {
84class PrintPPOutputPPCallbacks : public PPCallbacks {
85  Preprocessor &PP;
86  TokenConcatenation ConcatInfo;
87public:
88  llvm::raw_ostream &OS;
89private:
90  unsigned CurLine;
91  bool EmittedTokensOnThisLine;
92  SrcMgr::CharacteristicKind FileType;
93  llvm::SmallString<512> CurFilename;
94  bool Initialized;
95  bool DisableLineMarkers;
96  bool DumpDefines;
97public:
98  PrintPPOutputPPCallbacks(Preprocessor &pp, llvm::raw_ostream &os,
99                           bool lineMarkers, bool defines)
100     : PP(pp), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers),
101       DumpDefines(defines) {
102    CurLine = 0;
103    CurFilename += "<uninit>";
104    EmittedTokensOnThisLine = false;
105    FileType = SrcMgr::C_User;
106    Initialized = false;
107  }
108
109  void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
110  bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
111
112  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
113                           SrcMgr::CharacteristicKind FileType);
114  virtual void Ident(SourceLocation Loc, const std::string &str);
115  virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind,
116                             const std::string &Str);
117
118
119  bool HandleFirstTokOnLine(Token &Tok);
120  bool MoveToLine(SourceLocation Loc);
121  bool AvoidConcat(const Token &PrevTok, const Token &Tok) {
122    return ConcatInfo.AvoidConcat(PrevTok, Tok);
123  }
124  void WriteLineInfo(unsigned LineNo, const char *Extra=0, unsigned ExtraLen=0);
125
126  /// MacroDefined - This hook is called whenever a macro definition is seen.
127  void MacroDefined(const IdentifierInfo *II, const MacroInfo *MI);
128
129};
130}  // end anonymous namespace
131
132void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
133                                             const char *Extra,
134                                             unsigned ExtraLen) {
135  if (EmittedTokensOnThisLine) {
136    OS << '\n';
137    EmittedTokensOnThisLine = false;
138  }
139
140  OS << '#' << ' ' << LineNo << ' ' << '"';
141  OS.write(&CurFilename[0], CurFilename.size());
142  OS << '"';
143
144  if (ExtraLen)
145    OS.write(Extra, ExtraLen);
146
147  if (FileType == SrcMgr::C_System)
148    OS.write(" 3", 2);
149  else if (FileType == SrcMgr::C_ExternCSystem)
150    OS.write(" 3 4", 4);
151  OS << '\n';
152}
153
154/// MoveToLine - Move the output to the source line specified by the location
155/// object.  We can do this by emitting some number of \n's, or be emitting a
156/// #line directive.  This returns false if already at the specified line, true
157/// if some newlines were emitted.
158bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
159  unsigned LineNo = PP.getSourceManager().getInstantiationLineNumber(Loc);
160
161  if (DisableLineMarkers) {
162    if (LineNo == CurLine) return false;
163
164    CurLine = LineNo;
165
166    if (!EmittedTokensOnThisLine)
167      return true;
168
169    OS << '\n';
170    EmittedTokensOnThisLine = false;
171    return true;
172  }
173
174  // If this line is "close enough" to the original line, just print newlines,
175  // otherwise print a #line directive.
176  if (LineNo-CurLine <= 8) {
177    if (LineNo-CurLine == 1)
178      OS << '\n';
179    else if (LineNo == CurLine)
180      return false;    // Spelling line moved, but instantiation line didn't.
181    else {
182      const char *NewLines = "\n\n\n\n\n\n\n\n";
183      OS.write(NewLines, LineNo-CurLine);
184    }
185  } else {
186    WriteLineInfo(LineNo, 0, 0);
187  }
188
189  CurLine = LineNo;
190  return true;
191}
192
193
194/// FileChanged - Whenever the preprocessor enters or exits a #include file
195/// it invokes this handler.  Update our conception of the current source
196/// position.
197void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
198                                           FileChangeReason Reason,
199                                       SrcMgr::CharacteristicKind NewFileType) {
200  // Unless we are exiting a #include, make sure to skip ahead to the line the
201  // #include directive was at.
202  SourceManager &SourceMgr = PP.getSourceManager();
203  if (Reason == PPCallbacks::EnterFile) {
204    SourceLocation IncludeLoc = SourceMgr.getPresumedLoc(Loc).getIncludeLoc();
205    if (IncludeLoc.isValid())
206      MoveToLine(IncludeLoc);
207  } else if (Reason == PPCallbacks::SystemHeaderPragma) {
208    MoveToLine(Loc);
209
210    // TODO GCC emits the # directive for this directive on the line AFTER the
211    // directive and emits a bunch of spaces that aren't needed.  Emulate this
212    // strange behavior.
213  }
214
215  Loc = SourceMgr.getInstantiationLoc(Loc);
216  // FIXME: Should use presumed line #!
217  CurLine = SourceMgr.getInstantiationLineNumber(Loc);
218
219  if (DisableLineMarkers) return;
220
221  CurFilename.clear();
222  CurFilename += SourceMgr.getPresumedLoc(Loc).getFilename();
223  Lexer::Stringify(CurFilename);
224  FileType = NewFileType;
225
226  if (!Initialized) {
227    WriteLineInfo(CurLine);
228    Initialized = true;
229  }
230
231  switch (Reason) {
232  case PPCallbacks::EnterFile:
233    WriteLineInfo(CurLine, " 1", 2);
234    break;
235  case PPCallbacks::ExitFile:
236    WriteLineInfo(CurLine, " 2", 2);
237    break;
238  case PPCallbacks::SystemHeaderPragma:
239  case PPCallbacks::RenameFile:
240    WriteLineInfo(CurLine);
241    break;
242  }
243}
244
245/// Ident - Handle #ident directives when read by the preprocessor.
246///
247void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
248  MoveToLine(Loc);
249
250  OS.write("#ident ", strlen("#ident "));
251  OS.write(&S[0], S.size());
252  EmittedTokensOnThisLine = true;
253}
254
255/// MacroDefined - This hook is called whenever a macro definition is seen.
256void PrintPPOutputPPCallbacks::MacroDefined(const IdentifierInfo *II,
257                                            const MacroInfo *MI) {
258  // Only print out macro definitions in -dD mode.
259  if (!DumpDefines ||
260      // Ignore __FILE__ etc.
261      MI->isBuiltinMacro()) return;
262
263  MoveToLine(MI->getDefinitionLoc());
264  PrintMacroDefinition(*II, *MI, PP, OS);
265}
266
267
268void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc,
269                                             const IdentifierInfo *Kind,
270                                             const std::string &Str) {
271  MoveToLine(Loc);
272  OS << "#pragma comment(" << Kind->getName();
273
274  if (!Str.empty()) {
275    OS << ", \"";
276
277    for (unsigned i = 0, e = Str.size(); i != e; ++i) {
278      unsigned char Char = Str[i];
279      if (isprint(Char) && Char != '\\' && Char != '"')
280        OS << (char)Char;
281      else  // Output anything hard as an octal escape.
282        OS << '\\'
283           << (char)('0'+ ((Char >> 6) & 7))
284           << (char)('0'+ ((Char >> 3) & 7))
285           << (char)('0'+ ((Char >> 0) & 7));
286    }
287    OS << '"';
288  }
289
290  OS << ')';
291  EmittedTokensOnThisLine = true;
292}
293
294
295/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
296/// is called for the first token on each new line.  If this really is the start
297/// of a new logical line, handle it and return true, otherwise return false.
298/// This may not be the start of a logical line because the "start of line"
299/// marker is set for spelling lines, not instantiation ones.
300bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
301  // Figure out what line we went to and insert the appropriate number of
302  // newline characters.
303  if (!MoveToLine(Tok.getLocation()))
304    return false;
305
306  // Print out space characters so that the first token on a line is
307  // indented for easy reading.
308  const SourceManager &SourceMgr = PP.getSourceManager();
309  unsigned ColNo = SourceMgr.getInstantiationColumnNumber(Tok.getLocation());
310
311  // This hack prevents stuff like:
312  // #define HASH #
313  // HASH define foo bar
314  // From having the # character end up at column 1, which makes it so it
315  // is not handled as a #define next time through the preprocessor if in
316  // -fpreprocessed mode.
317  if (ColNo <= 1 && Tok.is(tok::hash))
318    OS << ' ';
319
320  // Otherwise, indent the appropriate number of spaces.
321  for (; ColNo > 1; --ColNo)
322    OS << ' ';
323
324  return true;
325}
326
327namespace {
328struct UnknownPragmaHandler : public PragmaHandler {
329  const char *Prefix;
330  PrintPPOutputPPCallbacks *Callbacks;
331
332  UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
333    : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
334  virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
335    // Figure out what line we went to and insert the appropriate number of
336    // newline characters.
337    Callbacks->MoveToLine(PragmaTok.getLocation());
338    Callbacks->OS.write(Prefix, strlen(Prefix));
339
340    // Read and print all of the pragma tokens.
341    while (PragmaTok.isNot(tok::eom)) {
342      if (PragmaTok.hasLeadingSpace())
343        Callbacks->OS << ' ';
344      std::string TokSpell = PP.getSpelling(PragmaTok);
345      Callbacks->OS.write(&TokSpell[0], TokSpell.size());
346      PP.LexUnexpandedToken(PragmaTok);
347    }
348    Callbacks->OS << '\n';
349  }
350};
351} // end anonymous namespace
352
353
354static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
355                                    PrintPPOutputPPCallbacks *Callbacks,
356                                    llvm::raw_ostream &OS) {
357  char Buffer[256];
358  Token PrevTok;
359  while (1) {
360
361    // If this token is at the start of a line, emit newlines if needed.
362    if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
363      // done.
364    } else if (Tok.hasLeadingSpace() ||
365               // If we haven't emitted a token on this line yet, PrevTok isn't
366               // useful to look at and no concatenation could happen anyway.
367               (Callbacks->hasEmittedTokensOnThisLine() &&
368                // Don't print "-" next to "-", it would form "--".
369                Callbacks->AvoidConcat(PrevTok, Tok))) {
370      OS << ' ';
371    }
372
373    if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
374      OS.write(II->getName(), II->getLength());
375    } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
376               Tok.getLiteralData()) {
377      OS.write(Tok.getLiteralData(), Tok.getLength());
378    } else if (Tok.getLength() < 256) {
379      const char *TokPtr = Buffer;
380      unsigned Len = PP.getSpelling(Tok, TokPtr);
381      OS.write(TokPtr, Len);
382    } else {
383      std::string S = PP.getSpelling(Tok);
384      OS.write(&S[0], S.size());
385    }
386    Callbacks->SetEmittedTokensOnThisLine();
387
388    if (Tok.is(tok::eof)) break;
389
390    PrevTok = Tok;
391    PP.Lex(Tok);
392  }
393}
394
395namespace {
396  struct SortMacrosByID {
397    typedef std::pair<IdentifierInfo*, MacroInfo*> id_macro_pair;
398    bool operator()(const id_macro_pair &LHS, const id_macro_pair &RHS) const {
399      return strcmp(LHS.first->getName(), RHS.first->getName()) < 0;
400    }
401  };
402}
403
404void clang::DoPrintMacros(Preprocessor &PP, llvm::raw_ostream *OS) {
405  // -dM mode just scans and ignores all tokens in the files, then dumps out
406  // the macro table at the end.
407  PP.EnterMainSourceFile();
408
409  Token Tok;
410  do PP.Lex(Tok);
411  while (Tok.isNot(tok::eof));
412
413  std::vector<std::pair<IdentifierInfo*, MacroInfo*> > MacrosByID;
414  for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
415       I != E; ++I)
416    MacrosByID.push_back(*I);
417  std::sort(MacrosByID.begin(), MacrosByID.end(), SortMacrosByID());
418
419  for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
420    MacroInfo &MI = *MacrosByID[i].second;
421    // Ignore computed macros like __LINE__ and friends.
422    if (MI.isBuiltinMacro()) continue;
423
424    PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
425    *OS << "\n";
426  }
427}
428
429/// DoPrintPreprocessedInput - This implements -E mode.
430///
431void clang::DoPrintPreprocessedInput(Preprocessor &PP, llvm::raw_ostream *OS,
432                                     bool EnableCommentOutput,
433                                     bool EnableMacroCommentOutput,
434                                     bool DisableLineMarkers,
435                                     bool DumpDefines) {
436  // Inform the preprocessor whether we want it to retain comments or not, due
437  // to -C or -CC.
438  PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
439
440  OS->SetBufferSize(64*1024);
441
442  PrintPPOutputPPCallbacks *Callbacks =
443      new PrintPPOutputPPCallbacks(PP, *OS, DisableLineMarkers, DumpDefines);
444  PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
445  PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",
446                                                      Callbacks));
447
448  PP.setPPCallbacks(Callbacks);
449
450  // After we have configured the preprocessor, enter the main file.
451  PP.EnterMainSourceFile();
452
453  // Consume all of the tokens that come from the predefines buffer.  Those
454  // should not be emitted into the output and are guaranteed to be at the
455  // start.
456  const SourceManager &SourceMgr = PP.getSourceManager();
457  Token Tok;
458  do PP.Lex(Tok);
459  while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
460         !strcmp(SourceMgr.getPresumedLoc(Tok.getLocation()).getFilename(),
461                 "<built-in>"));
462
463  // Read all the preprocessed tokens, printing them out to the stream.
464  PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
465  *OS << '\n';
466}
467
468