InclusionRewriter.cpp revision 03409967bd9860ffb2ff6b38126e04493c55567f
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code rewrites include invocations into their expansions.  This gives you
11// a file with all included files merged into it.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Rewrite/Frontend/Rewriters.h"
16#include "clang/Basic/SourceManager.h"
17#include "clang/Frontend/PreprocessorOutputOptions.h"
18#include "clang/Lex/Preprocessor.h"
19#include "llvm/Support/raw_ostream.h"
20
21using namespace clang;
22using namespace llvm;
23
24namespace {
25
26class InclusionRewriter : public PPCallbacks {
27  /// Information about which #includes were actually performed,
28  /// created by preprocessor callbacks.
29  struct FileChange {
30    const Module *Mod;
31    SourceLocation From;
32    FileID Id;
33    SrcMgr::CharacteristicKind FileType;
34    FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
35    }
36  };
37  Preprocessor &PP; ///< Used to find inclusion directives.
38  SourceManager &SM; ///< Used to read and manage source files.
39  raw_ostream &OS; ///< The destination stream for rewritten contents.
40  bool ShowLineMarkers; ///< Show #line markers.
41  bool UseLineDirective; ///< Use of line directives or line markers.
42  typedef std::map<unsigned, FileChange> FileChangeMap;
43  FileChangeMap FileChanges; ///< Tracks which files were included where.
44  /// Used transitively for building up the FileChanges mapping over the
45  /// various \c PPCallbacks callbacks.
46  FileChangeMap::iterator LastInsertedFileChange;
47public:
48  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
49  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
50private:
51  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
52                           SrcMgr::CharacteristicKind FileType,
53                           FileID PrevFID);
54  virtual void FileSkipped(const FileEntry &ParentFile,
55                           const Token &FilenameTok,
56                           SrcMgr::CharacteristicKind FileType);
57  virtual void InclusionDirective(SourceLocation HashLoc,
58                                  const Token &IncludeTok,
59                                  StringRef FileName,
60                                  bool IsAngled,
61                                  CharSourceRange FilenameRange,
62                                  const FileEntry *File,
63                                  StringRef SearchPath,
64                                  StringRef RelativePath,
65                                  const Module *Imported);
66  void WriteLineInfo(const char *Filename, int Line,
67                     SrcMgr::CharacteristicKind FileType,
68                     StringRef EOL, StringRef Extra = StringRef());
69  void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
70  void OutputContentUpTo(const MemoryBuffer &FromFile,
71                         unsigned &WriteFrom, unsigned WriteTo,
72                         StringRef EOL, int &lines,
73                         bool EnsureNewline = false);
74  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
75                           const MemoryBuffer &FromFile, StringRef EOL,
76                           unsigned &NextToWrite, int &Lines);
77  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
78  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
79};
80
81}  // end anonymous namespace
82
83/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
84InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
85                                     bool ShowLineMarkers)
86    : PP(PP), SM(PP.getSourceManager()), OS(OS),
87    ShowLineMarkers(ShowLineMarkers),
88    LastInsertedFileChange(FileChanges.end()) {
89  // If we're in microsoft mode, use normal #line instead of line markers.
90  UseLineDirective = PP.getLangOpts().MicrosoftExt;
91}
92
93/// Write appropriate line information as either #line directives or GNU line
94/// markers depending on what mode we're in, including the \p Filename and
95/// \p Line we are located at, using the specified \p EOL line separator, and
96/// any \p Extra context specifiers in GNU line directives.
97void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
98                                      SrcMgr::CharacteristicKind FileType,
99                                      StringRef EOL, StringRef Extra) {
100  if (!ShowLineMarkers)
101    return;
102  if (UseLineDirective) {
103    OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
104  } else {
105    // Use GNU linemarkers as described here:
106    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
107    OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
108    if (!Extra.empty())
109      OS << Extra;
110    if (FileType == SrcMgr::C_System)
111      // "`3' This indicates that the following text comes from a system header
112      // file, so certain warnings should be suppressed."
113      OS << " 3";
114    else if (FileType == SrcMgr::C_ExternCSystem)
115      // as above for `3', plus "`4' This indicates that the following text
116      // should be treated as being wrapped in an implicit extern "C" block."
117      OS << " 3 4";
118  }
119  OS << EOL;
120}
121
122void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
123                                                  StringRef EOL) {
124  OS << "@import " << Mod->getFullModuleName() << ";"
125     << " /* clang -frewrite-includes: implicit import */" << EOL;
126}
127
128/// FileChanged - Whenever the preprocessor enters or exits a #include file
129/// it invokes this handler.
130void InclusionRewriter::FileChanged(SourceLocation Loc,
131                                    FileChangeReason Reason,
132                                    SrcMgr::CharacteristicKind NewFileType,
133                                    FileID) {
134  if (Reason != EnterFile)
135    return;
136  if (LastInsertedFileChange == FileChanges.end())
137    // we didn't reach this file (eg: the main file) via an inclusion directive
138    return;
139  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
140  LastInsertedFileChange->second.FileType = NewFileType;
141  LastInsertedFileChange = FileChanges.end();
142}
143
144/// Called whenever an inclusion is skipped due to canonical header protection
145/// macros.
146void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
147                                    const Token &/*FilenameTok*/,
148                                    SrcMgr::CharacteristicKind /*FileType*/) {
149  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
150    "found via an inclusion directive, was skipped");
151  FileChanges.erase(LastInsertedFileChange);
152  LastInsertedFileChange = FileChanges.end();
153}
154
155/// This should be called whenever the preprocessor encounters include
156/// directives. It does not say whether the file has been included, but it
157/// provides more information about the directive (hash location instead
158/// of location inside the included file). It is assumed that the matching
159/// FileChanged() or FileSkipped() is called after this.
160void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
161                                           const Token &/*IncludeTok*/,
162                                           StringRef /*FileName*/,
163                                           bool /*IsAngled*/,
164                                           CharSourceRange /*FilenameRange*/,
165                                           const FileEntry * /*File*/,
166                                           StringRef /*SearchPath*/,
167                                           StringRef /*RelativePath*/,
168                                           const Module *Imported) {
169  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
170    "directive was found before the previous one was processed");
171  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
172    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
173  assert(p.second && "Unexpected revisitation of the same include directive");
174  if (!Imported)
175    LastInsertedFileChange = p.first;
176}
177
178/// Simple lookup for a SourceLocation (specifically one denoting the hash in
179/// an inclusion directive) in the map of inclusion information, FileChanges.
180const InclusionRewriter::FileChange *
181InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
182  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
183  if (I != FileChanges.end())
184    return &I->second;
185  return NULL;
186}
187
188/// Detect the likely line ending style of \p FromFile by examining the first
189/// newline found within it.
190static StringRef DetectEOL(const MemoryBuffer &FromFile) {
191  // detect what line endings the file uses, so that added content does not mix
192  // the style
193  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
194  if (Pos == NULL)
195    return "\n";
196  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
197    return "\n\r";
198  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
199    return "\r\n";
200  return "\n";
201}
202
203/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
204/// \p WriteTo - 1.
205void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
206                                          unsigned &WriteFrom, unsigned WriteTo,
207                                          StringRef EOL, int &Line,
208                                          bool EnsureNewline) {
209  if (WriteTo <= WriteFrom)
210    return;
211  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
212  // count lines manually, it's faster than getPresumedLoc()
213  Line += std::count(FromFile.getBufferStart() + WriteFrom,
214                     FromFile.getBufferStart() + WriteTo, '\n');
215  if (EnsureNewline) {
216    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
217    if (LastChar != '\n' && LastChar != '\r')
218      OS << EOL;
219  }
220  WriteFrom = WriteTo;
221}
222
223/// Print characters from \p FromFile starting at \p NextToWrite up until the
224/// inclusion directive at \p StartToken, then print out the inclusion
225/// inclusion directive disabled by a #if directive, updating \p NextToWrite
226/// and \p Line to track the number of source lines visited and the progress
227/// through the \p FromFile buffer.
228void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
229                                            const Token &StartToken,
230                                            const MemoryBuffer &FromFile,
231                                            StringRef EOL,
232                                            unsigned &NextToWrite, int &Line) {
233  OutputContentUpTo(FromFile, NextToWrite,
234    SM.getFileOffset(StartToken.getLocation()), EOL, Line);
235  Token DirectiveToken;
236  do {
237    DirectiveLex.LexFromRawLexer(DirectiveToken);
238  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
239  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
240  OutputContentUpTo(FromFile, NextToWrite,
241    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
242    EOL, Line);
243  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
244}
245
246/// Find the next identifier in the pragma directive specified by \p RawToken.
247StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
248                                                Token &RawToken) {
249  RawLex.LexFromRawLexer(RawToken);
250  if (RawToken.is(tok::raw_identifier))
251    PP.LookUpIdentifierInfo(RawToken);
252  if (RawToken.is(tok::identifier))
253    return RawToken.getIdentifierInfo()->getName();
254  return StringRef();
255}
256
257/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
258/// and including content of included files recursively.
259bool InclusionRewriter::Process(FileID FileId,
260                                SrcMgr::CharacteristicKind FileType)
261{
262  bool Invalid;
263  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
264  if (Invalid) // invalid inclusion
265    return false;
266  const char *FileName = FromFile.getBufferIdentifier();
267  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
268  RawLex.SetCommentRetentionState(false);
269
270  StringRef EOL = DetectEOL(FromFile);
271
272  // Per the GNU docs: "1" indicates the start of a new file.
273  WriteLineInfo(FileName, 1, FileType, EOL, " 1");
274
275  if (SM.getFileIDSize(FileId) == 0)
276    return false;
277
278  // The next byte to be copied from the source file
279  unsigned NextToWrite = 0;
280  int Line = 1; // The current input file line number.
281
282  Token RawToken;
283  RawLex.LexFromRawLexer(RawToken);
284
285  // TODO: Consider adding a switch that strips possibly unimportant content,
286  // such as comments, to reduce the size of repro files.
287  while (RawToken.isNot(tok::eof)) {
288    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
289      RawLex.setParsingPreprocessorDirective(true);
290      Token HashToken = RawToken;
291      RawLex.LexFromRawLexer(RawToken);
292      if (RawToken.is(tok::raw_identifier))
293        PP.LookUpIdentifierInfo(RawToken);
294      if (RawToken.is(tok::identifier)) {
295        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
296          case tok::pp_include:
297          case tok::pp_include_next:
298          case tok::pp_import: {
299            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
300              Line);
301            StringRef LineInfoExtra;
302            if (const FileChange *Change = FindFileChangeLocation(
303                HashToken.getLocation())) {
304              if (Change->Mod) {
305                WriteImplicitModuleImport(Change->Mod, EOL);
306
307              // else now include and recursively process the file
308              } else if (Process(Change->Id, Change->FileType)) {
309                // and set lineinfo back to this file, if the nested one was
310                // actually included
311                // `2' indicates returning to a file (after having included
312                // another file.
313                LineInfoExtra = " 2";
314              }
315            }
316            // fix up lineinfo (since commented out directive changed line
317            // numbers) for inclusions that were skipped due to header guards
318            WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
319            break;
320          }
321          case tok::pp_pragma: {
322            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
323            if (Identifier == "clang" || Identifier == "GCC") {
324              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
325                // keep the directive in, commented out
326                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
327                  NextToWrite, Line);
328                // update our own type
329                FileType = SM.getFileCharacteristic(RawToken.getLocation());
330                WriteLineInfo(FileName, Line, FileType, EOL);
331              }
332            } else if (Identifier == "once") {
333              // keep the directive in, commented out
334              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
335                NextToWrite, Line);
336              WriteLineInfo(FileName, Line, FileType, EOL);
337            }
338            break;
339          }
340          default:
341            break;
342        }
343      }
344      RawLex.setParsingPreprocessorDirective(false);
345    }
346    RawLex.LexFromRawLexer(RawToken);
347  }
348  OutputContentUpTo(FromFile, NextToWrite,
349    SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
350    /*EnsureNewline*/true);
351  return true;
352}
353
354/// InclusionRewriterInInput - Implement -frewrite-includes mode.
355void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
356                                   const PreprocessorOutputOptions &Opts) {
357  SourceManager &SM = PP.getSourceManager();
358  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
359                                                     Opts.ShowLineMarkers);
360  PP.addPPCallbacks(Rewrite);
361
362  // First let the preprocessor process the entire file and call callbacks.
363  // Callbacks will record which #include's were actually performed.
364  PP.EnterMainSourceFile();
365  Token Tok;
366  // Only preprocessor directives matter here, so disable macro expansion
367  // everywhere else as an optimization.
368  // TODO: It would be even faster if the preprocessor could be switched
369  // to a mode where it would parse only preprocessor directives and comments,
370  // nothing else matters for parsing or processing.
371  PP.SetMacroExpansionOnlyInDirectives();
372  do {
373    PP.Lex(Tok);
374  } while (Tok.isNot(tok::eof));
375  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
376  OS->flush();
377}
378