InclusionRewriter.cpp revision da313592441db36cf4b06be97c4bcc238ee6fa9c
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code rewrites include invocations into their expansions.  This gives you
11// a file with all included files merged into it.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Rewrite/Frontend/Rewriters.h"
16#include "clang/Lex/Preprocessor.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Frontend/PreprocessorOutputOptions.h"
19#include "llvm/Support/raw_ostream.h"
20
21using namespace clang;
22using namespace llvm;
23
24namespace {
25
26class InclusionRewriter : public PPCallbacks {
27  /// Information about which #includes were actually performed,
28  /// created by preprocessor callbacks.
29  struct FileChange {
30    SourceLocation From;
31    FileID Id;
32    SrcMgr::CharacteristicKind FileType;
33    FileChange(SourceLocation From) : From(From) {
34    }
35  };
36  Preprocessor &PP; ///< Used to find inclusion directives.
37  SourceManager &SM; ///< Used to read and manage source files.
38  raw_ostream &OS; ///< The destination stream for rewritten contents.
39  bool ShowLineMarkers; ///< Show #line markers.
40  bool UseLineDirective; ///< Use of line directives or line markers.
41  typedef std::map<unsigned, FileChange> FileChangeMap;
42  FileChangeMap FileChanges; /// Tracks which files were included where.
43  /// Used transitively for building up the FileChanges mapping over the
44  /// various \c PPCallbacks callbacks.
45  FileChangeMap::iterator LastInsertedFileChange;
46public:
47  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
48  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
49private:
50  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
51                           SrcMgr::CharacteristicKind FileType,
52                           FileID PrevFID);
53  virtual void FileSkipped(const FileEntry &ParentFile,
54                           const Token &FilenameTok,
55                           SrcMgr::CharacteristicKind FileType);
56  virtual void InclusionDirective(SourceLocation HashLoc,
57                                  const Token &IncludeTok,
58                                  StringRef FileName,
59                                  bool IsAngled,
60                                  CharSourceRange FilenameRange,
61                                  const FileEntry *File,
62                                  StringRef SearchPath,
63                                  StringRef RelativePath);
64  void WriteLineInfo(const char *Filename, int Line,
65                     SrcMgr::CharacteristicKind FileType,
66                     StringRef EOL, StringRef Extra = StringRef());
67  void OutputContentUpTo(const MemoryBuffer &FromFile,
68                         unsigned &WriteFrom, unsigned WriteTo,
69                         StringRef EOL, int &lines,
70                         bool EnsureNewline = false);
71  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
72                           const MemoryBuffer &FromFile, StringRef EOL,
73                           unsigned &NextToWrite, int &Lines);
74  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
75  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
76};
77
78}  // end anonymous namespace
79
80/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
81InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
82                                     bool ShowLineMarkers)
83    : PP(PP), SM(PP.getSourceManager()), OS(OS),
84    ShowLineMarkers(ShowLineMarkers),
85    LastInsertedFileChange(FileChanges.end()) {
86  // If we're in microsoft mode, use normal #line instead of line markers.
87  UseLineDirective = PP.getLangOpts().MicrosoftExt;
88}
89
90/// Write appropriate line information as either #line directives or GNU line
91/// markers depending on what mode we're in, including the \p Filename and
92/// \p Line we are located at, using the specified \p EOL line separator, and
93/// any \p Extra context specifiers in GNU line directives.
94void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
95                                      SrcMgr::CharacteristicKind FileType,
96                                      StringRef EOL, StringRef Extra) {
97  if (!ShowLineMarkers)
98    return;
99  if (UseLineDirective) {
100    OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
101  } else {
102    // Use GNU linemarkers as described here:
103    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
104    OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
105    if (!Extra.empty())
106      OS << Extra;
107    if (FileType == SrcMgr::C_System)
108      // "`3' This indicates that the following text comes from a system header
109      // file, so certain warnings should be suppressed."
110      OS << " 3";
111    else if (FileType == SrcMgr::C_ExternCSystem)
112      // as above for `3', plus "`4' This indicates that the following text
113      // should be treated as being wrapped in an implicit extern "C" block."
114      OS << " 3 4";
115  }
116  OS << EOL;
117}
118
119/// FileChanged - Whenever the preprocessor enters or exits a #include file
120/// it invokes this handler.
121void InclusionRewriter::FileChanged(SourceLocation Loc,
122                                    FileChangeReason Reason,
123                                    SrcMgr::CharacteristicKind NewFileType,
124                                    FileID) {
125  if (Reason != EnterFile)
126    return;
127  if (LastInsertedFileChange == FileChanges.end())
128    // we didn't reach this file (eg: the main file) via an inclusion directive
129    return;
130  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
131  LastInsertedFileChange->second.FileType = NewFileType;
132  LastInsertedFileChange = FileChanges.end();
133}
134
135/// Called whenever an inclusion is skipped due to canonical header protection
136/// macros.
137void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
138                                    const Token &/*FilenameTok*/,
139                                    SrcMgr::CharacteristicKind /*FileType*/) {
140  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
141    "found via an inclusion directive, was skipped");
142  FileChanges.erase(LastInsertedFileChange);
143  LastInsertedFileChange = FileChanges.end();
144}
145
146/// This should be called whenever the preprocessor encounters include
147/// directives. It does not say whether the file has been included, but it
148/// provides more information about the directive (hash location instead
149/// of location inside the included file). It is assumed that the matching
150/// FileChanged() or FileSkipped() is called after this.
151void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
152                                           const Token &/*IncludeTok*/,
153                                           StringRef /*FileName*/,
154                                           bool /*IsAngled*/,
155                                           CharSourceRange /*FilenameRange*/,
156                                           const FileEntry * /*File*/,
157                                           StringRef /*SearchPath*/,
158                                           StringRef /*RelativePath*/) {
159  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
160    "directive was found before the previous one was processed");
161  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
162    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc)));
163  assert(p.second && "Unexpected revisitation of the same include directive");
164  LastInsertedFileChange = p.first;
165}
166
167/// Simple lookup for a SourceLocation (specifically one denoting the hash in
168/// an inclusion directive) in the map of inclusion information, FileChanges.
169const InclusionRewriter::FileChange *
170InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
171  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
172  if (I != FileChanges.end())
173    return &I->second;
174  return NULL;
175}
176
177/// Detect the likely line ending style of \p FromFile by examining the first
178/// newline found within it.
179static StringRef DetectEOL(const MemoryBuffer &FromFile) {
180  // detect what line endings the file uses, so that added content does not mix
181  // the style
182  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
183  if (Pos == NULL)
184    return "\n";
185  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
186    return "\n\r";
187  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
188    return "\r\n";
189  return "\n";
190}
191
192/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
193/// \p WriteTo - 1.
194void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
195                                          unsigned &WriteFrom, unsigned WriteTo,
196                                          StringRef EOL, int &Line,
197                                          bool EnsureNewline) {
198  if (WriteTo <= WriteFrom)
199    return;
200  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
201  // count lines manually, it's faster than getPresumedLoc()
202  Line += std::count(FromFile.getBufferStart() + WriteFrom,
203                     FromFile.getBufferStart() + WriteTo, '\n');
204  if (EnsureNewline) {
205    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
206    if (LastChar != '\n' && LastChar != '\r')
207      OS << EOL;
208  }
209  WriteFrom = WriteTo;
210}
211
212/// Print characters from \p FromFile starting at \p NextToWrite up until the
213/// inclusion directive at \p StartToken, then print out the inclusion
214/// inclusion directive disabled by a #if directive, updating \p NextToWrite
215/// and \p Line to track the number of source lines visited and the progress
216/// through the \p FromFile buffer.
217void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
218                                            const Token &StartToken,
219                                            const MemoryBuffer &FromFile,
220                                            StringRef EOL,
221                                            unsigned &NextToWrite, int &Line) {
222  OutputContentUpTo(FromFile, NextToWrite,
223    SM.getFileOffset(StartToken.getLocation()), EOL, Line);
224  Token DirectiveToken;
225  do {
226    DirectiveLex.LexFromRawLexer(DirectiveToken);
227  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
228  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
229  OutputContentUpTo(FromFile, NextToWrite,
230    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
231    EOL, Line);
232  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
233}
234
235/// Find the next identifier in the pragma directive specified by \p RawToken.
236StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
237                                                Token &RawToken) {
238  RawLex.LexFromRawLexer(RawToken);
239  if (RawToken.is(tok::raw_identifier))
240    PP.LookUpIdentifierInfo(RawToken);
241  if (RawToken.is(tok::identifier))
242    return RawToken.getIdentifierInfo()->getName();
243  return StringRef();
244}
245
246/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
247/// and including content of included files recursively.
248bool InclusionRewriter::Process(FileID FileId,
249                                SrcMgr::CharacteristicKind FileType)
250{
251  bool Invalid;
252  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
253  if (Invalid) // invalid inclusion
254    return true;
255  const char *FileName = FromFile.getBufferIdentifier();
256  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
257  RawLex.SetCommentRetentionState(false);
258
259  StringRef EOL = DetectEOL(FromFile);
260
261  // Per the GNU docs: "1" indicates the start of a new file.
262  WriteLineInfo(FileName, 1, FileType, EOL, " 1");
263
264  if (SM.getFileIDSize(FileId) == 0)
265    return true;
266
267  // The next byte to be copied from the source file
268  unsigned NextToWrite = 0;
269  int Line = 1; // The current input file line number.
270
271  Token RawToken;
272  RawLex.LexFromRawLexer(RawToken);
273
274  // TODO: Consider adding a switch that strips possibly unimportant content,
275  // such as comments, to reduce the size of repro files.
276  while (RawToken.isNot(tok::eof)) {
277    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
278      RawLex.setParsingPreprocessorDirective(true);
279      Token HashToken = RawToken;
280      RawLex.LexFromRawLexer(RawToken);
281      if (RawToken.is(tok::raw_identifier))
282        PP.LookUpIdentifierInfo(RawToken);
283      if (RawToken.is(tok::identifier)) {
284        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
285          case tok::pp_include:
286          case tok::pp_include_next:
287          case tok::pp_import: {
288            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
289              Line);
290            if (const FileChange *Change = FindFileChangeLocation(
291                HashToken.getLocation())) {
292              // now include and recursively process the file
293              if (Process(Change->Id, Change->FileType))
294                // and set lineinfo back to this file, if the nested one was
295                // actually included
296                // `2' indicates returning to a file (after having included
297                // another file.
298                WriteLineInfo(FileName, Line, FileType, EOL, " 2");
299            } else
300              // fix up lineinfo (since commented out directive changed line
301              // numbers) for inclusions that were skipped due to header guards
302              WriteLineInfo(FileName, Line, FileType, EOL);
303            break;
304          }
305          case tok::pp_pragma: {
306            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
307            if (Identifier == "clang" || Identifier == "GCC") {
308              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
309                // keep the directive in, commented out
310                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
311                  NextToWrite, Line);
312                // update our own type
313                FileType = SM.getFileCharacteristic(RawToken.getLocation());
314                WriteLineInfo(FileName, Line, FileType, EOL);
315              }
316            } else if (Identifier == "once") {
317              // keep the directive in, commented out
318              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
319                NextToWrite, Line);
320              WriteLineInfo(FileName, Line, FileType, EOL);
321            }
322            break;
323          }
324          default:
325            break;
326        }
327      }
328      RawLex.setParsingPreprocessorDirective(false);
329    }
330    RawLex.LexFromRawLexer(RawToken);
331  }
332  OutputContentUpTo(FromFile, NextToWrite,
333    SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
334    /*EnsureNewline*/true);
335  return true;
336}
337
338/// InclusionRewriterInInput - Implement -frewrite-includes mode.
339void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
340                                   const PreprocessorOutputOptions &Opts) {
341  SourceManager &SM = PP.getSourceManager();
342  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
343                                                     Opts.ShowLineMarkers);
344  PP.addPPCallbacks(Rewrite);
345
346  // First let the preprocessor process the entire file and call callbacks.
347  // Callbacks will record which #include's were actually performed.
348  PP.EnterMainSourceFile();
349  Token Tok;
350  // Only preprocessor directives matter here, so disable macro expansion
351  // everywhere else as an optimization.
352  // TODO: It would be even faster if the preprocessor could be switched
353  // to a mode where it would parse only preprocessor directives and comments,
354  // nothing else matters for parsing or processing.
355  PP.SetMacroExpansionOnlyInDirectives();
356  do {
357    PP.Lex(Tok);
358  } while (Tok.isNot(tok::eof));
359  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
360  OS->flush();
361}
362