InclusionRewriter.cpp revision 596eea7cc26979c952a0b177d024787a99b299df
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code rewrites include invocations into their expansions.  This gives you
11// a file with all included files merged into it.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Rewrite/Frontend/Rewriters.h"
16#include "clang/Basic/SourceManager.h"
17#include "clang/Frontend/PreprocessorOutputOptions.h"
18#include "clang/Lex/HeaderSearch.h"
19#include "clang/Lex/Preprocessor.h"
20#include "llvm/ADT/SmallString.h"
21#include "llvm/Support/raw_ostream.h"
22
23using namespace clang;
24using namespace llvm;
25
26namespace {
27
28class InclusionRewriter : public PPCallbacks {
29  /// Information about which #includes were actually performed,
30  /// created by preprocessor callbacks.
31  struct FileChange {
32    const Module *Mod;
33    SourceLocation From;
34    FileID Id;
35    SrcMgr::CharacteristicKind FileType;
36    FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
37    }
38  };
39  Preprocessor &PP; ///< Used to find inclusion directives.
40  SourceManager &SM; ///< Used to read and manage source files.
41  raw_ostream &OS; ///< The destination stream for rewritten contents.
42  bool ShowLineMarkers; ///< Show #line markers.
43  bool UseLineDirective; ///< Use of line directives or line markers.
44  typedef std::map<unsigned, FileChange> FileChangeMap;
45  FileChangeMap FileChanges; ///< Tracks which files were included where.
46  /// Used transitively for building up the FileChanges mapping over the
47  /// various \c PPCallbacks callbacks.
48  FileChangeMap::iterator LastInsertedFileChange;
49public:
50  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
51  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
52private:
53  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
54                           SrcMgr::CharacteristicKind FileType,
55                           FileID PrevFID);
56  virtual void FileSkipped(const FileEntry &ParentFile,
57                           const Token &FilenameTok,
58                           SrcMgr::CharacteristicKind FileType);
59  virtual void InclusionDirective(SourceLocation HashLoc,
60                                  const Token &IncludeTok,
61                                  StringRef FileName,
62                                  bool IsAngled,
63                                  CharSourceRange FilenameRange,
64                                  const FileEntry *File,
65                                  StringRef SearchPath,
66                                  StringRef RelativePath,
67                                  const Module *Imported);
68  void WriteLineInfo(const char *Filename, int Line,
69                     SrcMgr::CharacteristicKind FileType,
70                     StringRef EOL, StringRef Extra = StringRef());
71  void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
72  void OutputContentUpTo(const MemoryBuffer &FromFile,
73                         unsigned &WriteFrom, unsigned WriteTo,
74                         StringRef EOL, int &lines,
75                         bool EnsureNewline = false);
76  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
77                           const MemoryBuffer &FromFile, StringRef EOL,
78                           unsigned &NextToWrite, int &Lines);
79  bool HandleHasInclude(FileID FileId, Lexer &RawLex,
80                        const DirectoryLookup *Lookup, Token &Tok,
81                        bool &FileExists);
82  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
83  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
84};
85
86}  // end anonymous namespace
87
88/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
89InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
90                                     bool ShowLineMarkers)
91    : PP(PP), SM(PP.getSourceManager()), OS(OS),
92    ShowLineMarkers(ShowLineMarkers),
93    LastInsertedFileChange(FileChanges.end()) {
94  // If we're in microsoft mode, use normal #line instead of line markers.
95  UseLineDirective = PP.getLangOpts().MicrosoftExt;
96}
97
98/// Write appropriate line information as either #line directives or GNU line
99/// markers depending on what mode we're in, including the \p Filename and
100/// \p Line we are located at, using the specified \p EOL line separator, and
101/// any \p Extra context specifiers in GNU line directives.
102void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
103                                      SrcMgr::CharacteristicKind FileType,
104                                      StringRef EOL, StringRef Extra) {
105  if (!ShowLineMarkers)
106    return;
107  if (UseLineDirective) {
108    OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
109  } else {
110    // Use GNU linemarkers as described here:
111    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
112    OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
113    if (!Extra.empty())
114      OS << Extra;
115    if (FileType == SrcMgr::C_System)
116      // "`3' This indicates that the following text comes from a system header
117      // file, so certain warnings should be suppressed."
118      OS << " 3";
119    else if (FileType == SrcMgr::C_ExternCSystem)
120      // as above for `3', plus "`4' This indicates that the following text
121      // should be treated as being wrapped in an implicit extern "C" block."
122      OS << " 3 4";
123  }
124  OS << EOL;
125}
126
127void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
128                                                  StringRef EOL) {
129  OS << "@import " << Mod->getFullModuleName() << ";"
130     << " /* clang -frewrite-includes: implicit import */" << EOL;
131}
132
133/// FileChanged - Whenever the preprocessor enters or exits a #include file
134/// it invokes this handler.
135void InclusionRewriter::FileChanged(SourceLocation Loc,
136                                    FileChangeReason Reason,
137                                    SrcMgr::CharacteristicKind NewFileType,
138                                    FileID) {
139  if (Reason != EnterFile)
140    return;
141  if (LastInsertedFileChange == FileChanges.end())
142    // we didn't reach this file (eg: the main file) via an inclusion directive
143    return;
144  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
145  LastInsertedFileChange->second.FileType = NewFileType;
146  LastInsertedFileChange = FileChanges.end();
147}
148
149/// Called whenever an inclusion is skipped due to canonical header protection
150/// macros.
151void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
152                                    const Token &/*FilenameTok*/,
153                                    SrcMgr::CharacteristicKind /*FileType*/) {
154  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
155    "found via an inclusion directive, was skipped");
156  FileChanges.erase(LastInsertedFileChange);
157  LastInsertedFileChange = FileChanges.end();
158}
159
160/// This should be called whenever the preprocessor encounters include
161/// directives. It does not say whether the file has been included, but it
162/// provides more information about the directive (hash location instead
163/// of location inside the included file). It is assumed that the matching
164/// FileChanged() or FileSkipped() is called after this.
165void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
166                                           const Token &/*IncludeTok*/,
167                                           StringRef /*FileName*/,
168                                           bool /*IsAngled*/,
169                                           CharSourceRange /*FilenameRange*/,
170                                           const FileEntry * /*File*/,
171                                           StringRef /*SearchPath*/,
172                                           StringRef /*RelativePath*/,
173                                           const Module *Imported) {
174  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
175    "directive was found before the previous one was processed");
176  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
177    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
178  assert(p.second && "Unexpected revisitation of the same include directive");
179  if (!Imported)
180    LastInsertedFileChange = p.first;
181}
182
183/// Simple lookup for a SourceLocation (specifically one denoting the hash in
184/// an inclusion directive) in the map of inclusion information, FileChanges.
185const InclusionRewriter::FileChange *
186InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
187  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
188  if (I != FileChanges.end())
189    return &I->second;
190  return NULL;
191}
192
193/// Detect the likely line ending style of \p FromFile by examining the first
194/// newline found within it.
195static StringRef DetectEOL(const MemoryBuffer &FromFile) {
196  // detect what line endings the file uses, so that added content does not mix
197  // the style
198  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
199  if (Pos == NULL)
200    return "\n";
201  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
202    return "\n\r";
203  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
204    return "\r\n";
205  return "\n";
206}
207
208/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
209/// \p WriteTo - 1.
210void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
211                                          unsigned &WriteFrom, unsigned WriteTo,
212                                          StringRef EOL, int &Line,
213                                          bool EnsureNewline) {
214  if (WriteTo <= WriteFrom)
215    return;
216  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
217  // count lines manually, it's faster than getPresumedLoc()
218  Line += std::count(FromFile.getBufferStart() + WriteFrom,
219                     FromFile.getBufferStart() + WriteTo, '\n');
220  if (EnsureNewline) {
221    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
222    if (LastChar != '\n' && LastChar != '\r')
223      OS << EOL;
224  }
225  WriteFrom = WriteTo;
226}
227
228/// Print characters from \p FromFile starting at \p NextToWrite up until the
229/// inclusion directive at \p StartToken, then print out the inclusion
230/// inclusion directive disabled by a #if directive, updating \p NextToWrite
231/// and \p Line to track the number of source lines visited and the progress
232/// through the \p FromFile buffer.
233void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
234                                            const Token &StartToken,
235                                            const MemoryBuffer &FromFile,
236                                            StringRef EOL,
237                                            unsigned &NextToWrite, int &Line) {
238  OutputContentUpTo(FromFile, NextToWrite,
239    SM.getFileOffset(StartToken.getLocation()), EOL, Line);
240  Token DirectiveToken;
241  do {
242    DirectiveLex.LexFromRawLexer(DirectiveToken);
243  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
244  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
245  OutputContentUpTo(FromFile, NextToWrite,
246    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
247    EOL, Line);
248  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
249}
250
251/// Find the next identifier in the pragma directive specified by \p RawToken.
252StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
253                                                Token &RawToken) {
254  RawLex.LexFromRawLexer(RawToken);
255  if (RawToken.is(tok::raw_identifier))
256    PP.LookUpIdentifierInfo(RawToken);
257  if (RawToken.is(tok::identifier))
258    return RawToken.getIdentifierInfo()->getName();
259  return StringRef();
260}
261
262// Expand __has_include and __has_include_next if possible. If there's no
263// definitive answer return false.
264bool InclusionRewriter::HandleHasInclude(
265    FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
266    bool &FileExists) {
267  // Lex the opening paren.
268  RawLex.LexFromRawLexer(Tok);
269  if (Tok.isNot(tok::l_paren))
270    return false;
271
272  RawLex.LexFromRawLexer(Tok);
273
274  SmallString<128> FilenameBuffer;
275  StringRef Filename;
276  // Since the raw lexer doesn't give us angle_literals we have to parse them
277  // ourselves.
278  // FIXME: What to do if the file name is a macro?
279  if (Tok.is(tok::less)) {
280    RawLex.LexFromRawLexer(Tok);
281
282    FilenameBuffer += '<';
283    do {
284      if (Tok.is(tok::eod)) // Sanity check.
285        return false;
286
287      if (Tok.is(tok::raw_identifier))
288        PP.LookUpIdentifierInfo(Tok);
289
290      // Get the string piece.
291      SmallVector<char, 128> TmpBuffer;
292      bool Invalid = false;
293      StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
294      if (Invalid)
295        return false;
296
297      FilenameBuffer += TmpName;
298
299      RawLex.LexFromRawLexer(Tok);
300    } while (Tok.isNot(tok::greater));
301
302    FilenameBuffer += '>';
303    Filename = FilenameBuffer;
304  } else {
305    if (Tok.isNot(tok::string_literal))
306      return false;
307
308    bool Invalid = false;
309    Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
310    if (Invalid)
311      return false;
312  }
313
314  // Lex the closing paren.
315  RawLex.LexFromRawLexer(Tok);
316  if (Tok.isNot(tok::r_paren))
317    return false;
318
319  // Now ask HeaderInfo if it knows about the header.
320  // FIXME: Subframeworks aren't handled here. Do we care?
321  bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
322  const DirectoryLookup *CurDir;
323  const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
324      Filename, isAngled, 0, CurDir,
325      PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false);
326
327  FileExists = File != 0;
328  return true;
329}
330
331/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
332/// and including content of included files recursively.
333bool InclusionRewriter::Process(FileID FileId,
334                                SrcMgr::CharacteristicKind FileType)
335{
336  bool Invalid;
337  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
338  if (Invalid) // invalid inclusion
339    return false;
340  const char *FileName = FromFile.getBufferIdentifier();
341  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
342  RawLex.SetCommentRetentionState(false);
343
344  StringRef EOL = DetectEOL(FromFile);
345
346  // Per the GNU docs: "1" indicates the start of a new file.
347  WriteLineInfo(FileName, 1, FileType, EOL, " 1");
348
349  if (SM.getFileIDSize(FileId) == 0)
350    return false;
351
352  // The next byte to be copied from the source file
353  unsigned NextToWrite = 0;
354  int Line = 1; // The current input file line number.
355
356  Token RawToken;
357  RawLex.LexFromRawLexer(RawToken);
358
359  // TODO: Consider adding a switch that strips possibly unimportant content,
360  // such as comments, to reduce the size of repro files.
361  while (RawToken.isNot(tok::eof)) {
362    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
363      RawLex.setParsingPreprocessorDirective(true);
364      Token HashToken = RawToken;
365      RawLex.LexFromRawLexer(RawToken);
366      if (RawToken.is(tok::raw_identifier))
367        PP.LookUpIdentifierInfo(RawToken);
368      if (RawToken.is(tok::identifier) || RawToken.is(tok::kw_if)) {
369        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
370          case tok::pp_include:
371          case tok::pp_include_next:
372          case tok::pp_import: {
373            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
374              Line);
375            StringRef LineInfoExtra;
376            if (const FileChange *Change = FindFileChangeLocation(
377                HashToken.getLocation())) {
378              if (Change->Mod) {
379                WriteImplicitModuleImport(Change->Mod, EOL);
380
381              // else now include and recursively process the file
382              } else if (Process(Change->Id, Change->FileType)) {
383                // and set lineinfo back to this file, if the nested one was
384                // actually included
385                // `2' indicates returning to a file (after having included
386                // another file.
387                LineInfoExtra = " 2";
388              }
389            }
390            // fix up lineinfo (since commented out directive changed line
391            // numbers) for inclusions that were skipped due to header guards
392            WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
393            break;
394          }
395          case tok::pp_pragma: {
396            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
397            if (Identifier == "clang" || Identifier == "GCC") {
398              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
399                // keep the directive in, commented out
400                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
401                  NextToWrite, Line);
402                // update our own type
403                FileType = SM.getFileCharacteristic(RawToken.getLocation());
404                WriteLineInfo(FileName, Line, FileType, EOL);
405              }
406            } else if (Identifier == "once") {
407              // keep the directive in, commented out
408              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
409                NextToWrite, Line);
410              WriteLineInfo(FileName, Line, FileType, EOL);
411            }
412            break;
413          }
414          case tok::pp_if:
415          case tok::pp_elif:
416            // Rewrite special builtin macros to avoid pulling in host details.
417            do {
418              // Walk over the directive.
419              RawLex.LexFromRawLexer(RawToken);
420              if (RawToken.is(tok::raw_identifier))
421                PP.LookUpIdentifierInfo(RawToken);
422
423              if (RawToken.is(tok::identifier)) {
424                bool HasFile;
425                SourceLocation Loc = RawToken.getLocation();
426
427                // Rewrite __has_include(x)
428                if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
429                  if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile))
430                    continue;
431                  // Rewrite __has_include_next(x)
432                } else if (RawToken.getIdentifierInfo()->isStr(
433                               "__has_include_next")) {
434                  const DirectoryLookup *Lookup = PP.GetCurDirLookup();
435                  if (Lookup)
436                    ++Lookup;
437
438                  if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
439                                        HasFile))
440                    continue;
441                } else {
442                  continue;
443                }
444                // Replace the macro with (0) or (1), followed by the commented
445                // out macro for reference.
446                OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
447                                  EOL, Line);
448                OS << '(' << (int) HasFile << ")/*";
449                OutputContentUpTo(FromFile, NextToWrite,
450                                  SM.getFileOffset(RawToken.getLocation()) +
451                                  RawToken.getLength(),
452                                  EOL, Line);
453                OS << "*/";
454              }
455            } while (RawToken.isNot(tok::eod));
456
457            break;
458          default:
459            break;
460        }
461      }
462      RawLex.setParsingPreprocessorDirective(false);
463    }
464    RawLex.LexFromRawLexer(RawToken);
465  }
466  OutputContentUpTo(FromFile, NextToWrite,
467    SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
468    /*EnsureNewline*/true);
469  return true;
470}
471
472/// InclusionRewriterInInput - Implement -frewrite-includes mode.
473void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
474                                   const PreprocessorOutputOptions &Opts) {
475  SourceManager &SM = PP.getSourceManager();
476  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
477                                                     Opts.ShowLineMarkers);
478  PP.addPPCallbacks(Rewrite);
479
480  // First let the preprocessor process the entire file and call callbacks.
481  // Callbacks will record which #include's were actually performed.
482  PP.EnterMainSourceFile();
483  Token Tok;
484  // Only preprocessor directives matter here, so disable macro expansion
485  // everywhere else as an optimization.
486  // TODO: It would be even faster if the preprocessor could be switched
487  // to a mode where it would parse only preprocessor directives and comments,
488  // nothing else matters for parsing or processing.
489  PP.SetMacroExpansionOnlyInDirectives();
490  do {
491    PP.Lex(Tok);
492  } while (Tok.isNot(tok::eof));
493  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
494  OS->flush();
495}
496