InclusionRewriter.cpp revision 6bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code rewrites include invocations into their expansions.  This gives you
11// a file with all included files merged into it.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Rewrite/Frontend/Rewriters.h"
16#include "clang/Basic/SourceManager.h"
17#include "clang/Frontend/PreprocessorOutputOptions.h"
18#include "clang/Lex/HeaderSearch.h"
19#include "clang/Lex/Pragma.h"
20#include "clang/Lex/Preprocessor.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/Support/raw_ostream.h"
23
24using namespace clang;
25using namespace llvm;
26
27namespace {
28
29class InclusionRewriter : public PPCallbacks {
30  /// Information about which #includes were actually performed,
31  /// created by preprocessor callbacks.
32  struct FileChange {
33    const Module *Mod;
34    SourceLocation From;
35    FileID Id;
36    SrcMgr::CharacteristicKind FileType;
37    FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
38    }
39  };
40  Preprocessor &PP; ///< Used to find inclusion directives.
41  SourceManager &SM; ///< Used to read and manage source files.
42  raw_ostream &OS; ///< The destination stream for rewritten contents.
43  const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
44  bool ShowLineMarkers; ///< Show #line markers.
45  bool UseLineDirective; ///< Use of line directives or line markers.
46  typedef std::map<unsigned, FileChange> FileChangeMap;
47  FileChangeMap FileChanges; ///< Tracks which files were included where.
48  /// Used transitively for building up the FileChanges mapping over the
49  /// various \c PPCallbacks callbacks.
50  FileChangeMap::iterator LastInsertedFileChange;
51public:
52  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
53  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
54  void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
55    PredefinesBuffer = Buf;
56  }
57private:
58  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
59                   SrcMgr::CharacteristicKind FileType,
60                   FileID PrevFID) override;
61  void FileSkipped(const FileEntry &ParentFile, const Token &FilenameTok,
62                   SrcMgr::CharacteristicKind FileType) override;
63  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
64                          StringRef FileName, bool IsAngled,
65                          CharSourceRange FilenameRange, const FileEntry *File,
66                          StringRef SearchPath, StringRef RelativePath,
67                          const Module *Imported) override;
68  void WriteLineInfo(const char *Filename, int Line,
69                     SrcMgr::CharacteristicKind FileType,
70                     StringRef EOL, StringRef Extra = StringRef());
71  void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
72  void OutputContentUpTo(const MemoryBuffer &FromFile,
73                         unsigned &WriteFrom, unsigned WriteTo,
74                         StringRef EOL, int &lines,
75                         bool EnsureNewline);
76  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
77                           const MemoryBuffer &FromFile, StringRef EOL,
78                           unsigned &NextToWrite, int &Lines);
79  bool HandleHasInclude(FileID FileId, Lexer &RawLex,
80                        const DirectoryLookup *Lookup, Token &Tok,
81                        bool &FileExists);
82  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
83  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
84};
85
86}  // end anonymous namespace
87
88/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
89InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
90                                     bool ShowLineMarkers)
91  : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(nullptr),
92    ShowLineMarkers(ShowLineMarkers),
93    LastInsertedFileChange(FileChanges.end()) {
94  // If we're in microsoft mode, use normal #line instead of line markers.
95  UseLineDirective = PP.getLangOpts().MicrosoftExt;
96}
97
98/// Write appropriate line information as either #line directives or GNU line
99/// markers depending on what mode we're in, including the \p Filename and
100/// \p Line we are located at, using the specified \p EOL line separator, and
101/// any \p Extra context specifiers in GNU line directives.
102void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
103                                      SrcMgr::CharacteristicKind FileType,
104                                      StringRef EOL, StringRef Extra) {
105  if (!ShowLineMarkers)
106    return;
107  if (UseLineDirective) {
108    OS << "#line" << ' ' << Line << ' ' << '"';
109    OS.write_escaped(Filename);
110    OS << '"';
111  } else {
112    // Use GNU linemarkers as described here:
113    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
114    OS << '#' << ' ' << Line << ' ' << '"';
115    OS.write_escaped(Filename);
116    OS << '"';
117    if (!Extra.empty())
118      OS << Extra;
119    if (FileType == SrcMgr::C_System)
120      // "`3' This indicates that the following text comes from a system header
121      // file, so certain warnings should be suppressed."
122      OS << " 3";
123    else if (FileType == SrcMgr::C_ExternCSystem)
124      // as above for `3', plus "`4' This indicates that the following text
125      // should be treated as being wrapped in an implicit extern "C" block."
126      OS << " 3 4";
127  }
128  OS << EOL;
129}
130
131void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
132                                                  StringRef EOL) {
133  OS << "@import " << Mod->getFullModuleName() << ";"
134     << " /* clang -frewrite-includes: implicit import */" << EOL;
135}
136
137/// FileChanged - Whenever the preprocessor enters or exits a #include file
138/// it invokes this handler.
139void InclusionRewriter::FileChanged(SourceLocation Loc,
140                                    FileChangeReason Reason,
141                                    SrcMgr::CharacteristicKind NewFileType,
142                                    FileID) {
143  if (Reason != EnterFile)
144    return;
145  if (LastInsertedFileChange == FileChanges.end())
146    // we didn't reach this file (eg: the main file) via an inclusion directive
147    return;
148  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
149  LastInsertedFileChange->second.FileType = NewFileType;
150  LastInsertedFileChange = FileChanges.end();
151}
152
153/// Called whenever an inclusion is skipped due to canonical header protection
154/// macros.
155void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
156                                    const Token &/*FilenameTok*/,
157                                    SrcMgr::CharacteristicKind /*FileType*/) {
158  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
159    "found via an inclusion directive, was skipped");
160  FileChanges.erase(LastInsertedFileChange);
161  LastInsertedFileChange = FileChanges.end();
162}
163
164/// This should be called whenever the preprocessor encounters include
165/// directives. It does not say whether the file has been included, but it
166/// provides more information about the directive (hash location instead
167/// of location inside the included file). It is assumed that the matching
168/// FileChanged() or FileSkipped() is called after this.
169void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
170                                           const Token &/*IncludeTok*/,
171                                           StringRef /*FileName*/,
172                                           bool /*IsAngled*/,
173                                           CharSourceRange /*FilenameRange*/,
174                                           const FileEntry * /*File*/,
175                                           StringRef /*SearchPath*/,
176                                           StringRef /*RelativePath*/,
177                                           const Module *Imported) {
178  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
179    "directive was found before the previous one was processed");
180  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
181    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
182  assert(p.second && "Unexpected revisitation of the same include directive");
183  if (!Imported)
184    LastInsertedFileChange = p.first;
185}
186
187/// Simple lookup for a SourceLocation (specifically one denoting the hash in
188/// an inclusion directive) in the map of inclusion information, FileChanges.
189const InclusionRewriter::FileChange *
190InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
191  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
192  if (I != FileChanges.end())
193    return &I->second;
194  return nullptr;
195}
196
197/// Detect the likely line ending style of \p FromFile by examining the first
198/// newline found within it.
199static StringRef DetectEOL(const MemoryBuffer &FromFile) {
200  // detect what line endings the file uses, so that added content does not mix
201  // the style
202  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
203  if (!Pos)
204    return "\n";
205  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
206    return "\n\r";
207  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
208    return "\r\n";
209  return "\n";
210}
211
212/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
213/// \p WriteTo - 1.
214void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
215                                          unsigned &WriteFrom, unsigned WriteTo,
216                                          StringRef EOL, int &Line,
217                                          bool EnsureNewline) {
218  if (WriteTo <= WriteFrom)
219    return;
220  if (&FromFile == PredefinesBuffer) {
221    // Ignore the #defines of the predefines buffer.
222    WriteFrom = WriteTo;
223    return;
224  }
225  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
226  // count lines manually, it's faster than getPresumedLoc()
227  Line += std::count(FromFile.getBufferStart() + WriteFrom,
228                     FromFile.getBufferStart() + WriteTo, '\n');
229  if (EnsureNewline) {
230    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
231    if (LastChar != '\n' && LastChar != '\r')
232      OS << EOL;
233  }
234  WriteFrom = WriteTo;
235}
236
237/// Print characters from \p FromFile starting at \p NextToWrite up until the
238/// inclusion directive at \p StartToken, then print out the inclusion
239/// inclusion directive disabled by a #if directive, updating \p NextToWrite
240/// and \p Line to track the number of source lines visited and the progress
241/// through the \p FromFile buffer.
242void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
243                                            const Token &StartToken,
244                                            const MemoryBuffer &FromFile,
245                                            StringRef EOL,
246                                            unsigned &NextToWrite, int &Line) {
247  OutputContentUpTo(FromFile, NextToWrite,
248    SM.getFileOffset(StartToken.getLocation()), EOL, Line, false);
249  Token DirectiveToken;
250  do {
251    DirectiveLex.LexFromRawLexer(DirectiveToken);
252  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
253  if (&FromFile == PredefinesBuffer) {
254    // OutputContentUpTo() would not output anything anyway.
255    return;
256  }
257  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
258  OutputContentUpTo(FromFile, NextToWrite,
259    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
260    EOL, Line, true);
261  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
262}
263
264/// Find the next identifier in the pragma directive specified by \p RawToken.
265StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
266                                                Token &RawToken) {
267  RawLex.LexFromRawLexer(RawToken);
268  if (RawToken.is(tok::raw_identifier))
269    PP.LookUpIdentifierInfo(RawToken);
270  if (RawToken.is(tok::identifier))
271    return RawToken.getIdentifierInfo()->getName();
272  return StringRef();
273}
274
275// Expand __has_include and __has_include_next if possible. If there's no
276// definitive answer return false.
277bool InclusionRewriter::HandleHasInclude(
278    FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
279    bool &FileExists) {
280  // Lex the opening paren.
281  RawLex.LexFromRawLexer(Tok);
282  if (Tok.isNot(tok::l_paren))
283    return false;
284
285  RawLex.LexFromRawLexer(Tok);
286
287  SmallString<128> FilenameBuffer;
288  StringRef Filename;
289  // Since the raw lexer doesn't give us angle_literals we have to parse them
290  // ourselves.
291  // FIXME: What to do if the file name is a macro?
292  if (Tok.is(tok::less)) {
293    RawLex.LexFromRawLexer(Tok);
294
295    FilenameBuffer += '<';
296    do {
297      if (Tok.is(tok::eod)) // Sanity check.
298        return false;
299
300      if (Tok.is(tok::raw_identifier))
301        PP.LookUpIdentifierInfo(Tok);
302
303      // Get the string piece.
304      SmallVector<char, 128> TmpBuffer;
305      bool Invalid = false;
306      StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
307      if (Invalid)
308        return false;
309
310      FilenameBuffer += TmpName;
311
312      RawLex.LexFromRawLexer(Tok);
313    } while (Tok.isNot(tok::greater));
314
315    FilenameBuffer += '>';
316    Filename = FilenameBuffer;
317  } else {
318    if (Tok.isNot(tok::string_literal))
319      return false;
320
321    bool Invalid = false;
322    Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
323    if (Invalid)
324      return false;
325  }
326
327  // Lex the closing paren.
328  RawLex.LexFromRawLexer(Tok);
329  if (Tok.isNot(tok::r_paren))
330    return false;
331
332  // Now ask HeaderInfo if it knows about the header.
333  // FIXME: Subframeworks aren't handled here. Do we care?
334  bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
335  const DirectoryLookup *CurDir;
336  const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
337      Filename, SourceLocation(), isAngled, nullptr, CurDir,
338      PP.getSourceManager().getFileEntryForID(FileId), nullptr, nullptr,
339      nullptr, false);
340
341  FileExists = File != nullptr;
342  return true;
343}
344
345/// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
346/// and including content of included files recursively.
347bool InclusionRewriter::Process(FileID FileId,
348                                SrcMgr::CharacteristicKind FileType)
349{
350  bool Invalid;
351  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
352  if (Invalid) // invalid inclusion
353    return false;
354  const char *FileName = FromFile.getBufferIdentifier();
355  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
356  RawLex.SetCommentRetentionState(false);
357
358  StringRef EOL = DetectEOL(FromFile);
359
360  // Per the GNU docs: "1" indicates entering a new file.
361  if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
362    WriteLineInfo(FileName, 1, FileType, EOL, "");
363  else
364    WriteLineInfo(FileName, 1, FileType, EOL, " 1");
365
366  if (SM.getFileIDSize(FileId) == 0)
367    return false;
368
369  // The next byte to be copied from the source file, which may be non-zero if
370  // the lexer handled a BOM.
371  unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
372  assert(SM.getLineNumber(FileId, NextToWrite) == 1);
373  int Line = 1; // The current input file line number.
374
375  Token RawToken;
376  RawLex.LexFromRawLexer(RawToken);
377
378  // TODO: Consider adding a switch that strips possibly unimportant content,
379  // such as comments, to reduce the size of repro files.
380  while (RawToken.isNot(tok::eof)) {
381    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
382      RawLex.setParsingPreprocessorDirective(true);
383      Token HashToken = RawToken;
384      RawLex.LexFromRawLexer(RawToken);
385      if (RawToken.is(tok::raw_identifier))
386        PP.LookUpIdentifierInfo(RawToken);
387      if (RawToken.getIdentifierInfo() != nullptr) {
388        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
389          case tok::pp_include:
390          case tok::pp_include_next:
391          case tok::pp_import: {
392            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
393              Line);
394            if (FileId != PP.getPredefinesFileID())
395              WriteLineInfo(FileName, Line - 1, FileType, EOL, "");
396            StringRef LineInfoExtra;
397            if (const FileChange *Change = FindFileChangeLocation(
398                HashToken.getLocation())) {
399              if (Change->Mod) {
400                WriteImplicitModuleImport(Change->Mod, EOL);
401
402              // else now include and recursively process the file
403              } else if (Process(Change->Id, Change->FileType)) {
404                // and set lineinfo back to this file, if the nested one was
405                // actually included
406                // `2' indicates returning to a file (after having included
407                // another file.
408                LineInfoExtra = " 2";
409              }
410            }
411            // fix up lineinfo (since commented out directive changed line
412            // numbers) for inclusions that were skipped due to header guards
413            WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
414            break;
415          }
416          case tok::pp_pragma: {
417            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
418            if (Identifier == "clang" || Identifier == "GCC") {
419              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
420                // keep the directive in, commented out
421                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
422                  NextToWrite, Line);
423                // update our own type
424                FileType = SM.getFileCharacteristic(RawToken.getLocation());
425                WriteLineInfo(FileName, Line, FileType, EOL);
426              }
427            } else if (Identifier == "once") {
428              // keep the directive in, commented out
429              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
430                NextToWrite, Line);
431              WriteLineInfo(FileName, Line, FileType, EOL);
432            }
433            break;
434          }
435          case tok::pp_if:
436          case tok::pp_elif: {
437            bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
438                         tok::pp_elif);
439            // Rewrite special builtin macros to avoid pulling in host details.
440            do {
441              // Walk over the directive.
442              RawLex.LexFromRawLexer(RawToken);
443              if (RawToken.is(tok::raw_identifier))
444                PP.LookUpIdentifierInfo(RawToken);
445
446              if (RawToken.is(tok::identifier)) {
447                bool HasFile;
448                SourceLocation Loc = RawToken.getLocation();
449
450                // Rewrite __has_include(x)
451                if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
452                  if (!HandleHasInclude(FileId, RawLex, nullptr, RawToken,
453                                        HasFile))
454                    continue;
455                  // Rewrite __has_include_next(x)
456                } else if (RawToken.getIdentifierInfo()->isStr(
457                               "__has_include_next")) {
458                  const DirectoryLookup *Lookup = PP.GetCurDirLookup();
459                  if (Lookup)
460                    ++Lookup;
461
462                  if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
463                                        HasFile))
464                    continue;
465                } else {
466                  continue;
467                }
468                // Replace the macro with (0) or (1), followed by the commented
469                // out macro for reference.
470                OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
471                                  EOL, Line, false);
472                OS << '(' << (int) HasFile << ")/*";
473                OutputContentUpTo(FromFile, NextToWrite,
474                                  SM.getFileOffset(RawToken.getLocation()) +
475                                  RawToken.getLength(),
476                                  EOL, Line, false);
477                OS << "*/";
478              }
479            } while (RawToken.isNot(tok::eod));
480            if (elif) {
481              OutputContentUpTo(FromFile, NextToWrite,
482                                SM.getFileOffset(RawToken.getLocation()) +
483                                    RawToken.getLength(),
484                                EOL, Line, /*EnsureNewLine*/ true);
485              WriteLineInfo(FileName, Line, FileType, EOL);
486            }
487            break;
488          }
489          case tok::pp_endif:
490          case tok::pp_else: {
491            // We surround every #include by #if 0 to comment it out, but that
492            // changes line numbers. These are fixed up right after that, but
493            // the whole #include could be inside a preprocessor conditional
494            // that is not processed. So it is necessary to fix the line
495            // numbers one the next line after each #else/#endif as well.
496            RawLex.SetKeepWhitespaceMode(true);
497            do {
498              RawLex.LexFromRawLexer(RawToken);
499            } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
500            OutputContentUpTo(
501                FromFile, NextToWrite,
502                SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(),
503                EOL, Line, /*EnsureNewLine*/ true);
504            WriteLineInfo(FileName, Line, FileType, EOL);
505            RawLex.SetKeepWhitespaceMode(false);
506          }
507          default:
508            break;
509        }
510      }
511      RawLex.setParsingPreprocessorDirective(false);
512    }
513    RawLex.LexFromRawLexer(RawToken);
514  }
515  OutputContentUpTo(FromFile, NextToWrite,
516    SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line,
517    /*EnsureNewline*/true);
518  return true;
519}
520
521/// InclusionRewriterInInput - Implement -frewrite-includes mode.
522void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
523                                   const PreprocessorOutputOptions &Opts) {
524  SourceManager &SM = PP.getSourceManager();
525  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
526                                                     Opts.ShowLineMarkers);
527  PP.addPPCallbacks(Rewrite);
528  PP.IgnorePragmas();
529
530  // First let the preprocessor process the entire file and call callbacks.
531  // Callbacks will record which #include's were actually performed.
532  PP.EnterMainSourceFile();
533  Token Tok;
534  // Only preprocessor directives matter here, so disable macro expansion
535  // everywhere else as an optimization.
536  // TODO: It would be even faster if the preprocessor could be switched
537  // to a mode where it would parse only preprocessor directives and comments,
538  // nothing else matters for parsing or processing.
539  PP.SetMacroExpansionOnlyInDirectives();
540  do {
541    PP.Lex(Tok);
542  } while (Tok.isNot(tok::eof));
543  Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID()));
544  Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
545  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
546  OS->flush();
547}
548