InclusionRewriter.cpp revision 651f13cea278ec967336033dd032faef0e9fc2ec
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code rewrites include invocations into their expansions.  This gives you
11// a file with all included files merged into it.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Rewrite/Frontend/Rewriters.h"
16#include "clang/Basic/SourceManager.h"
17#include "clang/Frontend/PreprocessorOutputOptions.h"
18#include "clang/Lex/HeaderSearch.h"
19#include "clang/Lex/Pragma.h"
20#include "clang/Lex/Preprocessor.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/Support/raw_ostream.h"
23
24using namespace clang;
25using namespace llvm;
26
27namespace {
28
29class InclusionRewriter : public PPCallbacks {
30  /// Information about which #includes were actually performed,
31  /// created by preprocessor callbacks.
32  struct FileChange {
33    const Module *Mod;
34    SourceLocation From;
35    FileID Id;
36    SrcMgr::CharacteristicKind FileType;
37    FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
38    }
39  };
40  Preprocessor &PP; ///< Used to find inclusion directives.
41  SourceManager &SM; ///< Used to read and manage source files.
42  raw_ostream &OS; ///< The destination stream for rewritten contents.
43  const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
44  bool ShowLineMarkers; ///< Show #line markers.
45  bool UseLineDirective; ///< Use of line directives or line markers.
46  typedef std::map<unsigned, FileChange> FileChangeMap;
47  FileChangeMap FileChanges; ///< Tracks which files were included where.
48  /// Used transitively for building up the FileChanges mapping over the
49  /// various \c PPCallbacks callbacks.
50  FileChangeMap::iterator LastInsertedFileChange;
51public:
52  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
53  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
54  void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
55    PredefinesBuffer = Buf;
56  }
57private:
58  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
59                   SrcMgr::CharacteristicKind FileType,
60                   FileID PrevFID) override;
61  void FileSkipped(const FileEntry &ParentFile, const Token &FilenameTok,
62                   SrcMgr::CharacteristicKind FileType) override;
63  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
64                          StringRef FileName, bool IsAngled,
65                          CharSourceRange FilenameRange, const FileEntry *File,
66                          StringRef SearchPath, StringRef RelativePath,
67                          const Module *Imported) override;
68  void WriteLineInfo(const char *Filename, int Line,
69                     SrcMgr::CharacteristicKind FileType,
70                     StringRef EOL, StringRef Extra = StringRef());
71  void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
72  void OutputContentUpTo(const MemoryBuffer &FromFile,
73                         unsigned &WriteFrom, unsigned WriteTo,
74                         StringRef EOL, int &lines,
75                         bool EnsureNewline);
76  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
77                           const MemoryBuffer &FromFile, StringRef EOL,
78                           unsigned &NextToWrite, int &Lines);
79  bool HandleHasInclude(FileID FileId, Lexer &RawLex,
80                        const DirectoryLookup *Lookup, Token &Tok,
81                        bool &FileExists);
82  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
83  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
84};
85
86}  // end anonymous namespace
87
88/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
89InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
90                                     bool ShowLineMarkers)
91    : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(0),
92    ShowLineMarkers(ShowLineMarkers),
93    LastInsertedFileChange(FileChanges.end()) {
94  // If we're in microsoft mode, use normal #line instead of line markers.
95  UseLineDirective = PP.getLangOpts().MicrosoftExt;
96}
97
98/// Write appropriate line information as either #line directives or GNU line
99/// markers depending on what mode we're in, including the \p Filename and
100/// \p Line we are located at, using the specified \p EOL line separator, and
101/// any \p Extra context specifiers in GNU line directives.
102void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
103                                      SrcMgr::CharacteristicKind FileType,
104                                      StringRef EOL, StringRef Extra) {
105  if (!ShowLineMarkers)
106    return;
107  if (UseLineDirective) {
108    OS << "#line" << ' ' << Line << ' ' << '"';
109    OS.write_escaped(Filename);
110    OS << '"';
111  } else {
112    // Use GNU linemarkers as described here:
113    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
114    OS << '#' << ' ' << Line << ' ' << '"';
115    OS.write_escaped(Filename);
116    OS << '"';
117    if (!Extra.empty())
118      OS << Extra;
119    if (FileType == SrcMgr::C_System)
120      // "`3' This indicates that the following text comes from a system header
121      // file, so certain warnings should be suppressed."
122      OS << " 3";
123    else if (FileType == SrcMgr::C_ExternCSystem)
124      // as above for `3', plus "`4' This indicates that the following text
125      // should be treated as being wrapped in an implicit extern "C" block."
126      OS << " 3 4";
127  }
128  OS << EOL;
129}
130
131void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
132                                                  StringRef EOL) {
133  OS << "@import " << Mod->getFullModuleName() << ";"
134     << " /* clang -frewrite-includes: implicit import */" << EOL;
135}
136
137/// FileChanged - Whenever the preprocessor enters or exits a #include file
138/// it invokes this handler.
139void InclusionRewriter::FileChanged(SourceLocation Loc,
140                                    FileChangeReason Reason,
141                                    SrcMgr::CharacteristicKind NewFileType,
142                                    FileID) {
143  if (Reason != EnterFile)
144    return;
145  if (LastInsertedFileChange == FileChanges.end())
146    // we didn't reach this file (eg: the main file) via an inclusion directive
147    return;
148  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
149  LastInsertedFileChange->second.FileType = NewFileType;
150  LastInsertedFileChange = FileChanges.end();
151}
152
153/// Called whenever an inclusion is skipped due to canonical header protection
154/// macros.
155void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
156                                    const Token &/*FilenameTok*/,
157                                    SrcMgr::CharacteristicKind /*FileType*/) {
158  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
159    "found via an inclusion directive, was skipped");
160  FileChanges.erase(LastInsertedFileChange);
161  LastInsertedFileChange = FileChanges.end();
162}
163
164/// This should be called whenever the preprocessor encounters include
165/// directives. It does not say whether the file has been included, but it
166/// provides more information about the directive (hash location instead
167/// of location inside the included file). It is assumed that the matching
168/// FileChanged() or FileSkipped() is called after this.
169void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
170                                           const Token &/*IncludeTok*/,
171                                           StringRef /*FileName*/,
172                                           bool /*IsAngled*/,
173                                           CharSourceRange /*FilenameRange*/,
174                                           const FileEntry * /*File*/,
175                                           StringRef /*SearchPath*/,
176                                           StringRef /*RelativePath*/,
177                                           const Module *Imported) {
178  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
179    "directive was found before the previous one was processed");
180  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
181    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
182  assert(p.second && "Unexpected revisitation of the same include directive");
183  if (!Imported)
184    LastInsertedFileChange = p.first;
185}
186
187/// Simple lookup for a SourceLocation (specifically one denoting the hash in
188/// an inclusion directive) in the map of inclusion information, FileChanges.
189const InclusionRewriter::FileChange *
190InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
191  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
192  if (I != FileChanges.end())
193    return &I->second;
194  return NULL;
195}
196
197/// Detect the likely line ending style of \p FromFile by examining the first
198/// newline found within it.
199static StringRef DetectEOL(const MemoryBuffer &FromFile) {
200  // detect what line endings the file uses, so that added content does not mix
201  // the style
202  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
203  if (Pos == NULL)
204    return "\n";
205  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
206    return "\n\r";
207  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
208    return "\r\n";
209  return "\n";
210}
211
212/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
213/// \p WriteTo - 1.
214void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
215                                          unsigned &WriteFrom, unsigned WriteTo,
216                                          StringRef EOL, int &Line,
217                                          bool EnsureNewline) {
218  if (WriteTo <= WriteFrom)
219    return;
220  if (&FromFile == PredefinesBuffer) {
221    // Ignore the #defines of the predefines buffer.
222    WriteFrom = WriteTo;
223    return;
224  }
225  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
226  // count lines manually, it's faster than getPresumedLoc()
227  Line += std::count(FromFile.getBufferStart() + WriteFrom,
228                     FromFile.getBufferStart() + WriteTo, '\n');
229  if (EnsureNewline) {
230    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
231    if (LastChar != '\n' && LastChar != '\r')
232      OS << EOL;
233  }
234  WriteFrom = WriteTo;
235}
236
237/// Print characters from \p FromFile starting at \p NextToWrite up until the
238/// inclusion directive at \p StartToken, then print out the inclusion
239/// inclusion directive disabled by a #if directive, updating \p NextToWrite
240/// and \p Line to track the number of source lines visited and the progress
241/// through the \p FromFile buffer.
242void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
243                                            const Token &StartToken,
244                                            const MemoryBuffer &FromFile,
245                                            StringRef EOL,
246                                            unsigned &NextToWrite, int &Line) {
247  OutputContentUpTo(FromFile, NextToWrite,
248    SM.getFileOffset(StartToken.getLocation()), EOL, Line, false);
249  Token DirectiveToken;
250  do {
251    DirectiveLex.LexFromRawLexer(DirectiveToken);
252  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
253  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
254  OutputContentUpTo(FromFile, NextToWrite,
255    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
256    EOL, Line, true);
257  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
258}
259
260/// Find the next identifier in the pragma directive specified by \p RawToken.
261StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
262                                                Token &RawToken) {
263  RawLex.LexFromRawLexer(RawToken);
264  if (RawToken.is(tok::raw_identifier))
265    PP.LookUpIdentifierInfo(RawToken);
266  if (RawToken.is(tok::identifier))
267    return RawToken.getIdentifierInfo()->getName();
268  return StringRef();
269}
270
271// Expand __has_include and __has_include_next if possible. If there's no
272// definitive answer return false.
273bool InclusionRewriter::HandleHasInclude(
274    FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
275    bool &FileExists) {
276  // Lex the opening paren.
277  RawLex.LexFromRawLexer(Tok);
278  if (Tok.isNot(tok::l_paren))
279    return false;
280
281  RawLex.LexFromRawLexer(Tok);
282
283  SmallString<128> FilenameBuffer;
284  StringRef Filename;
285  // Since the raw lexer doesn't give us angle_literals we have to parse them
286  // ourselves.
287  // FIXME: What to do if the file name is a macro?
288  if (Tok.is(tok::less)) {
289    RawLex.LexFromRawLexer(Tok);
290
291    FilenameBuffer += '<';
292    do {
293      if (Tok.is(tok::eod)) // Sanity check.
294        return false;
295
296      if (Tok.is(tok::raw_identifier))
297        PP.LookUpIdentifierInfo(Tok);
298
299      // Get the string piece.
300      SmallVector<char, 128> TmpBuffer;
301      bool Invalid = false;
302      StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
303      if (Invalid)
304        return false;
305
306      FilenameBuffer += TmpName;
307
308      RawLex.LexFromRawLexer(Tok);
309    } while (Tok.isNot(tok::greater));
310
311    FilenameBuffer += '>';
312    Filename = FilenameBuffer;
313  } else {
314    if (Tok.isNot(tok::string_literal))
315      return false;
316
317    bool Invalid = false;
318    Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
319    if (Invalid)
320      return false;
321  }
322
323  // Lex the closing paren.
324  RawLex.LexFromRawLexer(Tok);
325  if (Tok.isNot(tok::r_paren))
326    return false;
327
328  // Now ask HeaderInfo if it knows about the header.
329  // FIXME: Subframeworks aren't handled here. Do we care?
330  bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
331  const DirectoryLookup *CurDir;
332  const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
333      Filename, SourceLocation(), isAngled, 0, CurDir,
334      PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false);
335
336  FileExists = File != 0;
337  return true;
338}
339
340/// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
341/// and including content of included files recursively.
342bool InclusionRewriter::Process(FileID FileId,
343                                SrcMgr::CharacteristicKind FileType)
344{
345  bool Invalid;
346  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
347  if (Invalid) // invalid inclusion
348    return false;
349  const char *FileName = FromFile.getBufferIdentifier();
350  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
351  RawLex.SetCommentRetentionState(false);
352
353  StringRef EOL = DetectEOL(FromFile);
354
355  // Per the GNU docs: "1" indicates the start of a new file.
356  WriteLineInfo(FileName, 1, FileType, EOL, " 1");
357
358  if (SM.getFileIDSize(FileId) == 0)
359    return false;
360
361  // The next byte to be copied from the source file, which may be non-zero if
362  // the lexer handled a BOM.
363  unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
364  assert(SM.getLineNumber(FileId, NextToWrite) == 1);
365  int Line = 1; // The current input file line number.
366
367  Token RawToken;
368  RawLex.LexFromRawLexer(RawToken);
369
370  // TODO: Consider adding a switch that strips possibly unimportant content,
371  // such as comments, to reduce the size of repro files.
372  while (RawToken.isNot(tok::eof)) {
373    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
374      RawLex.setParsingPreprocessorDirective(true);
375      Token HashToken = RawToken;
376      RawLex.LexFromRawLexer(RawToken);
377      if (RawToken.is(tok::raw_identifier))
378        PP.LookUpIdentifierInfo(RawToken);
379      if (RawToken.getIdentifierInfo() != NULL) {
380        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
381          case tok::pp_include:
382          case tok::pp_include_next:
383          case tok::pp_import: {
384            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
385              Line);
386            StringRef LineInfoExtra;
387            if (const FileChange *Change = FindFileChangeLocation(
388                HashToken.getLocation())) {
389              if (Change->Mod) {
390                WriteImplicitModuleImport(Change->Mod, EOL);
391
392              // else now include and recursively process the file
393              } else if (Process(Change->Id, Change->FileType)) {
394                // and set lineinfo back to this file, if the nested one was
395                // actually included
396                // `2' indicates returning to a file (after having included
397                // another file.
398                LineInfoExtra = " 2";
399              }
400            }
401            // fix up lineinfo (since commented out directive changed line
402            // numbers) for inclusions that were skipped due to header guards
403            WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
404            break;
405          }
406          case tok::pp_pragma: {
407            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
408            if (Identifier == "clang" || Identifier == "GCC") {
409              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
410                // keep the directive in, commented out
411                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
412                  NextToWrite, Line);
413                // update our own type
414                FileType = SM.getFileCharacteristic(RawToken.getLocation());
415                WriteLineInfo(FileName, Line, FileType, EOL);
416              }
417            } else if (Identifier == "once") {
418              // keep the directive in, commented out
419              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
420                NextToWrite, Line);
421              WriteLineInfo(FileName, Line, FileType, EOL);
422            }
423            break;
424          }
425          case tok::pp_if:
426          case tok::pp_elif: {
427            bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
428                         tok::pp_elif);
429            // Rewrite special builtin macros to avoid pulling in host details.
430            do {
431              // Walk over the directive.
432              RawLex.LexFromRawLexer(RawToken);
433              if (RawToken.is(tok::raw_identifier))
434                PP.LookUpIdentifierInfo(RawToken);
435
436              if (RawToken.is(tok::identifier)) {
437                bool HasFile;
438                SourceLocation Loc = RawToken.getLocation();
439
440                // Rewrite __has_include(x)
441                if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
442                  if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile))
443                    continue;
444                  // Rewrite __has_include_next(x)
445                } else if (RawToken.getIdentifierInfo()->isStr(
446                               "__has_include_next")) {
447                  const DirectoryLookup *Lookup = PP.GetCurDirLookup();
448                  if (Lookup)
449                    ++Lookup;
450
451                  if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
452                                        HasFile))
453                    continue;
454                } else {
455                  continue;
456                }
457                // Replace the macro with (0) or (1), followed by the commented
458                // out macro for reference.
459                OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
460                                  EOL, Line, false);
461                OS << '(' << (int) HasFile << ")/*";
462                OutputContentUpTo(FromFile, NextToWrite,
463                                  SM.getFileOffset(RawToken.getLocation()) +
464                                  RawToken.getLength(),
465                                  EOL, Line, false);
466                OS << "*/";
467              }
468            } while (RawToken.isNot(tok::eod));
469            if (elif) {
470              OutputContentUpTo(FromFile, NextToWrite,
471                                SM.getFileOffset(RawToken.getLocation()) +
472                                    RawToken.getLength(),
473                                EOL, Line, /*EnsureNewLine*/ true);
474              WriteLineInfo(FileName, Line, FileType, EOL);
475            }
476            break;
477          }
478          case tok::pp_endif:
479          case tok::pp_else: {
480            // We surround every #include by #if 0 to comment it out, but that
481            // changes line numbers. These are fixed up right after that, but
482            // the whole #include could be inside a preprocessor conditional
483            // that is not processed. So it is necessary to fix the line
484            // numbers one the next line after each #else/#endif as well.
485            RawLex.SetKeepWhitespaceMode(true);
486            do {
487              RawLex.LexFromRawLexer(RawToken);
488            } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
489            OutputContentUpTo(
490                FromFile, NextToWrite,
491                SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(),
492                EOL, Line, /*EnsureNewLine*/ true);
493            WriteLineInfo(FileName, Line, FileType, EOL);
494            RawLex.SetKeepWhitespaceMode(false);
495          }
496          default:
497            break;
498        }
499      }
500      RawLex.setParsingPreprocessorDirective(false);
501    }
502    RawLex.LexFromRawLexer(RawToken);
503  }
504  OutputContentUpTo(FromFile, NextToWrite,
505    SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line,
506    /*EnsureNewline*/true);
507  return true;
508}
509
510/// InclusionRewriterInInput - Implement -frewrite-includes mode.
511void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
512                                   const PreprocessorOutputOptions &Opts) {
513  SourceManager &SM = PP.getSourceManager();
514  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
515                                                     Opts.ShowLineMarkers);
516  PP.addPPCallbacks(Rewrite);
517  // Ignore all pragmas, otherwise there will be warnings about unknown pragmas
518  // (because there's nothing to handle them).
519  PP.AddPragmaHandler(new EmptyPragmaHandler());
520  // Ignore also all pragma in all namespaces created
521  // in Preprocessor::RegisterBuiltinPragmas().
522  PP.AddPragmaHandler("GCC", new EmptyPragmaHandler());
523  PP.AddPragmaHandler("clang", new EmptyPragmaHandler());
524
525  // First let the preprocessor process the entire file and call callbacks.
526  // Callbacks will record which #include's were actually performed.
527  PP.EnterMainSourceFile();
528  Token Tok;
529  // Only preprocessor directives matter here, so disable macro expansion
530  // everywhere else as an optimization.
531  // TODO: It would be even faster if the preprocessor could be switched
532  // to a mode where it would parse only preprocessor directives and comments,
533  // nothing else matters for parsing or processing.
534  PP.SetMacroExpansionOnlyInDirectives();
535  do {
536    PP.Lex(Tok);
537  } while (Tok.isNot(tok::eof));
538  Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID()));
539  Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
540  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
541  OS->flush();
542}
543