InclusionRewriter.cpp revision 8ee6a0dcc985c65bf5fd61a63e3f86e3ac516f5e
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code rewrites include invocations into their expansions.  This gives you
11// a file with all included files merged into it.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Rewrite/Frontend/Rewriters.h"
16#include "clang/Basic/SourceManager.h"
17#include "clang/Frontend/PreprocessorOutputOptions.h"
18#include "clang/Lex/HeaderSearch.h"
19#include "clang/Lex/Pragma.h"
20#include "clang/Lex/Preprocessor.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/Support/raw_ostream.h"
23
24using namespace clang;
25using namespace llvm;
26
27namespace {
28
29class InclusionRewriter : public PPCallbacks {
30  /// Information about which #includes were actually performed,
31  /// created by preprocessor callbacks.
32  struct FileChange {
33    const Module *Mod;
34    SourceLocation From;
35    FileID Id;
36    SrcMgr::CharacteristicKind FileType;
37    FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
38    }
39  };
40  Preprocessor &PP; ///< Used to find inclusion directives.
41  SourceManager &SM; ///< Used to read and manage source files.
42  raw_ostream &OS; ///< The destination stream for rewritten contents.
43  bool ShowLineMarkers; ///< Show #line markers.
44  bool UseLineDirective; ///< Use of line directives or line markers.
45  typedef std::map<unsigned, FileChange> FileChangeMap;
46  FileChangeMap FileChanges; ///< Tracks which files were included where.
47  /// Used transitively for building up the FileChanges mapping over the
48  /// various \c PPCallbacks callbacks.
49  FileChangeMap::iterator LastInsertedFileChange;
50public:
51  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
52  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
53private:
54  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
55                           SrcMgr::CharacteristicKind FileType,
56                           FileID PrevFID);
57  virtual void FileSkipped(const FileEntry &ParentFile,
58                           const Token &FilenameTok,
59                           SrcMgr::CharacteristicKind FileType);
60  virtual void InclusionDirective(SourceLocation HashLoc,
61                                  const Token &IncludeTok,
62                                  StringRef FileName,
63                                  bool IsAngled,
64                                  CharSourceRange FilenameRange,
65                                  const FileEntry *File,
66                                  StringRef SearchPath,
67                                  StringRef RelativePath,
68                                  const Module *Imported);
69  void WriteLineInfo(const char *Filename, int Line,
70                     SrcMgr::CharacteristicKind FileType,
71                     StringRef EOL, StringRef Extra = StringRef());
72  void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
73  void OutputContentUpTo(const MemoryBuffer &FromFile,
74                         unsigned &WriteFrom, unsigned WriteTo,
75                         StringRef EOL, int &lines,
76                         bool EnsureNewline = false);
77  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
78                           const MemoryBuffer &FromFile, StringRef EOL,
79                           unsigned &NextToWrite, int &Lines);
80  bool HandleHasInclude(FileID FileId, Lexer &RawLex,
81                        const DirectoryLookup *Lookup, Token &Tok,
82                        bool &FileExists);
83  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
84  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
85};
86
87}  // end anonymous namespace
88
89/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
90InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
91                                     bool ShowLineMarkers)
92    : PP(PP), SM(PP.getSourceManager()), OS(OS),
93    ShowLineMarkers(ShowLineMarkers),
94    LastInsertedFileChange(FileChanges.end()) {
95  // If we're in microsoft mode, use normal #line instead of line markers.
96  UseLineDirective = PP.getLangOpts().MicrosoftExt;
97}
98
99/// Write appropriate line information as either #line directives or GNU line
100/// markers depending on what mode we're in, including the \p Filename and
101/// \p Line we are located at, using the specified \p EOL line separator, and
102/// any \p Extra context specifiers in GNU line directives.
103void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
104                                      SrcMgr::CharacteristicKind FileType,
105                                      StringRef EOL, StringRef Extra) {
106  if (!ShowLineMarkers)
107    return;
108  if (UseLineDirective) {
109    OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
110  } else {
111    // Use GNU linemarkers as described here:
112    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
113    OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
114    if (!Extra.empty())
115      OS << Extra;
116    if (FileType == SrcMgr::C_System)
117      // "`3' This indicates that the following text comes from a system header
118      // file, so certain warnings should be suppressed."
119      OS << " 3";
120    else if (FileType == SrcMgr::C_ExternCSystem)
121      // as above for `3', plus "`4' This indicates that the following text
122      // should be treated as being wrapped in an implicit extern "C" block."
123      OS << " 3 4";
124  }
125  OS << EOL;
126}
127
128void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
129                                                  StringRef EOL) {
130  OS << "@import " << Mod->getFullModuleName() << ";"
131     << " /* clang -frewrite-includes: implicit import */" << EOL;
132}
133
134/// FileChanged - Whenever the preprocessor enters or exits a #include file
135/// it invokes this handler.
136void InclusionRewriter::FileChanged(SourceLocation Loc,
137                                    FileChangeReason Reason,
138                                    SrcMgr::CharacteristicKind NewFileType,
139                                    FileID) {
140  if (Reason != EnterFile)
141    return;
142  if (LastInsertedFileChange == FileChanges.end())
143    // we didn't reach this file (eg: the main file) via an inclusion directive
144    return;
145  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
146  LastInsertedFileChange->second.FileType = NewFileType;
147  LastInsertedFileChange = FileChanges.end();
148}
149
150/// Called whenever an inclusion is skipped due to canonical header protection
151/// macros.
152void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
153                                    const Token &/*FilenameTok*/,
154                                    SrcMgr::CharacteristicKind /*FileType*/) {
155  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
156    "found via an inclusion directive, was skipped");
157  FileChanges.erase(LastInsertedFileChange);
158  LastInsertedFileChange = FileChanges.end();
159}
160
161/// This should be called whenever the preprocessor encounters include
162/// directives. It does not say whether the file has been included, but it
163/// provides more information about the directive (hash location instead
164/// of location inside the included file). It is assumed that the matching
165/// FileChanged() or FileSkipped() is called after this.
166void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
167                                           const Token &/*IncludeTok*/,
168                                           StringRef /*FileName*/,
169                                           bool /*IsAngled*/,
170                                           CharSourceRange /*FilenameRange*/,
171                                           const FileEntry * /*File*/,
172                                           StringRef /*SearchPath*/,
173                                           StringRef /*RelativePath*/,
174                                           const Module *Imported) {
175  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
176    "directive was found before the previous one was processed");
177  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
178    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
179  assert(p.second && "Unexpected revisitation of the same include directive");
180  if (!Imported)
181    LastInsertedFileChange = p.first;
182}
183
184/// Simple lookup for a SourceLocation (specifically one denoting the hash in
185/// an inclusion directive) in the map of inclusion information, FileChanges.
186const InclusionRewriter::FileChange *
187InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
188  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
189  if (I != FileChanges.end())
190    return &I->second;
191  return NULL;
192}
193
194/// Detect the likely line ending style of \p FromFile by examining the first
195/// newline found within it.
196static StringRef DetectEOL(const MemoryBuffer &FromFile) {
197  // detect what line endings the file uses, so that added content does not mix
198  // the style
199  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
200  if (Pos == NULL)
201    return "\n";
202  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
203    return "\n\r";
204  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
205    return "\r\n";
206  return "\n";
207}
208
209/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
210/// \p WriteTo - 1.
211void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
212                                          unsigned &WriteFrom, unsigned WriteTo,
213                                          StringRef EOL, int &Line,
214                                          bool EnsureNewline) {
215  if (WriteTo <= WriteFrom)
216    return;
217  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
218  // count lines manually, it's faster than getPresumedLoc()
219  Line += std::count(FromFile.getBufferStart() + WriteFrom,
220                     FromFile.getBufferStart() + WriteTo, '\n');
221  if (EnsureNewline) {
222    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
223    if (LastChar != '\n' && LastChar != '\r')
224      OS << EOL;
225  }
226  WriteFrom = WriteTo;
227}
228
229/// Print characters from \p FromFile starting at \p NextToWrite up until the
230/// inclusion directive at \p StartToken, then print out the inclusion
231/// inclusion directive disabled by a #if directive, updating \p NextToWrite
232/// and \p Line to track the number of source lines visited and the progress
233/// through the \p FromFile buffer.
234void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
235                                            const Token &StartToken,
236                                            const MemoryBuffer &FromFile,
237                                            StringRef EOL,
238                                            unsigned &NextToWrite, int &Line) {
239  OutputContentUpTo(FromFile, NextToWrite,
240    SM.getFileOffset(StartToken.getLocation()), EOL, Line);
241  Token DirectiveToken;
242  do {
243    DirectiveLex.LexFromRawLexer(DirectiveToken);
244  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
245  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
246  OutputContentUpTo(FromFile, NextToWrite,
247    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
248    EOL, Line);
249  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
250}
251
252/// Find the next identifier in the pragma directive specified by \p RawToken.
253StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
254                                                Token &RawToken) {
255  RawLex.LexFromRawLexer(RawToken);
256  if (RawToken.is(tok::raw_identifier))
257    PP.LookUpIdentifierInfo(RawToken);
258  if (RawToken.is(tok::identifier))
259    return RawToken.getIdentifierInfo()->getName();
260  return StringRef();
261}
262
263// Expand __has_include and __has_include_next if possible. If there's no
264// definitive answer return false.
265bool InclusionRewriter::HandleHasInclude(
266    FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
267    bool &FileExists) {
268  // Lex the opening paren.
269  RawLex.LexFromRawLexer(Tok);
270  if (Tok.isNot(tok::l_paren))
271    return false;
272
273  RawLex.LexFromRawLexer(Tok);
274
275  SmallString<128> FilenameBuffer;
276  StringRef Filename;
277  // Since the raw lexer doesn't give us angle_literals we have to parse them
278  // ourselves.
279  // FIXME: What to do if the file name is a macro?
280  if (Tok.is(tok::less)) {
281    RawLex.LexFromRawLexer(Tok);
282
283    FilenameBuffer += '<';
284    do {
285      if (Tok.is(tok::eod)) // Sanity check.
286        return false;
287
288      if (Tok.is(tok::raw_identifier))
289        PP.LookUpIdentifierInfo(Tok);
290
291      // Get the string piece.
292      SmallVector<char, 128> TmpBuffer;
293      bool Invalid = false;
294      StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
295      if (Invalid)
296        return false;
297
298      FilenameBuffer += TmpName;
299
300      RawLex.LexFromRawLexer(Tok);
301    } while (Tok.isNot(tok::greater));
302
303    FilenameBuffer += '>';
304    Filename = FilenameBuffer;
305  } else {
306    if (Tok.isNot(tok::string_literal))
307      return false;
308
309    bool Invalid = false;
310    Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
311    if (Invalid)
312      return false;
313  }
314
315  // Lex the closing paren.
316  RawLex.LexFromRawLexer(Tok);
317  if (Tok.isNot(tok::r_paren))
318    return false;
319
320  // Now ask HeaderInfo if it knows about the header.
321  // FIXME: Subframeworks aren't handled here. Do we care?
322  bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
323  const DirectoryLookup *CurDir;
324  const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
325      Filename, isAngled, 0, CurDir,
326      PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false);
327
328  FileExists = File != 0;
329  return true;
330}
331
/// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
/// and including content of included files recursively.
///
/// \p FileType is the file's characteristic kind; it is passed on to
/// WriteLineInfo() and may be updated when a system_header pragma is seen.
///
/// \returns false if the buffer for \p FileId is invalid or the file is
/// empty (in the latter case the initial line information has already been
/// written); true once the whole file has been copied.
bool InclusionRewriter::Process(FileID FileId,
                                SrcMgr::CharacteristicKind FileType)
{
  bool Invalid;
  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
  if (Invalid) // invalid inclusion
    return false;
  const char *FileName = FromFile.getBufferIdentifier();
  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
  RawLex.SetCommentRetentionState(false);

  // Line ending used for everything this function adds to the output.
  StringRef EOL = DetectEOL(FromFile);

  // Per the GNU docs: "1" indicates the start of a new file.
  WriteLineInfo(FileName, 1, FileType, EOL, " 1");

  if (SM.getFileIDSize(FileId) == 0)
    return false;

  // The next byte to be copied from the source file
  unsigned NextToWrite = 0;
  int Line = 1; // The current input file line number.

  Token RawToken;
  RawLex.LexFromRawLexer(RawToken);

  // TODO: Consider adding a switch that strips possibly unimportant content,
  // such as comments, to reduce the size of repro files.
  while (RawToken.isNot(tok::eof)) {
    // Only a '#' at the start of a line begins a directive.
    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
      RawLex.setParsingPreprocessorDirective(true);
      // Remember the '#': its location is the key into FileChanges and the
      // point up to which pending content is copied.
      Token HashToken = RawToken;
      RawLex.LexFromRawLexer(RawToken);
      if (RawToken.is(tok::raw_identifier))
        PP.LookUpIdentifierInfo(RawToken);
      if (RawToken.getIdentifierInfo() != NULL) {
        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
          case tok::pp_include:
          case tok::pp_include_next:
          case tok::pp_import: {
            // Disable the directive itself, then emit what it stood for.
            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
              Line);
            StringRef LineInfoExtra;
            if (const FileChange *Change = FindFileChangeLocation(
                HashToken.getLocation())) {
              if (Change->Mod) {
                // The directive was recorded with a module: emit an import.
                WriteImplicitModuleImport(Change->Mod, EOL);

              // else now include and recursively process the file
              } else if (Process(Change->Id, Change->FileType)) {
                // and set lineinfo back to this file, if the nested one was
                // actually included
                // `2' indicates returning to a file (after having included
                // another file).
                LineInfoExtra = " 2";
              }
            }
            // fix up lineinfo (since commented out directive changed line
            // numbers) for inclusions that were skipped due to header guards
            WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
            break;
          }
          case tok::pp_pragma: {
            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
            if (Identifier == "clang" || Identifier == "GCC") {
              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
                // keep the directive in, commented out
                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
                  NextToWrite, Line);
                // update our own type
                FileType = SM.getFileCharacteristic(RawToken.getLocation());
                WriteLineInfo(FileName, Line, FileType, EOL);
              }
            } else if (Identifier == "once") {
              // keep the directive in, commented out
              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
                NextToWrite, Line);
              WriteLineInfo(FileName, Line, FileType, EOL);
            }
            break;
          }
          case tok::pp_if:
          case tok::pp_elif: {
            bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
                         tok::pp_elif);
            // Rewrite special builtin macros to avoid pulling in host details.
            // Note that each `continue` below jumps to the do-while condition,
            // i.e. it moves on to the next token unless the directive ended.
            do {
              // Walk over the directive.
              RawLex.LexFromRawLexer(RawToken);
              if (RawToken.is(tok::raw_identifier))
                PP.LookUpIdentifierInfo(RawToken);

              if (RawToken.is(tok::identifier)) {
                bool HasFile;
                // Start of the builtin's name; HandleHasInclude() advances
                // RawToken past it.
                SourceLocation Loc = RawToken.getLocation();

                // Rewrite __has_include(x)
                if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
                  if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile))
                    continue;
                  // Rewrite __has_include_next(x)
                } else if (RawToken.getIdentifierInfo()->isStr(
                               "__has_include_next")) {
                  const DirectoryLookup *Lookup = PP.GetCurDirLookup();
                  if (Lookup)
                    ++Lookup;

                  if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
                                        HasFile))
                    continue;
                } else {
                  continue;
                }
                // Replace the macro with (0) or (1), followed by the commented
                // out macro for reference.
                OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
                                  EOL, Line);
                OS << '(' << (int) HasFile << ")/*";
                OutputContentUpTo(FromFile, NextToWrite,
                                  SM.getFileOffset(RawToken.getLocation()) +
                                  RawToken.getLength(),
                                  EOL, Line);
                OS << "*/";
              }
            } while (RawToken.isNot(tok::eod));
            // For #elif, flush through the end of the directive and re-emit
            // line information after it.
            if (elif) {
              OutputContentUpTo(FromFile, NextToWrite,
                                SM.getFileOffset(RawToken.getLocation()) +
                                    RawToken.getLength(),
                                EOL, Line, /*EnsureNewLine*/ true);
              WriteLineInfo(FileName, Line, FileType, EOL);
            }
            break;
          }
          case tok::pp_endif:
          case tok::pp_else: {
            // We surround every #include by #if 0 to comment it out, but that
            // changes line numbers. These are fixed up right after that, but
            // the whole #include could be inside a preprocessor conditional
            // that is not processed. So it is necessary to fix the line
            // numbers on the next line after each #else/#endif as well.
            RawLex.SetKeepWhitespaceMode(true);
            do {
              RawLex.LexFromRawLexer(RawToken);
            } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
            OutputContentUpTo(
                FromFile, NextToWrite,
                SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(),
                EOL, Line, /*EnsureNewLine*/ true);
            WriteLineInfo(FileName, Line, FileType, EOL);
            RawLex.SetKeepWhitespaceMode(false);
          }
          // No break above: control falls through into default, which only
          // breaks.
          default:
            break;
        }
      }
      RawLex.setParsingPreprocessorDirective(false);
    }
    RawLex.LexFromRawLexer(RawToken);
  }
  // Copy whatever is left after the last directive that was handled.
  OutputContentUpTo(FromFile, NextToWrite,
    SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line,
    /*EnsureNewline*/true);
  return true;
}
499
500/// InclusionRewriterInInput - Implement -frewrite-includes mode.
501void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
502                                   const PreprocessorOutputOptions &Opts) {
503  SourceManager &SM = PP.getSourceManager();
504  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
505                                                     Opts.ShowLineMarkers);
506  PP.addPPCallbacks(Rewrite);
507  // Ignore all pragmas, otherwise there will be warnings about unknown pragmas
508  // (because there's nothing to handle them).
509  PP.AddPragmaHandler(new EmptyPragmaHandler());
510  // Ignore also all pragma in all namespaces created
511  // in Preprocessor::RegisterBuiltinPragmas().
512  PP.AddPragmaHandler("GCC", new EmptyPragmaHandler());
513  PP.AddPragmaHandler("clang", new EmptyPragmaHandler());
514
515  // First let the preprocessor process the entire file and call callbacks.
516  // Callbacks will record which #include's were actually performed.
517  PP.EnterMainSourceFile();
518  Token Tok;
519  // Only preprocessor directives matter here, so disable macro expansion
520  // everywhere else as an optimization.
521  // TODO: It would be even faster if the preprocessor could be switched
522  // to a mode where it would parse only preprocessor directives and comments,
523  // nothing else matters for parsing or processing.
524  PP.SetMacroExpansionOnlyInDirectives();
525  do {
526    PP.Lex(Tok);
527  } while (Tok.isNot(tok::eof));
528  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
529  OS->flush();
530}
531