InclusionRewriter.cpp revision f91fc4bfa3561990bbb39d3ed2045a75cbaa0df4
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code rewrites include invocations into their expansions.  This gives you
11// a file with all included files merged into it.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Rewrite/Frontend/Rewriters.h"
16#include "clang/Basic/SourceManager.h"
17#include "clang/Frontend/PreprocessorOutputOptions.h"
18#include "clang/Lex/HeaderSearch.h"
19#include "clang/Lex/Pragma.h"
20#include "clang/Lex/Preprocessor.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/Support/raw_ostream.h"
23
24using namespace clang;
25using namespace llvm;
26
27namespace {
28
29class InclusionRewriter : public PPCallbacks {
30  /// Information about which #includes were actually performed,
31  /// created by preprocessor callbacks.
32  struct FileChange {
33    const Module *Mod;
34    SourceLocation From;
35    FileID Id;
36    SrcMgr::CharacteristicKind FileType;
37    FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
38    }
39  };
40  Preprocessor &PP; ///< Used to find inclusion directives.
41  SourceManager &SM; ///< Used to read and manage source files.
42  raw_ostream &OS; ///< The destination stream for rewritten contents.
43  const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
44  bool ShowLineMarkers; ///< Show #line markers.
45  bool UseLineDirective; ///< Use of line directives or line markers.
46  typedef std::map<unsigned, FileChange> FileChangeMap;
47  FileChangeMap FileChanges; ///< Tracks which files were included where.
48  /// Used transitively for building up the FileChanges mapping over the
49  /// various \c PPCallbacks callbacks.
50  FileChangeMap::iterator LastInsertedFileChange;
51public:
52  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
53  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
54  void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
55    PredefinesBuffer = Buf;
56  }
57private:
58  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
59                           SrcMgr::CharacteristicKind FileType,
60                           FileID PrevFID);
61  virtual void FileSkipped(const FileEntry &ParentFile,
62                           const Token &FilenameTok,
63                           SrcMgr::CharacteristicKind FileType);
64  virtual void InclusionDirective(SourceLocation HashLoc,
65                                  const Token &IncludeTok,
66                                  StringRef FileName,
67                                  bool IsAngled,
68                                  CharSourceRange FilenameRange,
69                                  const FileEntry *File,
70                                  StringRef SearchPath,
71                                  StringRef RelativePath,
72                                  const Module *Imported);
73  void WriteLineInfo(const char *Filename, int Line,
74                     SrcMgr::CharacteristicKind FileType,
75                     StringRef EOL, StringRef Extra = StringRef());
76  void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
77  void OutputContentUpTo(const MemoryBuffer &FromFile,
78                         unsigned &WriteFrom, unsigned WriteTo,
79                         StringRef EOL, int &lines,
80                         bool EnsureNewline = false);
81  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
82                           const MemoryBuffer &FromFile, StringRef EOL,
83                           unsigned &NextToWrite, int &Lines);
84  bool HandleHasInclude(FileID FileId, Lexer &RawLex,
85                        const DirectoryLookup *Lookup, Token &Tok,
86                        bool &FileExists);
87  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
88  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
89};
90
91}  // end anonymous namespace
92
93/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
94InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
95                                     bool ShowLineMarkers)
96    : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(0),
97    ShowLineMarkers(ShowLineMarkers),
98    LastInsertedFileChange(FileChanges.end()) {
99  // If we're in microsoft mode, use normal #line instead of line markers.
100  UseLineDirective = PP.getLangOpts().MicrosoftExt;
101}
102
103/// Write appropriate line information as either #line directives or GNU line
104/// markers depending on what mode we're in, including the \p Filename and
105/// \p Line we are located at, using the specified \p EOL line separator, and
106/// any \p Extra context specifiers in GNU line directives.
107void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
108                                      SrcMgr::CharacteristicKind FileType,
109                                      StringRef EOL, StringRef Extra) {
110  if (!ShowLineMarkers)
111    return;
112  if (UseLineDirective) {
113    OS << "#line" << ' ' << Line << ' ' << '"';
114    OS.write_escaped(Filename);
115    OS << '"';
116  } else {
117    // Use GNU linemarkers as described here:
118    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
119    OS << '#' << ' ' << Line << ' ' << '"';
120    OS.write_escaped(Filename);
121    OS << '"';
122    if (!Extra.empty())
123      OS << Extra;
124    if (FileType == SrcMgr::C_System)
125      // "`3' This indicates that the following text comes from a system header
126      // file, so certain warnings should be suppressed."
127      OS << " 3";
128    else if (FileType == SrcMgr::C_ExternCSystem)
129      // as above for `3', plus "`4' This indicates that the following text
130      // should be treated as being wrapped in an implicit extern "C" block."
131      OS << " 3 4";
132  }
133  OS << EOL;
134}
135
136void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
137                                                  StringRef EOL) {
138  OS << "@import " << Mod->getFullModuleName() << ";"
139     << " /* clang -frewrite-includes: implicit import */" << EOL;
140}
141
142/// FileChanged - Whenever the preprocessor enters or exits a #include file
143/// it invokes this handler.
144void InclusionRewriter::FileChanged(SourceLocation Loc,
145                                    FileChangeReason Reason,
146                                    SrcMgr::CharacteristicKind NewFileType,
147                                    FileID) {
148  if (Reason != EnterFile)
149    return;
150  if (LastInsertedFileChange == FileChanges.end())
151    // we didn't reach this file (eg: the main file) via an inclusion directive
152    return;
153  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
154  LastInsertedFileChange->second.FileType = NewFileType;
155  LastInsertedFileChange = FileChanges.end();
156}
157
158/// Called whenever an inclusion is skipped due to canonical header protection
159/// macros.
160void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
161                                    const Token &/*FilenameTok*/,
162                                    SrcMgr::CharacteristicKind /*FileType*/) {
163  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
164    "found via an inclusion directive, was skipped");
165  FileChanges.erase(LastInsertedFileChange);
166  LastInsertedFileChange = FileChanges.end();
167}
168
169/// This should be called whenever the preprocessor encounters include
170/// directives. It does not say whether the file has been included, but it
171/// provides more information about the directive (hash location instead
172/// of location inside the included file). It is assumed that the matching
173/// FileChanged() or FileSkipped() is called after this.
174void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
175                                           const Token &/*IncludeTok*/,
176                                           StringRef /*FileName*/,
177                                           bool /*IsAngled*/,
178                                           CharSourceRange /*FilenameRange*/,
179                                           const FileEntry * /*File*/,
180                                           StringRef /*SearchPath*/,
181                                           StringRef /*RelativePath*/,
182                                           const Module *Imported) {
183  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
184    "directive was found before the previous one was processed");
185  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
186    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
187  assert(p.second && "Unexpected revisitation of the same include directive");
188  if (!Imported)
189    LastInsertedFileChange = p.first;
190}
191
192/// Simple lookup for a SourceLocation (specifically one denoting the hash in
193/// an inclusion directive) in the map of inclusion information, FileChanges.
194const InclusionRewriter::FileChange *
195InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
196  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
197  if (I != FileChanges.end())
198    return &I->second;
199  return NULL;
200}
201
202/// Detect the likely line ending style of \p FromFile by examining the first
203/// newline found within it.
204static StringRef DetectEOL(const MemoryBuffer &FromFile) {
205  // detect what line endings the file uses, so that added content does not mix
206  // the style
207  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
208  if (Pos == NULL)
209    return "\n";
210  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
211    return "\n\r";
212  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
213    return "\r\n";
214  return "\n";
215}
216
217/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
218/// \p WriteTo - 1.
219void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
220                                          unsigned &WriteFrom, unsigned WriteTo,
221                                          StringRef EOL, int &Line,
222                                          bool EnsureNewline) {
223  if (WriteTo <= WriteFrom)
224    return;
225  if (&FromFile == PredefinesBuffer) {
226    // Ignore the #defines of the predefines buffer.
227    WriteFrom = WriteTo;
228    return;
229  }
230  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
231  // count lines manually, it's faster than getPresumedLoc()
232  Line += std::count(FromFile.getBufferStart() + WriteFrom,
233                     FromFile.getBufferStart() + WriteTo, '\n');
234  if (EnsureNewline) {
235    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
236    if (LastChar != '\n' && LastChar != '\r')
237      OS << EOL;
238  }
239  WriteFrom = WriteTo;
240}
241
242/// Print characters from \p FromFile starting at \p NextToWrite up until the
243/// inclusion directive at \p StartToken, then print out the inclusion
244/// inclusion directive disabled by a #if directive, updating \p NextToWrite
245/// and \p Line to track the number of source lines visited and the progress
246/// through the \p FromFile buffer.
247void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
248                                            const Token &StartToken,
249                                            const MemoryBuffer &FromFile,
250                                            StringRef EOL,
251                                            unsigned &NextToWrite, int &Line) {
252  OutputContentUpTo(FromFile, NextToWrite,
253    SM.getFileOffset(StartToken.getLocation()), EOL, Line);
254  Token DirectiveToken;
255  do {
256    DirectiveLex.LexFromRawLexer(DirectiveToken);
257  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
258  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
259  OutputContentUpTo(FromFile, NextToWrite,
260    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
261    EOL, Line);
262  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
263}
264
265/// Find the next identifier in the pragma directive specified by \p RawToken.
266StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
267                                                Token &RawToken) {
268  RawLex.LexFromRawLexer(RawToken);
269  if (RawToken.is(tok::raw_identifier))
270    PP.LookUpIdentifierInfo(RawToken);
271  if (RawToken.is(tok::identifier))
272    return RawToken.getIdentifierInfo()->getName();
273  return StringRef();
274}
275
276// Expand __has_include and __has_include_next if possible. If there's no
277// definitive answer return false.
278bool InclusionRewriter::HandleHasInclude(
279    FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
280    bool &FileExists) {
281  // Lex the opening paren.
282  RawLex.LexFromRawLexer(Tok);
283  if (Tok.isNot(tok::l_paren))
284    return false;
285
286  RawLex.LexFromRawLexer(Tok);
287
288  SmallString<128> FilenameBuffer;
289  StringRef Filename;
290  // Since the raw lexer doesn't give us angle_literals we have to parse them
291  // ourselves.
292  // FIXME: What to do if the file name is a macro?
293  if (Tok.is(tok::less)) {
294    RawLex.LexFromRawLexer(Tok);
295
296    FilenameBuffer += '<';
297    do {
298      if (Tok.is(tok::eod)) // Sanity check.
299        return false;
300
301      if (Tok.is(tok::raw_identifier))
302        PP.LookUpIdentifierInfo(Tok);
303
304      // Get the string piece.
305      SmallVector<char, 128> TmpBuffer;
306      bool Invalid = false;
307      StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
308      if (Invalid)
309        return false;
310
311      FilenameBuffer += TmpName;
312
313      RawLex.LexFromRawLexer(Tok);
314    } while (Tok.isNot(tok::greater));
315
316    FilenameBuffer += '>';
317    Filename = FilenameBuffer;
318  } else {
319    if (Tok.isNot(tok::string_literal))
320      return false;
321
322    bool Invalid = false;
323    Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
324    if (Invalid)
325      return false;
326  }
327
328  // Lex the closing paren.
329  RawLex.LexFromRawLexer(Tok);
330  if (Tok.isNot(tok::r_paren))
331    return false;
332
333  // Now ask HeaderInfo if it knows about the header.
334  // FIXME: Subframeworks aren't handled here. Do we care?
335  bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
336  const DirectoryLookup *CurDir;
337  const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
338      Filename, isAngled, 0, CurDir,
339      PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false);
340
341  FileExists = File != 0;
342  return true;
343}
344
345/// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
346/// and including content of included files recursively.
347bool InclusionRewriter::Process(FileID FileId,
348                                SrcMgr::CharacteristicKind FileType)
349{
350  bool Invalid;
351  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
352  if (Invalid) // invalid inclusion
353    return false;
354  const char *FileName = FromFile.getBufferIdentifier();
355  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
356  RawLex.SetCommentRetentionState(false);
357
358  StringRef EOL = DetectEOL(FromFile);
359
360  // Per the GNU docs: "1" indicates the start of a new file.
361  WriteLineInfo(FileName, 1, FileType, EOL, " 1");
362
363  if (SM.getFileIDSize(FileId) == 0)
364    return false;
365
366  // The next byte to be copied from the source file
367  unsigned NextToWrite = 0;
368  int Line = 1; // The current input file line number.
369
370  // Ignore UTF-8 BOM, otherwise it'd end up somewhere else than the start
371  // of the resulting file.
372  if (FromFile.getBuffer().startswith("\xEF\xBB\xBF"))
373    NextToWrite = 3;
374
375  Token RawToken;
376  RawLex.LexFromRawLexer(RawToken);
377
378  // TODO: Consider adding a switch that strips possibly unimportant content,
379  // such as comments, to reduce the size of repro files.
380  while (RawToken.isNot(tok::eof)) {
381    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
382      RawLex.setParsingPreprocessorDirective(true);
383      Token HashToken = RawToken;
384      RawLex.LexFromRawLexer(RawToken);
385      if (RawToken.is(tok::raw_identifier))
386        PP.LookUpIdentifierInfo(RawToken);
387      if (RawToken.getIdentifierInfo() != NULL) {
388        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
389          case tok::pp_include:
390          case tok::pp_include_next:
391          case tok::pp_import: {
392            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
393              Line);
394            StringRef LineInfoExtra;
395            if (const FileChange *Change = FindFileChangeLocation(
396                HashToken.getLocation())) {
397              if (Change->Mod) {
398                WriteImplicitModuleImport(Change->Mod, EOL);
399
400              // else now include and recursively process the file
401              } else if (Process(Change->Id, Change->FileType)) {
402                // and set lineinfo back to this file, if the nested one was
403                // actually included
404                // `2' indicates returning to a file (after having included
405                // another file.
406                LineInfoExtra = " 2";
407              }
408            }
409            // fix up lineinfo (since commented out directive changed line
410            // numbers) for inclusions that were skipped due to header guards
411            WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
412            break;
413          }
414          case tok::pp_pragma: {
415            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
416            if (Identifier == "clang" || Identifier == "GCC") {
417              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
418                // keep the directive in, commented out
419                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
420                  NextToWrite, Line);
421                // update our own type
422                FileType = SM.getFileCharacteristic(RawToken.getLocation());
423                WriteLineInfo(FileName, Line, FileType, EOL);
424              }
425            } else if (Identifier == "once") {
426              // keep the directive in, commented out
427              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
428                NextToWrite, Line);
429              WriteLineInfo(FileName, Line, FileType, EOL);
430            }
431            break;
432          }
433          case tok::pp_if:
434          case tok::pp_elif: {
435            bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
436                         tok::pp_elif);
437            // Rewrite special builtin macros to avoid pulling in host details.
438            do {
439              // Walk over the directive.
440              RawLex.LexFromRawLexer(RawToken);
441              if (RawToken.is(tok::raw_identifier))
442                PP.LookUpIdentifierInfo(RawToken);
443
444              if (RawToken.is(tok::identifier)) {
445                bool HasFile;
446                SourceLocation Loc = RawToken.getLocation();
447
448                // Rewrite __has_include(x)
449                if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
450                  if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile))
451                    continue;
452                  // Rewrite __has_include_next(x)
453                } else if (RawToken.getIdentifierInfo()->isStr(
454                               "__has_include_next")) {
455                  const DirectoryLookup *Lookup = PP.GetCurDirLookup();
456                  if (Lookup)
457                    ++Lookup;
458
459                  if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
460                                        HasFile))
461                    continue;
462                } else {
463                  continue;
464                }
465                // Replace the macro with (0) or (1), followed by the commented
466                // out macro for reference.
467                OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
468                                  EOL, Line);
469                OS << '(' << (int) HasFile << ")/*";
470                OutputContentUpTo(FromFile, NextToWrite,
471                                  SM.getFileOffset(RawToken.getLocation()) +
472                                  RawToken.getLength(),
473                                  EOL, Line);
474                OS << "*/";
475              }
476            } while (RawToken.isNot(tok::eod));
477            if (elif) {
478              OutputContentUpTo(FromFile, NextToWrite,
479                                SM.getFileOffset(RawToken.getLocation()) +
480                                    RawToken.getLength(),
481                                EOL, Line, /*EnsureNewLine*/ true);
482              WriteLineInfo(FileName, Line, FileType, EOL);
483            }
484            break;
485          }
486          case tok::pp_endif:
487          case tok::pp_else: {
488            // We surround every #include by #if 0 to comment it out, but that
489            // changes line numbers. These are fixed up right after that, but
490            // the whole #include could be inside a preprocessor conditional
491            // that is not processed. So it is necessary to fix the line
492            // numbers one the next line after each #else/#endif as well.
493            RawLex.SetKeepWhitespaceMode(true);
494            do {
495              RawLex.LexFromRawLexer(RawToken);
496            } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
497            OutputContentUpTo(
498                FromFile, NextToWrite,
499                SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(),
500                EOL, Line, /*EnsureNewLine*/ true);
501            WriteLineInfo(FileName, Line, FileType, EOL);
502            RawLex.SetKeepWhitespaceMode(false);
503          }
504          default:
505            break;
506        }
507      }
508      RawLex.setParsingPreprocessorDirective(false);
509    }
510    RawLex.LexFromRawLexer(RawToken);
511  }
512  OutputContentUpTo(FromFile, NextToWrite,
513    SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line,
514    /*EnsureNewline*/true);
515  return true;
516}
517
518/// InclusionRewriterInInput - Implement -frewrite-includes mode.
519void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
520                                   const PreprocessorOutputOptions &Opts) {
521  SourceManager &SM = PP.getSourceManager();
522  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
523                                                     Opts.ShowLineMarkers);
524  PP.addPPCallbacks(Rewrite);
525  // Ignore all pragmas, otherwise there will be warnings about unknown pragmas
526  // (because there's nothing to handle them).
527  PP.AddPragmaHandler(new EmptyPragmaHandler());
528  // Ignore also all pragma in all namespaces created
529  // in Preprocessor::RegisterBuiltinPragmas().
530  PP.AddPragmaHandler("GCC", new EmptyPragmaHandler());
531  PP.AddPragmaHandler("clang", new EmptyPragmaHandler());
532
533  // First let the preprocessor process the entire file and call callbacks.
534  // Callbacks will record which #include's were actually performed.
535  PP.EnterMainSourceFile();
536  Token Tok;
537  // Only preprocessor directives matter here, so disable macro expansion
538  // everywhere else as an optimization.
539  // TODO: It would be even faster if the preprocessor could be switched
540  // to a mode where it would parse only preprocessor directives and comments,
541  // nothing else matters for parsing or processing.
542  PP.SetMacroExpansionOnlyInDirectives();
543  do {
544    PP.Lex(Tok);
545  } while (Tok.isNot(tok::eof));
546  Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID()));
547  Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
548  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
549  OS->flush();
550}
551