InclusionRewriter.cpp revision 305c613af6cfc40e519c75d9d2c84c6fa9a841c0
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This code rewrites include invocations into their expansions. This gives you 11// a file with all included files merged into it. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Rewrite/Frontend/Rewriters.h" 16#include "clang/Lex/Preprocessor.h" 17#include "clang/Basic/SourceManager.h" 18#include "clang/Frontend/PreprocessorOutputOptions.h" 19#include "llvm/Support/raw_ostream.h" 20 21using namespace clang; 22using namespace llvm; 23 24namespace { 25 26class InclusionRewriter : public PPCallbacks { 27 /// Information about which #includes were actually performed, 28 /// created by preprocessor callbacks. 29 struct FileChange { 30 SourceLocation From; 31 FileID Id; 32 SrcMgr::CharacteristicKind FileType; 33 FileChange(SourceLocation From) : From(From) { 34 } 35 }; 36 Preprocessor &PP; ///< Used to find inclusion directives. 37 SourceManager &SM; ///< Used to read and manage source files. 38 raw_ostream &OS; ///< The destination stream for rewritten contents. 39 bool ShowLineMarkers; ///< Show #line markers. 40 bool UseLineDirective; ///< Use of line directives or line markers. 41 typedef std::map<unsigned, FileChange> FileChangeMap; 42 FileChangeMap FileChanges; /// Tracks which files were included where. 43 /// Used transitively for building up the FileChanges mapping over the 44 /// various \c PPCallbacks callbacks. 45 FileChangeMap::iterator LastInsertedFileChange; 46public: 47 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); 48 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); 49private: 50 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, 51 SrcMgr::CharacteristicKind FileType, 52 FileID PrevFID); 53 virtual void FileSkipped(const FileEntry &ParentFile, 54 const Token &FilenameTok, 55 SrcMgr::CharacteristicKind FileType); 56 virtual void InclusionDirective(SourceLocation HashLoc, 57 const Token &IncludeTok, 58 StringRef FileName, 59 bool IsAngled, 60 const FileEntry *File, 61 SourceLocation EndLoc, 62 StringRef SearchPath, 63 StringRef RelativePath); 64 void WriteLineInfo(const char *Filename, int Line, 65 SrcMgr::CharacteristicKind FileType, 66 StringRef EOL, StringRef Extra = StringRef()); 67 void OutputContentUpTo(const MemoryBuffer &FromFile, 68 unsigned &WriteFrom, unsigned WriteTo, 69 StringRef EOL, int &lines, 70 bool EnsureNewline = false); 71 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, 72 const MemoryBuffer &FromFile, StringRef EOL, 73 unsigned &NextToWrite, int &Lines); 74 const FileChange *FindFileChangeLocation(SourceLocation Loc) const; 75 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); 76}; 77 78} // end anonymous namespace 79 80/// Initializes an InclusionRewriter with a \p PP source and \p OS destination. 81InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, 82 bool ShowLineMarkers) 83 : PP(PP), SM(PP.getSourceManager()), OS(OS), 84 ShowLineMarkers(ShowLineMarkers), 85 LastInsertedFileChange(FileChanges.end()) { 86 // If we're in microsoft mode, use normal #line instead of line markers. 87 UseLineDirective = PP.getLangOpts().MicrosoftExt; 88} 89 90/// Write appropriate line information as either #line directives or GNU line 91/// markers depending on what mode we're in, including the \p Filename and 92/// \p Line we are located at, using the specified \p EOL line separator, and 93/// any \p Extra context specifiers in GNU line directives. 94void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, 95 SrcMgr::CharacteristicKind FileType, 96 StringRef EOL, StringRef Extra) { 97 if (!ShowLineMarkers) 98 return; 99 if (UseLineDirective) { 100 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"'; 101 } else { 102 // Use GNU linemarkers as described here: 103 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 104 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"'; 105 if (!Extra.empty()) 106 OS << Extra; 107 if (FileType == SrcMgr::C_System) 108 // "`3' This indicates that the following text comes from a system header 109 // file, so certain warnings should be suppressed." 110 OS << " 3"; 111 else if (FileType == SrcMgr::C_ExternCSystem) 112 // as above for `3', plus "`4' This indicates that the following text 113 // should be treated as being wrapped in an implicit extern "C" block." 114 OS << " 3 4"; 115 } 116 OS << EOL; 117} 118 119/// FileChanged - Whenever the preprocessor enters or exits a #include file 120/// it invokes this handler. 121void InclusionRewriter::FileChanged(SourceLocation Loc, 122 FileChangeReason Reason, 123 SrcMgr::CharacteristicKind NewFileType, 124 FileID) { 125 if (Reason != EnterFile) 126 return; 127 if (LastInsertedFileChange == FileChanges.end()) 128 // we didn't reach this file (eg: the main file) via an inclusion directive 129 return; 130 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); 131 LastInsertedFileChange->second.FileType = NewFileType; 132 LastInsertedFileChange = FileChanges.end(); 133} 134 135/// Called whenever an inclusion is skipped due to canonical header protection 136/// macros. 137void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, 138 const Token &/*FilenameTok*/, 139 SrcMgr::CharacteristicKind /*FileType*/) { 140 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " 141 "found via an inclusion directive, was skipped"); 142 FileChanges.erase(LastInsertedFileChange); 143 LastInsertedFileChange = FileChanges.end(); 144} 145 146/// This should be called whenever the preprocessor encounters include 147/// directives. It does not say whether the file has been included, but it 148/// provides more information about the directive (hash location instead 149/// of location inside the included file). It is assumed that the matching 150/// FileChanged() or FileSkipped() is called after this. 151void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, 152 const Token &/*IncludeTok*/, 153 StringRef /*FileName*/, 154 bool /*IsAngled*/, 155 const FileEntry * /*File*/, 156 SourceLocation /*EndLoc*/, 157 StringRef /*SearchPath*/, 158 StringRef /*RelativePath*/) { 159 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " 160 "directive was found before the previous one was processed"); 161 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( 162 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc))); 163 assert(p.second && "Unexpected revisitation of the same include directive"); 164 LastInsertedFileChange = p.first; 165} 166 167/// Simple lookup for a SourceLocation (specifically one denoting the hash in 168/// an inclusion directive) in the map of inclusion information, FileChanges. 169const InclusionRewriter::FileChange * 170InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { 171 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); 172 if (I != FileChanges.end()) 173 return &I->second; 174 return NULL; 175} 176 177/// Detect the likely line ending style of \p FromFile by examining the first 178/// newline found within it. 179static StringRef DetectEOL(const MemoryBuffer &FromFile) { 180 // detect what line endings the file uses, so that added content does not mix 181 // the style 182 const char *Pos = strchr(FromFile.getBufferStart(), '\n'); 183 if (Pos == NULL) 184 return "\n"; 185 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') 186 return "\n\r"; 187 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') 188 return "\r\n"; 189 return "\n"; 190} 191 192/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at 193/// \p WriteTo - 1. 194void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, 195 unsigned &WriteFrom, unsigned WriteTo, 196 StringRef EOL, int &Line, 197 bool EnsureNewline) { 198 if (WriteTo <= WriteFrom) 199 return; 200 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); 201 // count lines manually, it's faster than getPresumedLoc() 202 Line += std::count(FromFile.getBufferStart() + WriteFrom, 203 FromFile.getBufferStart() + WriteTo, '\n'); 204 if (EnsureNewline) { 205 char LastChar = FromFile.getBufferStart()[WriteTo - 1]; 206 if (LastChar != '\n' && LastChar != '\r') 207 OS << EOL; 208 } 209 WriteFrom = WriteTo; 210} 211 212/// Print characters from \p FromFile starting at \p NextToWrite up until the 213/// inclusion directive at \p StartToken, then print out the inclusion 214/// inclusion directive disabled by a #if directive, updating \p NextToWrite 215/// and \p Line to track the number of source lines visited and the progress 216/// through the \p FromFile buffer. 217void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, 218 const Token &StartToken, 219 const MemoryBuffer &FromFile, 220 StringRef EOL, 221 unsigned &NextToWrite, int &Line) { 222 OutputContentUpTo(FromFile, NextToWrite, 223 SM.getFileOffset(StartToken.getLocation()), EOL, Line); 224 Token DirectiveToken; 225 do { 226 DirectiveLex.LexFromRawLexer(DirectiveToken); 227 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); 228 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; 229 OutputContentUpTo(FromFile, NextToWrite, 230 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), 231 EOL, Line); 232 OS << "#endif /* expanded by -frewrite-includes */" << EOL; 233} 234 235/// Find the next identifier in the pragma directive specified by \p RawToken. 236StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, 237 Token &RawToken) { 238 RawLex.LexFromRawLexer(RawToken); 239 if (RawToken.is(tok::raw_identifier)) 240 PP.LookUpIdentifierInfo(RawToken); 241 if (RawToken.is(tok::identifier)) 242 return RawToken.getIdentifierInfo()->getName(); 243 return StringRef(); 244} 245 246/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it 247/// and including content of included files recursively. 248bool InclusionRewriter::Process(FileID FileId, 249 SrcMgr::CharacteristicKind FileType) 250{ 251 bool Invalid; 252 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); 253 if (Invalid) // invalid inclusion 254 return true; 255 const char *FileName = FromFile.getBufferIdentifier(); 256 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); 257 RawLex.SetCommentRetentionState(false); 258 259 StringRef EOL = DetectEOL(FromFile); 260 261 // Per the GNU docs: "1" indicates the start of a new file. 262 WriteLineInfo(FileName, 1, FileType, EOL, " 1"); 263 264 if (SM.getFileIDSize(FileId) == 0) 265 return true; 266 267 // The next byte to be copied from the source file 268 unsigned NextToWrite = 0; 269 int Line = 1; // The current input file line number. 270 271 Token RawToken; 272 RawLex.LexFromRawLexer(RawToken); 273 274 // TODO: Consider adding a switch that strips possibly unimportant content, 275 // such as comments, to reduce the size of repro files. 276 while (RawToken.isNot(tok::eof)) { 277 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { 278 RawLex.setParsingPreprocessorDirective(true); 279 Token HashToken = RawToken; 280 RawLex.LexFromRawLexer(RawToken); 281 if (RawToken.is(tok::raw_identifier)) 282 PP.LookUpIdentifierInfo(RawToken); 283 if (RawToken.is(tok::identifier)) { 284 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { 285 case tok::pp_include: 286 case tok::pp_include_next: 287 case tok::pp_import: { 288 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, 289 Line); 290 if (const FileChange *Change = FindFileChangeLocation( 291 HashToken.getLocation())) { 292 // now include and recursively process the file 293 if (Process(Change->Id, Change->FileType)) 294 // and set lineinfo back to this file, if the nested one was 295 // actually included 296 // `2' indicates returning to a file (after having included 297 // another file. 298 WriteLineInfo(FileName, Line, FileType, EOL, " 2"); 299 } else 300 // fix up lineinfo (since commented out directive changed line 301 // numbers) for inclusions that were skipped due to header guards 302 WriteLineInfo(FileName, Line, FileType, EOL); 303 break; 304 } 305 case tok::pp_pragma: { 306 StringRef Identifier = NextIdentifierName(RawLex, RawToken); 307 if (Identifier == "clang" || Identifier == "GCC") { 308 if (NextIdentifierName(RawLex, RawToken) == "system_header") { 309 // keep the directive in, commented out 310 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 311 NextToWrite, Line); 312 // update our own type 313 FileType = SM.getFileCharacteristic(RawToken.getLocation()); 314 WriteLineInfo(FileName, Line, FileType, EOL); 315 } 316 } else if (Identifier == "once") { 317 // keep the directive in, commented out 318 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 319 NextToWrite, Line); 320 WriteLineInfo(FileName, Line, FileType, EOL); 321 } 322 break; 323 } 324 default: 325 break; 326 } 327 } 328 RawLex.setParsingPreprocessorDirective(false); 329 } 330 RawLex.LexFromRawLexer(RawToken); 331 } 332 OutputContentUpTo(FromFile, NextToWrite, 333 SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line, 334 /*EnsureNewline*/true); 335 return true; 336} 337 338/// InclusionRewriterInInput - Implement -frewrite-includes mode. 339void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, 340 const PreprocessorOutputOptions &Opts) { 341 SourceManager &SM = PP.getSourceManager(); 342 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, 343 Opts.ShowLineMarkers); 344 PP.addPPCallbacks(Rewrite); 345 346 // First let the preprocessor process the entire file and call callbacks. 347 // Callbacks will record which #include's were actually performed. 348 PP.EnterMainSourceFile(); 349 Token Tok; 350 // Only preprocessor directives matter here, so disable macro expansion 351 // everywhere else as an optimization. 352 // TODO: It would be even faster if the preprocessor could be switched 353 // to a mode where it would parse only preprocessor directives and comments, 354 // nothing else matters for parsing or processing. 355 PP.SetMacroExpansionOnlyInDirectives(); 356 do { 357 PP.Lex(Tok); 358 } while (Tok.isNot(tok::eof)); 359 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); 360 OS->flush(); 361} 362