InclusionRewriter.cpp revision 03409967bd9860ffb2ff6b38126e04493c55567f
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This code rewrites include invocations into their expansions. This gives you 11// a file with all included files merged into it. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Rewrite/Frontend/Rewriters.h" 16#include "clang/Basic/SourceManager.h" 17#include "clang/Frontend/PreprocessorOutputOptions.h" 18#include "clang/Lex/Preprocessor.h" 19#include "llvm/Support/raw_ostream.h" 20 21using namespace clang; 22using namespace llvm; 23 24namespace { 25 26class InclusionRewriter : public PPCallbacks { 27 /// Information about which #includes were actually performed, 28 /// created by preprocessor callbacks. 29 struct FileChange { 30 const Module *Mod; 31 SourceLocation From; 32 FileID Id; 33 SrcMgr::CharacteristicKind FileType; 34 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) { 35 } 36 }; 37 Preprocessor &PP; ///< Used to find inclusion directives. 38 SourceManager &SM; ///< Used to read and manage source files. 39 raw_ostream &OS; ///< The destination stream for rewritten contents. 40 bool ShowLineMarkers; ///< Show #line markers. 41 bool UseLineDirective; ///< Use of line directives or line markers. 42 typedef std::map<unsigned, FileChange> FileChangeMap; 43 FileChangeMap FileChanges; ///< Tracks which files were included where. 44 /// Used transitively for building up the FileChanges mapping over the 45 /// various \c PPCallbacks callbacks. 46 FileChangeMap::iterator LastInsertedFileChange; 47public: 48 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); 49 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); 50private: 51 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, 52 SrcMgr::CharacteristicKind FileType, 53 FileID PrevFID); 54 virtual void FileSkipped(const FileEntry &ParentFile, 55 const Token &FilenameTok, 56 SrcMgr::CharacteristicKind FileType); 57 virtual void InclusionDirective(SourceLocation HashLoc, 58 const Token &IncludeTok, 59 StringRef FileName, 60 bool IsAngled, 61 CharSourceRange FilenameRange, 62 const FileEntry *File, 63 StringRef SearchPath, 64 StringRef RelativePath, 65 const Module *Imported); 66 void WriteLineInfo(const char *Filename, int Line, 67 SrcMgr::CharacteristicKind FileType, 68 StringRef EOL, StringRef Extra = StringRef()); 69 void WriteImplicitModuleImport(const Module *Mod, StringRef EOL); 70 void OutputContentUpTo(const MemoryBuffer &FromFile, 71 unsigned &WriteFrom, unsigned WriteTo, 72 StringRef EOL, int &lines, 73 bool EnsureNewline = false); 74 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, 75 const MemoryBuffer &FromFile, StringRef EOL, 76 unsigned &NextToWrite, int &Lines); 77 const FileChange *FindFileChangeLocation(SourceLocation Loc) const; 78 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); 79}; 80 81} // end anonymous namespace 82 83/// Initializes an InclusionRewriter with a \p PP source and \p OS destination. 84InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, 85 bool ShowLineMarkers) 86 : PP(PP), SM(PP.getSourceManager()), OS(OS), 87 ShowLineMarkers(ShowLineMarkers), 88 LastInsertedFileChange(FileChanges.end()) { 89 // If we're in microsoft mode, use normal #line instead of line markers. 90 UseLineDirective = PP.getLangOpts().MicrosoftExt; 91} 92 93/// Write appropriate line information as either #line directives or GNU line 94/// markers depending on what mode we're in, including the \p Filename and 95/// \p Line we are located at, using the specified \p EOL line separator, and 96/// any \p Extra context specifiers in GNU line directives. 97void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, 98 SrcMgr::CharacteristicKind FileType, 99 StringRef EOL, StringRef Extra) { 100 if (!ShowLineMarkers) 101 return; 102 if (UseLineDirective) { 103 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"'; 104 } else { 105 // Use GNU linemarkers as described here: 106 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 107 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"'; 108 if (!Extra.empty()) 109 OS << Extra; 110 if (FileType == SrcMgr::C_System) 111 // "`3' This indicates that the following text comes from a system header 112 // file, so certain warnings should be suppressed." 113 OS << " 3"; 114 else if (FileType == SrcMgr::C_ExternCSystem) 115 // as above for `3', plus "`4' This indicates that the following text 116 // should be treated as being wrapped in an implicit extern "C" block." 117 OS << " 3 4"; 118 } 119 OS << EOL; 120} 121 122void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod, 123 StringRef EOL) { 124 OS << "@import " << Mod->getFullModuleName() << ";" 125 << " /* clang -frewrite-includes: implicit import */" << EOL; 126} 127 128/// FileChanged - Whenever the preprocessor enters or exits a #include file 129/// it invokes this handler. 130void InclusionRewriter::FileChanged(SourceLocation Loc, 131 FileChangeReason Reason, 132 SrcMgr::CharacteristicKind NewFileType, 133 FileID) { 134 if (Reason != EnterFile) 135 return; 136 if (LastInsertedFileChange == FileChanges.end()) 137 // we didn't reach this file (eg: the main file) via an inclusion directive 138 return; 139 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); 140 LastInsertedFileChange->second.FileType = NewFileType; 141 LastInsertedFileChange = FileChanges.end(); 142} 143 144/// Called whenever an inclusion is skipped due to canonical header protection 145/// macros. 146void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, 147 const Token &/*FilenameTok*/, 148 SrcMgr::CharacteristicKind /*FileType*/) { 149 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " 150 "found via an inclusion directive, was skipped"); 151 FileChanges.erase(LastInsertedFileChange); 152 LastInsertedFileChange = FileChanges.end(); 153} 154 155/// This should be called whenever the preprocessor encounters include 156/// directives. It does not say whether the file has been included, but it 157/// provides more information about the directive (hash location instead 158/// of location inside the included file). It is assumed that the matching 159/// FileChanged() or FileSkipped() is called after this. 160void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, 161 const Token &/*IncludeTok*/, 162 StringRef /*FileName*/, 163 bool /*IsAngled*/, 164 CharSourceRange /*FilenameRange*/, 165 const FileEntry * /*File*/, 166 StringRef /*SearchPath*/, 167 StringRef /*RelativePath*/, 168 const Module *Imported) { 169 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " 170 "directive was found before the previous one was processed"); 171 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( 172 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported))); 173 assert(p.second && "Unexpected revisitation of the same include directive"); 174 if (!Imported) 175 LastInsertedFileChange = p.first; 176} 177 178/// Simple lookup for a SourceLocation (specifically one denoting the hash in 179/// an inclusion directive) in the map of inclusion information, FileChanges. 180const InclusionRewriter::FileChange * 181InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { 182 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); 183 if (I != FileChanges.end()) 184 return &I->second; 185 return NULL; 186} 187 188/// Detect the likely line ending style of \p FromFile by examining the first 189/// newline found within it. 190static StringRef DetectEOL(const MemoryBuffer &FromFile) { 191 // detect what line endings the file uses, so that added content does not mix 192 // the style 193 const char *Pos = strchr(FromFile.getBufferStart(), '\n'); 194 if (Pos == NULL) 195 return "\n"; 196 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') 197 return "\n\r"; 198 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') 199 return "\r\n"; 200 return "\n"; 201} 202 203/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at 204/// \p WriteTo - 1. 205void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, 206 unsigned &WriteFrom, unsigned WriteTo, 207 StringRef EOL, int &Line, 208 bool EnsureNewline) { 209 if (WriteTo <= WriteFrom) 210 return; 211 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); 212 // count lines manually, it's faster than getPresumedLoc() 213 Line += std::count(FromFile.getBufferStart() + WriteFrom, 214 FromFile.getBufferStart() + WriteTo, '\n'); 215 if (EnsureNewline) { 216 char LastChar = FromFile.getBufferStart()[WriteTo - 1]; 217 if (LastChar != '\n' && LastChar != '\r') 218 OS << EOL; 219 } 220 WriteFrom = WriteTo; 221} 222 223/// Print characters from \p FromFile starting at \p NextToWrite up until the 224/// inclusion directive at \p StartToken, then print out the inclusion 225/// inclusion directive disabled by a #if directive, updating \p NextToWrite 226/// and \p Line to track the number of source lines visited and the progress 227/// through the \p FromFile buffer. 228void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, 229 const Token &StartToken, 230 const MemoryBuffer &FromFile, 231 StringRef EOL, 232 unsigned &NextToWrite, int &Line) { 233 OutputContentUpTo(FromFile, NextToWrite, 234 SM.getFileOffset(StartToken.getLocation()), EOL, Line); 235 Token DirectiveToken; 236 do { 237 DirectiveLex.LexFromRawLexer(DirectiveToken); 238 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); 239 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; 240 OutputContentUpTo(FromFile, NextToWrite, 241 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), 242 EOL, Line); 243 OS << "#endif /* expanded by -frewrite-includes */" << EOL; 244} 245 246/// Find the next identifier in the pragma directive specified by \p RawToken. 247StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, 248 Token &RawToken) { 249 RawLex.LexFromRawLexer(RawToken); 250 if (RawToken.is(tok::raw_identifier)) 251 PP.LookUpIdentifierInfo(RawToken); 252 if (RawToken.is(tok::identifier)) 253 return RawToken.getIdentifierInfo()->getName(); 254 return StringRef(); 255} 256 257/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it 258/// and including content of included files recursively. 259bool InclusionRewriter::Process(FileID FileId, 260 SrcMgr::CharacteristicKind FileType) 261{ 262 bool Invalid; 263 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); 264 if (Invalid) // invalid inclusion 265 return false; 266 const char *FileName = FromFile.getBufferIdentifier(); 267 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); 268 RawLex.SetCommentRetentionState(false); 269 270 StringRef EOL = DetectEOL(FromFile); 271 272 // Per the GNU docs: "1" indicates the start of a new file. 273 WriteLineInfo(FileName, 1, FileType, EOL, " 1"); 274 275 if (SM.getFileIDSize(FileId) == 0) 276 return false; 277 278 // The next byte to be copied from the source file 279 unsigned NextToWrite = 0; 280 int Line = 1; // The current input file line number. 281 282 Token RawToken; 283 RawLex.LexFromRawLexer(RawToken); 284 285 // TODO: Consider adding a switch that strips possibly unimportant content, 286 // such as comments, to reduce the size of repro files. 287 while (RawToken.isNot(tok::eof)) { 288 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { 289 RawLex.setParsingPreprocessorDirective(true); 290 Token HashToken = RawToken; 291 RawLex.LexFromRawLexer(RawToken); 292 if (RawToken.is(tok::raw_identifier)) 293 PP.LookUpIdentifierInfo(RawToken); 294 if (RawToken.is(tok::identifier)) { 295 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { 296 case tok::pp_include: 297 case tok::pp_include_next: 298 case tok::pp_import: { 299 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, 300 Line); 301 StringRef LineInfoExtra; 302 if (const FileChange *Change = FindFileChangeLocation( 303 HashToken.getLocation())) { 304 if (Change->Mod) { 305 WriteImplicitModuleImport(Change->Mod, EOL); 306 307 // else now include and recursively process the file 308 } else if (Process(Change->Id, Change->FileType)) { 309 // and set lineinfo back to this file, if the nested one was 310 // actually included 311 // `2' indicates returning to a file (after having included 312 // another file. 313 LineInfoExtra = " 2"; 314 } 315 } 316 // fix up lineinfo (since commented out directive changed line 317 // numbers) for inclusions that were skipped due to header guards 318 WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra); 319 break; 320 } 321 case tok::pp_pragma: { 322 StringRef Identifier = NextIdentifierName(RawLex, RawToken); 323 if (Identifier == "clang" || Identifier == "GCC") { 324 if (NextIdentifierName(RawLex, RawToken) == "system_header") { 325 // keep the directive in, commented out 326 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 327 NextToWrite, Line); 328 // update our own type 329 FileType = SM.getFileCharacteristic(RawToken.getLocation()); 330 WriteLineInfo(FileName, Line, FileType, EOL); 331 } 332 } else if (Identifier == "once") { 333 // keep the directive in, commented out 334 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 335 NextToWrite, Line); 336 WriteLineInfo(FileName, Line, FileType, EOL); 337 } 338 break; 339 } 340 default: 341 break; 342 } 343 } 344 RawLex.setParsingPreprocessorDirective(false); 345 } 346 RawLex.LexFromRawLexer(RawToken); 347 } 348 OutputContentUpTo(FromFile, NextToWrite, 349 SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line, 350 /*EnsureNewline*/true); 351 return true; 352} 353 354/// InclusionRewriterInInput - Implement -frewrite-includes mode. 355void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, 356 const PreprocessorOutputOptions &Opts) { 357 SourceManager &SM = PP.getSourceManager(); 358 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, 359 Opts.ShowLineMarkers); 360 PP.addPPCallbacks(Rewrite); 361 362 // First let the preprocessor process the entire file and call callbacks. 363 // Callbacks will record which #include's were actually performed. 364 PP.EnterMainSourceFile(); 365 Token Tok; 366 // Only preprocessor directives matter here, so disable macro expansion 367 // everywhere else as an optimization. 368 // TODO: It would be even faster if the preprocessor could be switched 369 // to a mode where it would parse only preprocessor directives and comments, 370 // nothing else matters for parsing or processing. 371 PP.SetMacroExpansionOnlyInDirectives(); 372 do { 373 PP.Lex(Tok); 374 } while (Tok.isNot(tok::eof)); 375 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); 376 OS->flush(); 377} 378