InclusionRewriter.cpp revision 959dc8475fc20ce8c3fd55021cb9f02a531cddc5
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This code rewrites include invocations into their expansions. This gives you 11// a file with all included files merged into it. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Rewrite/Frontend/Rewriters.h" 16#include "clang/Basic/SourceManager.h" 17#include "clang/Frontend/PreprocessorOutputOptions.h" 18#include "clang/Lex/Preprocessor.h" 19#include "llvm/Support/raw_ostream.h" 20 21using namespace clang; 22using namespace llvm; 23 24namespace { 25 26class InclusionRewriter : public PPCallbacks { 27 /// Information about which #includes were actually performed, 28 /// created by preprocessor callbacks. 29 struct FileChange { 30 SourceLocation From; 31 FileID Id; 32 SrcMgr::CharacteristicKind FileType; 33 FileChange(SourceLocation From) : From(From) { 34 } 35 }; 36 Preprocessor &PP; ///< Used to find inclusion directives. 37 SourceManager &SM; ///< Used to read and manage source files. 38 raw_ostream &OS; ///< The destination stream for rewritten contents. 39 bool ShowLineMarkers; ///< Show #line markers. 40 bool UseLineDirective; ///< Use of line directives or line markers. 41 typedef std::map<unsigned, FileChange> FileChangeMap; 42 FileChangeMap FileChanges; ///< Tracks which files were included where. 43 /// Used transitively for building up the FileChanges mapping over the 44 /// various \c PPCallbacks callbacks. 45 FileChangeMap::iterator LastInsertedFileChange; 46public: 47 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); 48 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); 49private: 50 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, 51 SrcMgr::CharacteristicKind FileType, 52 FileID PrevFID); 53 virtual void FileSkipped(const FileEntry &ParentFile, 54 const Token &FilenameTok, 55 SrcMgr::CharacteristicKind FileType); 56 virtual void InclusionDirective(SourceLocation HashLoc, 57 const Token &IncludeTok, 58 StringRef FileName, 59 bool IsAngled, 60 CharSourceRange FilenameRange, 61 const FileEntry *File, 62 StringRef SearchPath, 63 StringRef RelativePath, 64 const Module *Imported); 65 void WriteLineInfo(const char *Filename, int Line, 66 SrcMgr::CharacteristicKind FileType, 67 StringRef EOL, StringRef Extra = StringRef()); 68 void OutputContentUpTo(const MemoryBuffer &FromFile, 69 unsigned &WriteFrom, unsigned WriteTo, 70 StringRef EOL, int &lines, 71 bool EnsureNewline = false); 72 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, 73 const MemoryBuffer &FromFile, StringRef EOL, 74 unsigned &NextToWrite, int &Lines); 75 const FileChange *FindFileChangeLocation(SourceLocation Loc) const; 76 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); 77}; 78 79} // end anonymous namespace 80 81/// Initializes an InclusionRewriter with a \p PP source and \p OS destination. 82InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, 83 bool ShowLineMarkers) 84 : PP(PP), SM(PP.getSourceManager()), OS(OS), 85 ShowLineMarkers(ShowLineMarkers), 86 LastInsertedFileChange(FileChanges.end()) { 87 // If we're in microsoft mode, use normal #line instead of line markers. 88 UseLineDirective = PP.getLangOpts().MicrosoftExt; 89} 90 91/// Write appropriate line information as either #line directives or GNU line 92/// markers depending on what mode we're in, including the \p Filename and 93/// \p Line we are located at, using the specified \p EOL line separator, and 94/// any \p Extra context specifiers in GNU line directives. 95void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, 96 SrcMgr::CharacteristicKind FileType, 97 StringRef EOL, StringRef Extra) { 98 if (!ShowLineMarkers) 99 return; 100 if (UseLineDirective) { 101 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"'; 102 } else { 103 // Use GNU linemarkers as described here: 104 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 105 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"'; 106 if (!Extra.empty()) 107 OS << Extra; 108 if (FileType == SrcMgr::C_System) 109 // "`3' This indicates that the following text comes from a system header 110 // file, so certain warnings should be suppressed." 111 OS << " 3"; 112 else if (FileType == SrcMgr::C_ExternCSystem) 113 // as above for `3', plus "`4' This indicates that the following text 114 // should be treated as being wrapped in an implicit extern "C" block." 115 OS << " 3 4"; 116 } 117 OS << EOL; 118} 119 120/// FileChanged - Whenever the preprocessor enters or exits a #include file 121/// it invokes this handler. 122void InclusionRewriter::FileChanged(SourceLocation Loc, 123 FileChangeReason Reason, 124 SrcMgr::CharacteristicKind NewFileType, 125 FileID) { 126 if (Reason != EnterFile) 127 return; 128 if (LastInsertedFileChange == FileChanges.end()) 129 // we didn't reach this file (eg: the main file) via an inclusion directive 130 return; 131 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); 132 LastInsertedFileChange->second.FileType = NewFileType; 133 LastInsertedFileChange = FileChanges.end(); 134} 135 136/// Called whenever an inclusion is skipped due to canonical header protection 137/// macros. 138void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, 139 const Token &/*FilenameTok*/, 140 SrcMgr::CharacteristicKind /*FileType*/) { 141 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " 142 "found via an inclusion directive, was skipped"); 143 FileChanges.erase(LastInsertedFileChange); 144 LastInsertedFileChange = FileChanges.end(); 145} 146 147/// This should be called whenever the preprocessor encounters include 148/// directives. It does not say whether the file has been included, but it 149/// provides more information about the directive (hash location instead 150/// of location inside the included file). It is assumed that the matching 151/// FileChanged() or FileSkipped() is called after this. 152void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, 153 const Token &/*IncludeTok*/, 154 StringRef /*FileName*/, 155 bool /*IsAngled*/, 156 CharSourceRange /*FilenameRange*/, 157 const FileEntry * /*File*/, 158 StringRef /*SearchPath*/, 159 StringRef /*RelativePath*/, 160 const Module * /*Imported*/) { 161 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " 162 "directive was found before the previous one was processed"); 163 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( 164 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc))); 165 assert(p.second && "Unexpected revisitation of the same include directive"); 166 LastInsertedFileChange = p.first; 167} 168 169/// Simple lookup for a SourceLocation (specifically one denoting the hash in 170/// an inclusion directive) in the map of inclusion information, FileChanges. 171const InclusionRewriter::FileChange * 172InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { 173 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); 174 if (I != FileChanges.end()) 175 return &I->second; 176 return NULL; 177} 178 179/// Detect the likely line ending style of \p FromFile by examining the first 180/// newline found within it. 181static StringRef DetectEOL(const MemoryBuffer &FromFile) { 182 // detect what line endings the file uses, so that added content does not mix 183 // the style 184 const char *Pos = strchr(FromFile.getBufferStart(), '\n'); 185 if (Pos == NULL) 186 return "\n"; 187 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') 188 return "\n\r"; 189 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') 190 return "\r\n"; 191 return "\n"; 192} 193 194/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at 195/// \p WriteTo - 1. 196void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, 197 unsigned &WriteFrom, unsigned WriteTo, 198 StringRef EOL, int &Line, 199 bool EnsureNewline) { 200 if (WriteTo <= WriteFrom) 201 return; 202 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); 203 // count lines manually, it's faster than getPresumedLoc() 204 Line += std::count(FromFile.getBufferStart() + WriteFrom, 205 FromFile.getBufferStart() + WriteTo, '\n'); 206 if (EnsureNewline) { 207 char LastChar = FromFile.getBufferStart()[WriteTo - 1]; 208 if (LastChar != '\n' && LastChar != '\r') 209 OS << EOL; 210 } 211 WriteFrom = WriteTo; 212} 213 214/// Print characters from \p FromFile starting at \p NextToWrite up until the 215/// inclusion directive at \p StartToken, then print out the inclusion 216/// inclusion directive disabled by a #if directive, updating \p NextToWrite 217/// and \p Line to track the number of source lines visited and the progress 218/// through the \p FromFile buffer. 219void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, 220 const Token &StartToken, 221 const MemoryBuffer &FromFile, 222 StringRef EOL, 223 unsigned &NextToWrite, int &Line) { 224 OutputContentUpTo(FromFile, NextToWrite, 225 SM.getFileOffset(StartToken.getLocation()), EOL, Line); 226 Token DirectiveToken; 227 do { 228 DirectiveLex.LexFromRawLexer(DirectiveToken); 229 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); 230 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; 231 OutputContentUpTo(FromFile, NextToWrite, 232 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), 233 EOL, Line); 234 OS << "#endif /* expanded by -frewrite-includes */" << EOL; 235} 236 237/// Find the next identifier in the pragma directive specified by \p RawToken. 238StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, 239 Token &RawToken) { 240 RawLex.LexFromRawLexer(RawToken); 241 if (RawToken.is(tok::raw_identifier)) 242 PP.LookUpIdentifierInfo(RawToken); 243 if (RawToken.is(tok::identifier)) 244 return RawToken.getIdentifierInfo()->getName(); 245 return StringRef(); 246} 247 248/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it 249/// and including content of included files recursively. 250bool InclusionRewriter::Process(FileID FileId, 251 SrcMgr::CharacteristicKind FileType) 252{ 253 bool Invalid; 254 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); 255 if (Invalid) // invalid inclusion 256 return true; 257 const char *FileName = FromFile.getBufferIdentifier(); 258 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); 259 RawLex.SetCommentRetentionState(false); 260 261 StringRef EOL = DetectEOL(FromFile); 262 263 // Per the GNU docs: "1" indicates the start of a new file. 264 WriteLineInfo(FileName, 1, FileType, EOL, " 1"); 265 266 if (SM.getFileIDSize(FileId) == 0) 267 return true; 268 269 // The next byte to be copied from the source file 270 unsigned NextToWrite = 0; 271 int Line = 1; // The current input file line number. 272 273 Token RawToken; 274 RawLex.LexFromRawLexer(RawToken); 275 276 // TODO: Consider adding a switch that strips possibly unimportant content, 277 // such as comments, to reduce the size of repro files. 278 while (RawToken.isNot(tok::eof)) { 279 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { 280 RawLex.setParsingPreprocessorDirective(true); 281 Token HashToken = RawToken; 282 RawLex.LexFromRawLexer(RawToken); 283 if (RawToken.is(tok::raw_identifier)) 284 PP.LookUpIdentifierInfo(RawToken); 285 if (RawToken.is(tok::identifier)) { 286 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { 287 case tok::pp_include: 288 case tok::pp_include_next: 289 case tok::pp_import: { 290 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, 291 Line); 292 if (const FileChange *Change = FindFileChangeLocation( 293 HashToken.getLocation())) { 294 // now include and recursively process the file 295 if (Process(Change->Id, Change->FileType)) 296 // and set lineinfo back to this file, if the nested one was 297 // actually included 298 // `2' indicates returning to a file (after having included 299 // another file. 300 WriteLineInfo(FileName, Line, FileType, EOL, " 2"); 301 } else 302 // fix up lineinfo (since commented out directive changed line 303 // numbers) for inclusions that were skipped due to header guards 304 WriteLineInfo(FileName, Line, FileType, EOL); 305 break; 306 } 307 case tok::pp_pragma: { 308 StringRef Identifier = NextIdentifierName(RawLex, RawToken); 309 if (Identifier == "clang" || Identifier == "GCC") { 310 if (NextIdentifierName(RawLex, RawToken) == "system_header") { 311 // keep the directive in, commented out 312 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 313 NextToWrite, Line); 314 // update our own type 315 FileType = SM.getFileCharacteristic(RawToken.getLocation()); 316 WriteLineInfo(FileName, Line, FileType, EOL); 317 } 318 } else if (Identifier == "once") { 319 // keep the directive in, commented out 320 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 321 NextToWrite, Line); 322 WriteLineInfo(FileName, Line, FileType, EOL); 323 } 324 break; 325 } 326 default: 327 break; 328 } 329 } 330 RawLex.setParsingPreprocessorDirective(false); 331 } 332 RawLex.LexFromRawLexer(RawToken); 333 } 334 OutputContentUpTo(FromFile, NextToWrite, 335 SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line, 336 /*EnsureNewline*/true); 337 return true; 338} 339 340/// InclusionRewriterInInput - Implement -frewrite-includes mode. 341void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, 342 const PreprocessorOutputOptions &Opts) { 343 SourceManager &SM = PP.getSourceManager(); 344 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, 345 Opts.ShowLineMarkers); 346 PP.addPPCallbacks(Rewrite); 347 348 // First let the preprocessor process the entire file and call callbacks. 349 // Callbacks will record which #include's were actually performed. 350 PP.EnterMainSourceFile(); 351 Token Tok; 352 // Only preprocessor directives matter here, so disable macro expansion 353 // everywhere else as an optimization. 354 // TODO: It would be even faster if the preprocessor could be switched 355 // to a mode where it would parse only preprocessor directives and comments, 356 // nothing else matters for parsing or processing. 357 PP.SetMacroExpansionOnlyInDirectives(); 358 do { 359 PP.Lex(Tok); 360 } while (Tok.isNot(tok::eof)); 361 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); 362 OS->flush(); 363} 364