InclusionRewriter.cpp revision 596eea7cc26979c952a0b177d024787a99b299df
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This code rewrites include invocations into their expansions. This gives you 11// a file with all included files merged into it. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Rewrite/Frontend/Rewriters.h" 16#include "clang/Basic/SourceManager.h" 17#include "clang/Frontend/PreprocessorOutputOptions.h" 18#include "clang/Lex/HeaderSearch.h" 19#include "clang/Lex/Preprocessor.h" 20#include "llvm/ADT/SmallString.h" 21#include "llvm/Support/raw_ostream.h" 22 23using namespace clang; 24using namespace llvm; 25 26namespace { 27 28class InclusionRewriter : public PPCallbacks { 29 /// Information about which #includes were actually performed, 30 /// created by preprocessor callbacks. 31 struct FileChange { 32 const Module *Mod; 33 SourceLocation From; 34 FileID Id; 35 SrcMgr::CharacteristicKind FileType; 36 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) { 37 } 38 }; 39 Preprocessor &PP; ///< Used to find inclusion directives. 40 SourceManager &SM; ///< Used to read and manage source files. 41 raw_ostream &OS; ///< The destination stream for rewritten contents. 42 bool ShowLineMarkers; ///< Show #line markers. 43 bool UseLineDirective; ///< Use of line directives or line markers. 44 typedef std::map<unsigned, FileChange> FileChangeMap; 45 FileChangeMap FileChanges; ///< Tracks which files were included where. 46 /// Used transitively for building up the FileChanges mapping over the 47 /// various \c PPCallbacks callbacks. 48 FileChangeMap::iterator LastInsertedFileChange; 49public: 50 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); 51 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); 52private: 53 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, 54 SrcMgr::CharacteristicKind FileType, 55 FileID PrevFID); 56 virtual void FileSkipped(const FileEntry &ParentFile, 57 const Token &FilenameTok, 58 SrcMgr::CharacteristicKind FileType); 59 virtual void InclusionDirective(SourceLocation HashLoc, 60 const Token &IncludeTok, 61 StringRef FileName, 62 bool IsAngled, 63 CharSourceRange FilenameRange, 64 const FileEntry *File, 65 StringRef SearchPath, 66 StringRef RelativePath, 67 const Module *Imported); 68 void WriteLineInfo(const char *Filename, int Line, 69 SrcMgr::CharacteristicKind FileType, 70 StringRef EOL, StringRef Extra = StringRef()); 71 void WriteImplicitModuleImport(const Module *Mod, StringRef EOL); 72 void OutputContentUpTo(const MemoryBuffer &FromFile, 73 unsigned &WriteFrom, unsigned WriteTo, 74 StringRef EOL, int &lines, 75 bool EnsureNewline = false); 76 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, 77 const MemoryBuffer &FromFile, StringRef EOL, 78 unsigned &NextToWrite, int &Lines); 79 bool HandleHasInclude(FileID FileId, Lexer &RawLex, 80 const DirectoryLookup *Lookup, Token &Tok, 81 bool &FileExists); 82 const FileChange *FindFileChangeLocation(SourceLocation Loc) const; 83 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); 84}; 85 86} // end anonymous namespace 87 88/// Initializes an InclusionRewriter with a \p PP source and \p OS destination. 89InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, 90 bool ShowLineMarkers) 91 : PP(PP), SM(PP.getSourceManager()), OS(OS), 92 ShowLineMarkers(ShowLineMarkers), 93 LastInsertedFileChange(FileChanges.end()) { 94 // If we're in microsoft mode, use normal #line instead of line markers. 95 UseLineDirective = PP.getLangOpts().MicrosoftExt; 96} 97 98/// Write appropriate line information as either #line directives or GNU line 99/// markers depending on what mode we're in, including the \p Filename and 100/// \p Line we are located at, using the specified \p EOL line separator, and 101/// any \p Extra context specifiers in GNU line directives. 102void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, 103 SrcMgr::CharacteristicKind FileType, 104 StringRef EOL, StringRef Extra) { 105 if (!ShowLineMarkers) 106 return; 107 if (UseLineDirective) { 108 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"'; 109 } else { 110 // Use GNU linemarkers as described here: 111 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 112 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"'; 113 if (!Extra.empty()) 114 OS << Extra; 115 if (FileType == SrcMgr::C_System) 116 // "`3' This indicates that the following text comes from a system header 117 // file, so certain warnings should be suppressed." 118 OS << " 3"; 119 else if (FileType == SrcMgr::C_ExternCSystem) 120 // as above for `3', plus "`4' This indicates that the following text 121 // should be treated as being wrapped in an implicit extern "C" block." 122 OS << " 3 4"; 123 } 124 OS << EOL; 125} 126 127void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod, 128 StringRef EOL) { 129 OS << "@import " << Mod->getFullModuleName() << ";" 130 << " /* clang -frewrite-includes: implicit import */" << EOL; 131} 132 133/// FileChanged - Whenever the preprocessor enters or exits a #include file 134/// it invokes this handler. 135void InclusionRewriter::FileChanged(SourceLocation Loc, 136 FileChangeReason Reason, 137 SrcMgr::CharacteristicKind NewFileType, 138 FileID) { 139 if (Reason != EnterFile) 140 return; 141 if (LastInsertedFileChange == FileChanges.end()) 142 // we didn't reach this file (eg: the main file) via an inclusion directive 143 return; 144 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); 145 LastInsertedFileChange->second.FileType = NewFileType; 146 LastInsertedFileChange = FileChanges.end(); 147} 148 149/// Called whenever an inclusion is skipped due to canonical header protection 150/// macros. 151void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, 152 const Token &/*FilenameTok*/, 153 SrcMgr::CharacteristicKind /*FileType*/) { 154 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " 155 "found via an inclusion directive, was skipped"); 156 FileChanges.erase(LastInsertedFileChange); 157 LastInsertedFileChange = FileChanges.end(); 158} 159 160/// This should be called whenever the preprocessor encounters include 161/// directives. It does not say whether the file has been included, but it 162/// provides more information about the directive (hash location instead 163/// of location inside the included file). It is assumed that the matching 164/// FileChanged() or FileSkipped() is called after this. 165void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, 166 const Token &/*IncludeTok*/, 167 StringRef /*FileName*/, 168 bool /*IsAngled*/, 169 CharSourceRange /*FilenameRange*/, 170 const FileEntry * /*File*/, 171 StringRef /*SearchPath*/, 172 StringRef /*RelativePath*/, 173 const Module *Imported) { 174 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " 175 "directive was found before the previous one was processed"); 176 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( 177 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported))); 178 assert(p.second && "Unexpected revisitation of the same include directive"); 179 if (!Imported) 180 LastInsertedFileChange = p.first; 181} 182 183/// Simple lookup for a SourceLocation (specifically one denoting the hash in 184/// an inclusion directive) in the map of inclusion information, FileChanges. 185const InclusionRewriter::FileChange * 186InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { 187 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); 188 if (I != FileChanges.end()) 189 return &I->second; 190 return NULL; 191} 192 193/// Detect the likely line ending style of \p FromFile by examining the first 194/// newline found within it. 195static StringRef DetectEOL(const MemoryBuffer &FromFile) { 196 // detect what line endings the file uses, so that added content does not mix 197 // the style 198 const char *Pos = strchr(FromFile.getBufferStart(), '\n'); 199 if (Pos == NULL) 200 return "\n"; 201 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') 202 return "\n\r"; 203 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') 204 return "\r\n"; 205 return "\n"; 206} 207 208/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at 209/// \p WriteTo - 1. 210void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, 211 unsigned &WriteFrom, unsigned WriteTo, 212 StringRef EOL, int &Line, 213 bool EnsureNewline) { 214 if (WriteTo <= WriteFrom) 215 return; 216 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); 217 // count lines manually, it's faster than getPresumedLoc() 218 Line += std::count(FromFile.getBufferStart() + WriteFrom, 219 FromFile.getBufferStart() + WriteTo, '\n'); 220 if (EnsureNewline) { 221 char LastChar = FromFile.getBufferStart()[WriteTo - 1]; 222 if (LastChar != '\n' && LastChar != '\r') 223 OS << EOL; 224 } 225 WriteFrom = WriteTo; 226} 227 228/// Print characters from \p FromFile starting at \p NextToWrite up until the 229/// inclusion directive at \p StartToken, then print out the inclusion 230/// inclusion directive disabled by a #if directive, updating \p NextToWrite 231/// and \p Line to track the number of source lines visited and the progress 232/// through the \p FromFile buffer. 233void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, 234 const Token &StartToken, 235 const MemoryBuffer &FromFile, 236 StringRef EOL, 237 unsigned &NextToWrite, int &Line) { 238 OutputContentUpTo(FromFile, NextToWrite, 239 SM.getFileOffset(StartToken.getLocation()), EOL, Line); 240 Token DirectiveToken; 241 do { 242 DirectiveLex.LexFromRawLexer(DirectiveToken); 243 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); 244 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; 245 OutputContentUpTo(FromFile, NextToWrite, 246 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), 247 EOL, Line); 248 OS << "#endif /* expanded by -frewrite-includes */" << EOL; 249} 250 251/// Find the next identifier in the pragma directive specified by \p RawToken. 252StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, 253 Token &RawToken) { 254 RawLex.LexFromRawLexer(RawToken); 255 if (RawToken.is(tok::raw_identifier)) 256 PP.LookUpIdentifierInfo(RawToken); 257 if (RawToken.is(tok::identifier)) 258 return RawToken.getIdentifierInfo()->getName(); 259 return StringRef(); 260} 261 262// Expand __has_include and __has_include_next if possible. If there's no 263// definitive answer return false. 264bool InclusionRewriter::HandleHasInclude( 265 FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok, 266 bool &FileExists) { 267 // Lex the opening paren. 268 RawLex.LexFromRawLexer(Tok); 269 if (Tok.isNot(tok::l_paren)) 270 return false; 271 272 RawLex.LexFromRawLexer(Tok); 273 274 SmallString<128> FilenameBuffer; 275 StringRef Filename; 276 // Since the raw lexer doesn't give us angle_literals we have to parse them 277 // ourselves. 278 // FIXME: What to do if the file name is a macro? 279 if (Tok.is(tok::less)) { 280 RawLex.LexFromRawLexer(Tok); 281 282 FilenameBuffer += '<'; 283 do { 284 if (Tok.is(tok::eod)) // Sanity check. 285 return false; 286 287 if (Tok.is(tok::raw_identifier)) 288 PP.LookUpIdentifierInfo(Tok); 289 290 // Get the string piece. 291 SmallVector<char, 128> TmpBuffer; 292 bool Invalid = false; 293 StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid); 294 if (Invalid) 295 return false; 296 297 FilenameBuffer += TmpName; 298 299 RawLex.LexFromRawLexer(Tok); 300 } while (Tok.isNot(tok::greater)); 301 302 FilenameBuffer += '>'; 303 Filename = FilenameBuffer; 304 } else { 305 if (Tok.isNot(tok::string_literal)) 306 return false; 307 308 bool Invalid = false; 309 Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); 310 if (Invalid) 311 return false; 312 } 313 314 // Lex the closing paren. 315 RawLex.LexFromRawLexer(Tok); 316 if (Tok.isNot(tok::r_paren)) 317 return false; 318 319 // Now ask HeaderInfo if it knows about the header. 320 // FIXME: Subframeworks aren't handled here. Do we care? 321 bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename); 322 const DirectoryLookup *CurDir; 323 const FileEntry *File = PP.getHeaderSearchInfo().LookupFile( 324 Filename, isAngled, 0, CurDir, 325 PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false); 326 327 FileExists = File != 0; 328 return true; 329} 330 331/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it 332/// and including content of included files recursively. 333bool InclusionRewriter::Process(FileID FileId, 334 SrcMgr::CharacteristicKind FileType) 335{ 336 bool Invalid; 337 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); 338 if (Invalid) // invalid inclusion 339 return false; 340 const char *FileName = FromFile.getBufferIdentifier(); 341 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); 342 RawLex.SetCommentRetentionState(false); 343 344 StringRef EOL = DetectEOL(FromFile); 345 346 // Per the GNU docs: "1" indicates the start of a new file. 347 WriteLineInfo(FileName, 1, FileType, EOL, " 1"); 348 349 if (SM.getFileIDSize(FileId) == 0) 350 return false; 351 352 // The next byte to be copied from the source file 353 unsigned NextToWrite = 0; 354 int Line = 1; // The current input file line number. 355 356 Token RawToken; 357 RawLex.LexFromRawLexer(RawToken); 358 359 // TODO: Consider adding a switch that strips possibly unimportant content, 360 // such as comments, to reduce the size of repro files. 361 while (RawToken.isNot(tok::eof)) { 362 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { 363 RawLex.setParsingPreprocessorDirective(true); 364 Token HashToken = RawToken; 365 RawLex.LexFromRawLexer(RawToken); 366 if (RawToken.is(tok::raw_identifier)) 367 PP.LookUpIdentifierInfo(RawToken); 368 if (RawToken.is(tok::identifier) || RawToken.is(tok::kw_if)) { 369 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { 370 case tok::pp_include: 371 case tok::pp_include_next: 372 case tok::pp_import: { 373 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, 374 Line); 375 StringRef LineInfoExtra; 376 if (const FileChange *Change = FindFileChangeLocation( 377 HashToken.getLocation())) { 378 if (Change->Mod) { 379 WriteImplicitModuleImport(Change->Mod, EOL); 380 381 // else now include and recursively process the file 382 } else if (Process(Change->Id, Change->FileType)) { 383 // and set lineinfo back to this file, if the nested one was 384 // actually included 385 // `2' indicates returning to a file (after having included 386 // another file. 387 LineInfoExtra = " 2"; 388 } 389 } 390 // fix up lineinfo (since commented out directive changed line 391 // numbers) for inclusions that were skipped due to header guards 392 WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra); 393 break; 394 } 395 case tok::pp_pragma: { 396 StringRef Identifier = NextIdentifierName(RawLex, RawToken); 397 if (Identifier == "clang" || Identifier == "GCC") { 398 if (NextIdentifierName(RawLex, RawToken) == "system_header") { 399 // keep the directive in, commented out 400 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 401 NextToWrite, Line); 402 // update our own type 403 FileType = SM.getFileCharacteristic(RawToken.getLocation()); 404 WriteLineInfo(FileName, Line, FileType, EOL); 405 } 406 } else if (Identifier == "once") { 407 // keep the directive in, commented out 408 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 409 NextToWrite, Line); 410 WriteLineInfo(FileName, Line, FileType, EOL); 411 } 412 break; 413 } 414 case tok::pp_if: 415 case tok::pp_elif: 416 // Rewrite special builtin macros to avoid pulling in host details. 417 do { 418 // Walk over the directive. 419 RawLex.LexFromRawLexer(RawToken); 420 if (RawToken.is(tok::raw_identifier)) 421 PP.LookUpIdentifierInfo(RawToken); 422 423 if (RawToken.is(tok::identifier)) { 424 bool HasFile; 425 SourceLocation Loc = RawToken.getLocation(); 426 427 // Rewrite __has_include(x) 428 if (RawToken.getIdentifierInfo()->isStr("__has_include")) { 429 if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile)) 430 continue; 431 // Rewrite __has_include_next(x) 432 } else if (RawToken.getIdentifierInfo()->isStr( 433 "__has_include_next")) { 434 const DirectoryLookup *Lookup = PP.GetCurDirLookup(); 435 if (Lookup) 436 ++Lookup; 437 438 if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken, 439 HasFile)) 440 continue; 441 } else { 442 continue; 443 } 444 // Replace the macro with (0) or (1), followed by the commented 445 // out macro for reference. 446 OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc), 447 EOL, Line); 448 OS << '(' << (int) HasFile << ")/*"; 449 OutputContentUpTo(FromFile, NextToWrite, 450 SM.getFileOffset(RawToken.getLocation()) + 451 RawToken.getLength(), 452 EOL, Line); 453 OS << "*/"; 454 } 455 } while (RawToken.isNot(tok::eod)); 456 457 break; 458 default: 459 break; 460 } 461 } 462 RawLex.setParsingPreprocessorDirective(false); 463 } 464 RawLex.LexFromRawLexer(RawToken); 465 } 466 OutputContentUpTo(FromFile, NextToWrite, 467 SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line, 468 /*EnsureNewline*/true); 469 return true; 470} 471 472/// InclusionRewriterInInput - Implement -frewrite-includes mode. 473void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, 474 const PreprocessorOutputOptions &Opts) { 475 SourceManager &SM = PP.getSourceManager(); 476 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, 477 Opts.ShowLineMarkers); 478 PP.addPPCallbacks(Rewrite); 479 480 // First let the preprocessor process the entire file and call callbacks. 481 // Callbacks will record which #include's were actually performed. 482 PP.EnterMainSourceFile(); 483 Token Tok; 484 // Only preprocessor directives matter here, so disable macro expansion 485 // everywhere else as an optimization. 486 // TODO: It would be even faster if the preprocessor could be switched 487 // to a mode where it would parse only preprocessor directives and comments, 488 // nothing else matters for parsing or processing. 489 PP.SetMacroExpansionOnlyInDirectives(); 490 do { 491 PP.Lex(Tok); 492 } while (Tok.isNot(tok::eof)); 493 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); 494 OS->flush(); 495} 496