InclusionRewriter.cpp revision f91fc4bfa3561990bbb39d3ed2045a75cbaa0df4
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This code rewrites include invocations into their expansions. This gives you 11// a file with all included files merged into it. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Rewrite/Frontend/Rewriters.h" 16#include "clang/Basic/SourceManager.h" 17#include "clang/Frontend/PreprocessorOutputOptions.h" 18#include "clang/Lex/HeaderSearch.h" 19#include "clang/Lex/Pragma.h" 20#include "clang/Lex/Preprocessor.h" 21#include "llvm/ADT/SmallString.h" 22#include "llvm/Support/raw_ostream.h" 23 24using namespace clang; 25using namespace llvm; 26 27namespace { 28 29class InclusionRewriter : public PPCallbacks { 30 /// Information about which #includes were actually performed, 31 /// created by preprocessor callbacks. 32 struct FileChange { 33 const Module *Mod; 34 SourceLocation From; 35 FileID Id; 36 SrcMgr::CharacteristicKind FileType; 37 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) { 38 } 39 }; 40 Preprocessor &PP; ///< Used to find inclusion directives. 41 SourceManager &SM; ///< Used to read and manage source files. 42 raw_ostream &OS; ///< The destination stream for rewritten contents. 43 const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines. 44 bool ShowLineMarkers; ///< Show #line markers. 45 bool UseLineDirective; ///< Use of line directives or line markers. 46 typedef std::map<unsigned, FileChange> FileChangeMap; 47 FileChangeMap FileChanges; ///< Tracks which files were included where. 48 /// Used transitively for building up the FileChanges mapping over the 49 /// various \c PPCallbacks callbacks. 50 FileChangeMap::iterator LastInsertedFileChange; 51public: 52 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); 53 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); 54 void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) { 55 PredefinesBuffer = Buf; 56 } 57private: 58 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, 59 SrcMgr::CharacteristicKind FileType, 60 FileID PrevFID); 61 virtual void FileSkipped(const FileEntry &ParentFile, 62 const Token &FilenameTok, 63 SrcMgr::CharacteristicKind FileType); 64 virtual void InclusionDirective(SourceLocation HashLoc, 65 const Token &IncludeTok, 66 StringRef FileName, 67 bool IsAngled, 68 CharSourceRange FilenameRange, 69 const FileEntry *File, 70 StringRef SearchPath, 71 StringRef RelativePath, 72 const Module *Imported); 73 void WriteLineInfo(const char *Filename, int Line, 74 SrcMgr::CharacteristicKind FileType, 75 StringRef EOL, StringRef Extra = StringRef()); 76 void WriteImplicitModuleImport(const Module *Mod, StringRef EOL); 77 void OutputContentUpTo(const MemoryBuffer &FromFile, 78 unsigned &WriteFrom, unsigned WriteTo, 79 StringRef EOL, int &lines, 80 bool EnsureNewline = false); 81 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, 82 const MemoryBuffer &FromFile, StringRef EOL, 83 unsigned &NextToWrite, int &Lines); 84 bool HandleHasInclude(FileID FileId, Lexer &RawLex, 85 const DirectoryLookup *Lookup, Token &Tok, 86 bool &FileExists); 87 const FileChange *FindFileChangeLocation(SourceLocation Loc) const; 88 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); 89}; 90 91} // end anonymous namespace 92 93/// Initializes an InclusionRewriter with a \p PP source and \p OS destination. 94InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, 95 bool ShowLineMarkers) 96 : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(0), 97 ShowLineMarkers(ShowLineMarkers), 98 LastInsertedFileChange(FileChanges.end()) { 99 // If we're in microsoft mode, use normal #line instead of line markers. 100 UseLineDirective = PP.getLangOpts().MicrosoftExt; 101} 102 103/// Write appropriate line information as either #line directives or GNU line 104/// markers depending on what mode we're in, including the \p Filename and 105/// \p Line we are located at, using the specified \p EOL line separator, and 106/// any \p Extra context specifiers in GNU line directives. 107void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, 108 SrcMgr::CharacteristicKind FileType, 109 StringRef EOL, StringRef Extra) { 110 if (!ShowLineMarkers) 111 return; 112 if (UseLineDirective) { 113 OS << "#line" << ' ' << Line << ' ' << '"'; 114 OS.write_escaped(Filename); 115 OS << '"'; 116 } else { 117 // Use GNU linemarkers as described here: 118 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 119 OS << '#' << ' ' << Line << ' ' << '"'; 120 OS.write_escaped(Filename); 121 OS << '"'; 122 if (!Extra.empty()) 123 OS << Extra; 124 if (FileType == SrcMgr::C_System) 125 // "`3' This indicates that the following text comes from a system header 126 // file, so certain warnings should be suppressed." 127 OS << " 3"; 128 else if (FileType == SrcMgr::C_ExternCSystem) 129 // as above for `3', plus "`4' This indicates that the following text 130 // should be treated as being wrapped in an implicit extern "C" block." 131 OS << " 3 4"; 132 } 133 OS << EOL; 134} 135 136void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod, 137 StringRef EOL) { 138 OS << "@import " << Mod->getFullModuleName() << ";" 139 << " /* clang -frewrite-includes: implicit import */" << EOL; 140} 141 142/// FileChanged - Whenever the preprocessor enters or exits a #include file 143/// it invokes this handler. 144void InclusionRewriter::FileChanged(SourceLocation Loc, 145 FileChangeReason Reason, 146 SrcMgr::CharacteristicKind NewFileType, 147 FileID) { 148 if (Reason != EnterFile) 149 return; 150 if (LastInsertedFileChange == FileChanges.end()) 151 // we didn't reach this file (eg: the main file) via an inclusion directive 152 return; 153 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); 154 LastInsertedFileChange->second.FileType = NewFileType; 155 LastInsertedFileChange = FileChanges.end(); 156} 157 158/// Called whenever an inclusion is skipped due to canonical header protection 159/// macros. 160void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, 161 const Token &/*FilenameTok*/, 162 SrcMgr::CharacteristicKind /*FileType*/) { 163 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " 164 "found via an inclusion directive, was skipped"); 165 FileChanges.erase(LastInsertedFileChange); 166 LastInsertedFileChange = FileChanges.end(); 167} 168 169/// This should be called whenever the preprocessor encounters include 170/// directives. It does not say whether the file has been included, but it 171/// provides more information about the directive (hash location instead 172/// of location inside the included file). It is assumed that the matching 173/// FileChanged() or FileSkipped() is called after this. 174void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, 175 const Token &/*IncludeTok*/, 176 StringRef /*FileName*/, 177 bool /*IsAngled*/, 178 CharSourceRange /*FilenameRange*/, 179 const FileEntry * /*File*/, 180 StringRef /*SearchPath*/, 181 StringRef /*RelativePath*/, 182 const Module *Imported) { 183 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " 184 "directive was found before the previous one was processed"); 185 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( 186 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported))); 187 assert(p.second && "Unexpected revisitation of the same include directive"); 188 if (!Imported) 189 LastInsertedFileChange = p.first; 190} 191 192/// Simple lookup for a SourceLocation (specifically one denoting the hash in 193/// an inclusion directive) in the map of inclusion information, FileChanges. 194const InclusionRewriter::FileChange * 195InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { 196 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); 197 if (I != FileChanges.end()) 198 return &I->second; 199 return NULL; 200} 201 202/// Detect the likely line ending style of \p FromFile by examining the first 203/// newline found within it. 204static StringRef DetectEOL(const MemoryBuffer &FromFile) { 205 // detect what line endings the file uses, so that added content does not mix 206 // the style 207 const char *Pos = strchr(FromFile.getBufferStart(), '\n'); 208 if (Pos == NULL) 209 return "\n"; 210 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') 211 return "\n\r"; 212 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') 213 return "\r\n"; 214 return "\n"; 215} 216 217/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at 218/// \p WriteTo - 1. 219void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, 220 unsigned &WriteFrom, unsigned WriteTo, 221 StringRef EOL, int &Line, 222 bool EnsureNewline) { 223 if (WriteTo <= WriteFrom) 224 return; 225 if (&FromFile == PredefinesBuffer) { 226 // Ignore the #defines of the predefines buffer. 227 WriteFrom = WriteTo; 228 return; 229 } 230 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); 231 // count lines manually, it's faster than getPresumedLoc() 232 Line += std::count(FromFile.getBufferStart() + WriteFrom, 233 FromFile.getBufferStart() + WriteTo, '\n'); 234 if (EnsureNewline) { 235 char LastChar = FromFile.getBufferStart()[WriteTo - 1]; 236 if (LastChar != '\n' && LastChar != '\r') 237 OS << EOL; 238 } 239 WriteFrom = WriteTo; 240} 241 242/// Print characters from \p FromFile starting at \p NextToWrite up until the 243/// inclusion directive at \p StartToken, then print out the inclusion 244/// inclusion directive disabled by a #if directive, updating \p NextToWrite 245/// and \p Line to track the number of source lines visited and the progress 246/// through the \p FromFile buffer. 247void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, 248 const Token &StartToken, 249 const MemoryBuffer &FromFile, 250 StringRef EOL, 251 unsigned &NextToWrite, int &Line) { 252 OutputContentUpTo(FromFile, NextToWrite, 253 SM.getFileOffset(StartToken.getLocation()), EOL, Line); 254 Token DirectiveToken; 255 do { 256 DirectiveLex.LexFromRawLexer(DirectiveToken); 257 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); 258 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; 259 OutputContentUpTo(FromFile, NextToWrite, 260 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), 261 EOL, Line); 262 OS << "#endif /* expanded by -frewrite-includes */" << EOL; 263} 264 265/// Find the next identifier in the pragma directive specified by \p RawToken. 266StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, 267 Token &RawToken) { 268 RawLex.LexFromRawLexer(RawToken); 269 if (RawToken.is(tok::raw_identifier)) 270 PP.LookUpIdentifierInfo(RawToken); 271 if (RawToken.is(tok::identifier)) 272 return RawToken.getIdentifierInfo()->getName(); 273 return StringRef(); 274} 275 276// Expand __has_include and __has_include_next if possible. If there's no 277// definitive answer return false. 278bool InclusionRewriter::HandleHasInclude( 279 FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok, 280 bool &FileExists) { 281 // Lex the opening paren. 282 RawLex.LexFromRawLexer(Tok); 283 if (Tok.isNot(tok::l_paren)) 284 return false; 285 286 RawLex.LexFromRawLexer(Tok); 287 288 SmallString<128> FilenameBuffer; 289 StringRef Filename; 290 // Since the raw lexer doesn't give us angle_literals we have to parse them 291 // ourselves. 292 // FIXME: What to do if the file name is a macro? 293 if (Tok.is(tok::less)) { 294 RawLex.LexFromRawLexer(Tok); 295 296 FilenameBuffer += '<'; 297 do { 298 if (Tok.is(tok::eod)) // Sanity check. 299 return false; 300 301 if (Tok.is(tok::raw_identifier)) 302 PP.LookUpIdentifierInfo(Tok); 303 304 // Get the string piece. 305 SmallVector<char, 128> TmpBuffer; 306 bool Invalid = false; 307 StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid); 308 if (Invalid) 309 return false; 310 311 FilenameBuffer += TmpName; 312 313 RawLex.LexFromRawLexer(Tok); 314 } while (Tok.isNot(tok::greater)); 315 316 FilenameBuffer += '>'; 317 Filename = FilenameBuffer; 318 } else { 319 if (Tok.isNot(tok::string_literal)) 320 return false; 321 322 bool Invalid = false; 323 Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); 324 if (Invalid) 325 return false; 326 } 327 328 // Lex the closing paren. 329 RawLex.LexFromRawLexer(Tok); 330 if (Tok.isNot(tok::r_paren)) 331 return false; 332 333 // Now ask HeaderInfo if it knows about the header. 334 // FIXME: Subframeworks aren't handled here. Do we care? 335 bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename); 336 const DirectoryLookup *CurDir; 337 const FileEntry *File = PP.getHeaderSearchInfo().LookupFile( 338 Filename, isAngled, 0, CurDir, 339 PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false); 340 341 FileExists = File != 0; 342 return true; 343} 344 345/// Use a raw lexer to analyze \p FileId, incrementally copying parts of it 346/// and including content of included files recursively. 347bool InclusionRewriter::Process(FileID FileId, 348 SrcMgr::CharacteristicKind FileType) 349{ 350 bool Invalid; 351 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); 352 if (Invalid) // invalid inclusion 353 return false; 354 const char *FileName = FromFile.getBufferIdentifier(); 355 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); 356 RawLex.SetCommentRetentionState(false); 357 358 StringRef EOL = DetectEOL(FromFile); 359 360 // Per the GNU docs: "1" indicates the start of a new file. 361 WriteLineInfo(FileName, 1, FileType, EOL, " 1"); 362 363 if (SM.getFileIDSize(FileId) == 0) 364 return false; 365 366 // The next byte to be copied from the source file 367 unsigned NextToWrite = 0; 368 int Line = 1; // The current input file line number. 369 370 // Ignore UTF-8 BOM, otherwise it'd end up somewhere else than the start 371 // of the resulting file. 372 if (FromFile.getBuffer().startswith("\xEF\xBB\xBF")) 373 NextToWrite = 3; 374 375 Token RawToken; 376 RawLex.LexFromRawLexer(RawToken); 377 378 // TODO: Consider adding a switch that strips possibly unimportant content, 379 // such as comments, to reduce the size of repro files. 380 while (RawToken.isNot(tok::eof)) { 381 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { 382 RawLex.setParsingPreprocessorDirective(true); 383 Token HashToken = RawToken; 384 RawLex.LexFromRawLexer(RawToken); 385 if (RawToken.is(tok::raw_identifier)) 386 PP.LookUpIdentifierInfo(RawToken); 387 if (RawToken.getIdentifierInfo() != NULL) { 388 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { 389 case tok::pp_include: 390 case tok::pp_include_next: 391 case tok::pp_import: { 392 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, 393 Line); 394 StringRef LineInfoExtra; 395 if (const FileChange *Change = FindFileChangeLocation( 396 HashToken.getLocation())) { 397 if (Change->Mod) { 398 WriteImplicitModuleImport(Change->Mod, EOL); 399 400 // else now include and recursively process the file 401 } else if (Process(Change->Id, Change->FileType)) { 402 // and set lineinfo back to this file, if the nested one was 403 // actually included 404 // `2' indicates returning to a file (after having included 405 // another file. 406 LineInfoExtra = " 2"; 407 } 408 } 409 // fix up lineinfo (since commented out directive changed line 410 // numbers) for inclusions that were skipped due to header guards 411 WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra); 412 break; 413 } 414 case tok::pp_pragma: { 415 StringRef Identifier = NextIdentifierName(RawLex, RawToken); 416 if (Identifier == "clang" || Identifier == "GCC") { 417 if (NextIdentifierName(RawLex, RawToken) == "system_header") { 418 // keep the directive in, commented out 419 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 420 NextToWrite, Line); 421 // update our own type 422 FileType = SM.getFileCharacteristic(RawToken.getLocation()); 423 WriteLineInfo(FileName, Line, FileType, EOL); 424 } 425 } else if (Identifier == "once") { 426 // keep the directive in, commented out 427 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 428 NextToWrite, Line); 429 WriteLineInfo(FileName, Line, FileType, EOL); 430 } 431 break; 432 } 433 case tok::pp_if: 434 case tok::pp_elif: { 435 bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() == 436 tok::pp_elif); 437 // Rewrite special builtin macros to avoid pulling in host details. 438 do { 439 // Walk over the directive. 440 RawLex.LexFromRawLexer(RawToken); 441 if (RawToken.is(tok::raw_identifier)) 442 PP.LookUpIdentifierInfo(RawToken); 443 444 if (RawToken.is(tok::identifier)) { 445 bool HasFile; 446 SourceLocation Loc = RawToken.getLocation(); 447 448 // Rewrite __has_include(x) 449 if (RawToken.getIdentifierInfo()->isStr("__has_include")) { 450 if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile)) 451 continue; 452 // Rewrite __has_include_next(x) 453 } else if (RawToken.getIdentifierInfo()->isStr( 454 "__has_include_next")) { 455 const DirectoryLookup *Lookup = PP.GetCurDirLookup(); 456 if (Lookup) 457 ++Lookup; 458 459 if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken, 460 HasFile)) 461 continue; 462 } else { 463 continue; 464 } 465 // Replace the macro with (0) or (1), followed by the commented 466 // out macro for reference. 467 OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc), 468 EOL, Line); 469 OS << '(' << (int) HasFile << ")/*"; 470 OutputContentUpTo(FromFile, NextToWrite, 471 SM.getFileOffset(RawToken.getLocation()) + 472 RawToken.getLength(), 473 EOL, Line); 474 OS << "*/"; 475 } 476 } while (RawToken.isNot(tok::eod)); 477 if (elif) { 478 OutputContentUpTo(FromFile, NextToWrite, 479 SM.getFileOffset(RawToken.getLocation()) + 480 RawToken.getLength(), 481 EOL, Line, /*EnsureNewLine*/ true); 482 WriteLineInfo(FileName, Line, FileType, EOL); 483 } 484 break; 485 } 486 case tok::pp_endif: 487 case tok::pp_else: { 488 // We surround every #include by #if 0 to comment it out, but that 489 // changes line numbers. These are fixed up right after that, but 490 // the whole #include could be inside a preprocessor conditional 491 // that is not processed. So it is necessary to fix the line 492 // numbers one the next line after each #else/#endif as well. 493 RawLex.SetKeepWhitespaceMode(true); 494 do { 495 RawLex.LexFromRawLexer(RawToken); 496 } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof)); 497 OutputContentUpTo( 498 FromFile, NextToWrite, 499 SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(), 500 EOL, Line, /*EnsureNewLine*/ true); 501 WriteLineInfo(FileName, Line, FileType, EOL); 502 RawLex.SetKeepWhitespaceMode(false); 503 } 504 default: 505 break; 506 } 507 } 508 RawLex.setParsingPreprocessorDirective(false); 509 } 510 RawLex.LexFromRawLexer(RawToken); 511 } 512 OutputContentUpTo(FromFile, NextToWrite, 513 SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line, 514 /*EnsureNewline*/true); 515 return true; 516} 517 518/// InclusionRewriterInInput - Implement -frewrite-includes mode. 519void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, 520 const PreprocessorOutputOptions &Opts) { 521 SourceManager &SM = PP.getSourceManager(); 522 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, 523 Opts.ShowLineMarkers); 524 PP.addPPCallbacks(Rewrite); 525 // Ignore all pragmas, otherwise there will be warnings about unknown pragmas 526 // (because there's nothing to handle them). 527 PP.AddPragmaHandler(new EmptyPragmaHandler()); 528 // Ignore also all pragma in all namespaces created 529 // in Preprocessor::RegisterBuiltinPragmas(). 530 PP.AddPragmaHandler("GCC", new EmptyPragmaHandler()); 531 PP.AddPragmaHandler("clang", new EmptyPragmaHandler()); 532 533 // First let the preprocessor process the entire file and call callbacks. 534 // Callbacks will record which #include's were actually performed. 535 PP.EnterMainSourceFile(); 536 Token Tok; 537 // Only preprocessor directives matter here, so disable macro expansion 538 // everywhere else as an optimization. 539 // TODO: It would be even faster if the preprocessor could be switched 540 // to a mode where it would parse only preprocessor directives and comments, 541 // nothing else matters for parsing or processing. 542 PP.SetMacroExpansionOnlyInDirectives(); 543 do { 544 PP.Lex(Tok); 545 } while (Tok.isNot(tok::eof)); 546 Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID())); 547 Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User); 548 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); 549 OS->flush(); 550} 551