InclusionRewriter.cpp revision 8ee6a0dcc985c65bf5fd61a63e3f86e3ac516f5e
1//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This code rewrites include invocations into their expansions. This gives you 11// a file with all included files merged into it. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Rewrite/Frontend/Rewriters.h" 16#include "clang/Basic/SourceManager.h" 17#include "clang/Frontend/PreprocessorOutputOptions.h" 18#include "clang/Lex/HeaderSearch.h" 19#include "clang/Lex/Pragma.h" 20#include "clang/Lex/Preprocessor.h" 21#include "llvm/ADT/SmallString.h" 22#include "llvm/Support/raw_ostream.h" 23 24using namespace clang; 25using namespace llvm; 26 27namespace { 28 29class InclusionRewriter : public PPCallbacks { 30 /// Information about which #includes were actually performed, 31 /// created by preprocessor callbacks. 32 struct FileChange { 33 const Module *Mod; 34 SourceLocation From; 35 FileID Id; 36 SrcMgr::CharacteristicKind FileType; 37 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) { 38 } 39 }; 40 Preprocessor &PP; ///< Used to find inclusion directives. 41 SourceManager &SM; ///< Used to read and manage source files. 42 raw_ostream &OS; ///< The destination stream for rewritten contents. 43 bool ShowLineMarkers; ///< Show #line markers. 44 bool UseLineDirective; ///< Use of line directives or line markers. 45 typedef std::map<unsigned, FileChange> FileChangeMap; 46 FileChangeMap FileChanges; ///< Tracks which files were included where. 47 /// Used transitively for building up the FileChanges mapping over the 48 /// various \c PPCallbacks callbacks. 49 FileChangeMap::iterator LastInsertedFileChange; 50public: 51 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); 52 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); 53private: 54 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, 55 SrcMgr::CharacteristicKind FileType, 56 FileID PrevFID); 57 virtual void FileSkipped(const FileEntry &ParentFile, 58 const Token &FilenameTok, 59 SrcMgr::CharacteristicKind FileType); 60 virtual void InclusionDirective(SourceLocation HashLoc, 61 const Token &IncludeTok, 62 StringRef FileName, 63 bool IsAngled, 64 CharSourceRange FilenameRange, 65 const FileEntry *File, 66 StringRef SearchPath, 67 StringRef RelativePath, 68 const Module *Imported); 69 void WriteLineInfo(const char *Filename, int Line, 70 SrcMgr::CharacteristicKind FileType, 71 StringRef EOL, StringRef Extra = StringRef()); 72 void WriteImplicitModuleImport(const Module *Mod, StringRef EOL); 73 void OutputContentUpTo(const MemoryBuffer &FromFile, 74 unsigned &WriteFrom, unsigned WriteTo, 75 StringRef EOL, int &lines, 76 bool EnsureNewline = false); 77 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, 78 const MemoryBuffer &FromFile, StringRef EOL, 79 unsigned &NextToWrite, int &Lines); 80 bool HandleHasInclude(FileID FileId, Lexer &RawLex, 81 const DirectoryLookup *Lookup, Token &Tok, 82 bool &FileExists); 83 const FileChange *FindFileChangeLocation(SourceLocation Loc) const; 84 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); 85}; 86 87} // end anonymous namespace 88 89/// Initializes an InclusionRewriter with a \p PP source and \p OS destination. 90InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, 91 bool ShowLineMarkers) 92 : PP(PP), SM(PP.getSourceManager()), OS(OS), 93 ShowLineMarkers(ShowLineMarkers), 94 LastInsertedFileChange(FileChanges.end()) { 95 // If we're in microsoft mode, use normal #line instead of line markers. 96 UseLineDirective = PP.getLangOpts().MicrosoftExt; 97} 98 99/// Write appropriate line information as either #line directives or GNU line 100/// markers depending on what mode we're in, including the \p Filename and 101/// \p Line we are located at, using the specified \p EOL line separator, and 102/// any \p Extra context specifiers in GNU line directives. 103void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, 104 SrcMgr::CharacteristicKind FileType, 105 StringRef EOL, StringRef Extra) { 106 if (!ShowLineMarkers) 107 return; 108 if (UseLineDirective) { 109 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"'; 110 } else { 111 // Use GNU linemarkers as described here: 112 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 113 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"'; 114 if (!Extra.empty()) 115 OS << Extra; 116 if (FileType == SrcMgr::C_System) 117 // "`3' This indicates that the following text comes from a system header 118 // file, so certain warnings should be suppressed." 119 OS << " 3"; 120 else if (FileType == SrcMgr::C_ExternCSystem) 121 // as above for `3', plus "`4' This indicates that the following text 122 // should be treated as being wrapped in an implicit extern "C" block." 123 OS << " 3 4"; 124 } 125 OS << EOL; 126} 127 128void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod, 129 StringRef EOL) { 130 OS << "@import " << Mod->getFullModuleName() << ";" 131 << " /* clang -frewrite-includes: implicit import */" << EOL; 132} 133 134/// FileChanged - Whenever the preprocessor enters or exits a #include file 135/// it invokes this handler. 136void InclusionRewriter::FileChanged(SourceLocation Loc, 137 FileChangeReason Reason, 138 SrcMgr::CharacteristicKind NewFileType, 139 FileID) { 140 if (Reason != EnterFile) 141 return; 142 if (LastInsertedFileChange == FileChanges.end()) 143 // we didn't reach this file (eg: the main file) via an inclusion directive 144 return; 145 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); 146 LastInsertedFileChange->second.FileType = NewFileType; 147 LastInsertedFileChange = FileChanges.end(); 148} 149 150/// Called whenever an inclusion is skipped due to canonical header protection 151/// macros. 152void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, 153 const Token &/*FilenameTok*/, 154 SrcMgr::CharacteristicKind /*FileType*/) { 155 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " 156 "found via an inclusion directive, was skipped"); 157 FileChanges.erase(LastInsertedFileChange); 158 LastInsertedFileChange = FileChanges.end(); 159} 160 161/// This should be called whenever the preprocessor encounters include 162/// directives. It does not say whether the file has been included, but it 163/// provides more information about the directive (hash location instead 164/// of location inside the included file). It is assumed that the matching 165/// FileChanged() or FileSkipped() is called after this. 166void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, 167 const Token &/*IncludeTok*/, 168 StringRef /*FileName*/, 169 bool /*IsAngled*/, 170 CharSourceRange /*FilenameRange*/, 171 const FileEntry * /*File*/, 172 StringRef /*SearchPath*/, 173 StringRef /*RelativePath*/, 174 const Module *Imported) { 175 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " 176 "directive was found before the previous one was processed"); 177 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( 178 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported))); 179 assert(p.second && "Unexpected revisitation of the same include directive"); 180 if (!Imported) 181 LastInsertedFileChange = p.first; 182} 183 184/// Simple lookup for a SourceLocation (specifically one denoting the hash in 185/// an inclusion directive) in the map of inclusion information, FileChanges. 186const InclusionRewriter::FileChange * 187InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { 188 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); 189 if (I != FileChanges.end()) 190 return &I->second; 191 return NULL; 192} 193 194/// Detect the likely line ending style of \p FromFile by examining the first 195/// newline found within it. 196static StringRef DetectEOL(const MemoryBuffer &FromFile) { 197 // detect what line endings the file uses, so that added content does not mix 198 // the style 199 const char *Pos = strchr(FromFile.getBufferStart(), '\n'); 200 if (Pos == NULL) 201 return "\n"; 202 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') 203 return "\n\r"; 204 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') 205 return "\r\n"; 206 return "\n"; 207} 208 209/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at 210/// \p WriteTo - 1. 211void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, 212 unsigned &WriteFrom, unsigned WriteTo, 213 StringRef EOL, int &Line, 214 bool EnsureNewline) { 215 if (WriteTo <= WriteFrom) 216 return; 217 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); 218 // count lines manually, it's faster than getPresumedLoc() 219 Line += std::count(FromFile.getBufferStart() + WriteFrom, 220 FromFile.getBufferStart() + WriteTo, '\n'); 221 if (EnsureNewline) { 222 char LastChar = FromFile.getBufferStart()[WriteTo - 1]; 223 if (LastChar != '\n' && LastChar != '\r') 224 OS << EOL; 225 } 226 WriteFrom = WriteTo; 227} 228 229/// Print characters from \p FromFile starting at \p NextToWrite up until the 230/// inclusion directive at \p StartToken, then print out the inclusion 231/// inclusion directive disabled by a #if directive, updating \p NextToWrite 232/// and \p Line to track the number of source lines visited and the progress 233/// through the \p FromFile buffer. 234void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, 235 const Token &StartToken, 236 const MemoryBuffer &FromFile, 237 StringRef EOL, 238 unsigned &NextToWrite, int &Line) { 239 OutputContentUpTo(FromFile, NextToWrite, 240 SM.getFileOffset(StartToken.getLocation()), EOL, Line); 241 Token DirectiveToken; 242 do { 243 DirectiveLex.LexFromRawLexer(DirectiveToken); 244 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); 245 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; 246 OutputContentUpTo(FromFile, NextToWrite, 247 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), 248 EOL, Line); 249 OS << "#endif /* expanded by -frewrite-includes */" << EOL; 250} 251 252/// Find the next identifier in the pragma directive specified by \p RawToken. 253StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, 254 Token &RawToken) { 255 RawLex.LexFromRawLexer(RawToken); 256 if (RawToken.is(tok::raw_identifier)) 257 PP.LookUpIdentifierInfo(RawToken); 258 if (RawToken.is(tok::identifier)) 259 return RawToken.getIdentifierInfo()->getName(); 260 return StringRef(); 261} 262 263// Expand __has_include and __has_include_next if possible. If there's no 264// definitive answer return false. 265bool InclusionRewriter::HandleHasInclude( 266 FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok, 267 bool &FileExists) { 268 // Lex the opening paren. 269 RawLex.LexFromRawLexer(Tok); 270 if (Tok.isNot(tok::l_paren)) 271 return false; 272 273 RawLex.LexFromRawLexer(Tok); 274 275 SmallString<128> FilenameBuffer; 276 StringRef Filename; 277 // Since the raw lexer doesn't give us angle_literals we have to parse them 278 // ourselves. 279 // FIXME: What to do if the file name is a macro? 280 if (Tok.is(tok::less)) { 281 RawLex.LexFromRawLexer(Tok); 282 283 FilenameBuffer += '<'; 284 do { 285 if (Tok.is(tok::eod)) // Sanity check. 286 return false; 287 288 if (Tok.is(tok::raw_identifier)) 289 PP.LookUpIdentifierInfo(Tok); 290 291 // Get the string piece. 292 SmallVector<char, 128> TmpBuffer; 293 bool Invalid = false; 294 StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid); 295 if (Invalid) 296 return false; 297 298 FilenameBuffer += TmpName; 299 300 RawLex.LexFromRawLexer(Tok); 301 } while (Tok.isNot(tok::greater)); 302 303 FilenameBuffer += '>'; 304 Filename = FilenameBuffer; 305 } else { 306 if (Tok.isNot(tok::string_literal)) 307 return false; 308 309 bool Invalid = false; 310 Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); 311 if (Invalid) 312 return false; 313 } 314 315 // Lex the closing paren. 316 RawLex.LexFromRawLexer(Tok); 317 if (Tok.isNot(tok::r_paren)) 318 return false; 319 320 // Now ask HeaderInfo if it knows about the header. 321 // FIXME: Subframeworks aren't handled here. Do we care? 322 bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename); 323 const DirectoryLookup *CurDir; 324 const FileEntry *File = PP.getHeaderSearchInfo().LookupFile( 325 Filename, isAngled, 0, CurDir, 326 PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false); 327 328 FileExists = File != 0; 329 return true; 330} 331 332/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it 333/// and including content of included files recursively. 334bool InclusionRewriter::Process(FileID FileId, 335 SrcMgr::CharacteristicKind FileType) 336{ 337 bool Invalid; 338 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); 339 if (Invalid) // invalid inclusion 340 return false; 341 const char *FileName = FromFile.getBufferIdentifier(); 342 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); 343 RawLex.SetCommentRetentionState(false); 344 345 StringRef EOL = DetectEOL(FromFile); 346 347 // Per the GNU docs: "1" indicates the start of a new file. 348 WriteLineInfo(FileName, 1, FileType, EOL, " 1"); 349 350 if (SM.getFileIDSize(FileId) == 0) 351 return false; 352 353 // The next byte to be copied from the source file 354 unsigned NextToWrite = 0; 355 int Line = 1; // The current input file line number. 356 357 Token RawToken; 358 RawLex.LexFromRawLexer(RawToken); 359 360 // TODO: Consider adding a switch that strips possibly unimportant content, 361 // such as comments, to reduce the size of repro files. 362 while (RawToken.isNot(tok::eof)) { 363 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { 364 RawLex.setParsingPreprocessorDirective(true); 365 Token HashToken = RawToken; 366 RawLex.LexFromRawLexer(RawToken); 367 if (RawToken.is(tok::raw_identifier)) 368 PP.LookUpIdentifierInfo(RawToken); 369 if (RawToken.getIdentifierInfo() != NULL) { 370 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { 371 case tok::pp_include: 372 case tok::pp_include_next: 373 case tok::pp_import: { 374 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, 375 Line); 376 StringRef LineInfoExtra; 377 if (const FileChange *Change = FindFileChangeLocation( 378 HashToken.getLocation())) { 379 if (Change->Mod) { 380 WriteImplicitModuleImport(Change->Mod, EOL); 381 382 // else now include and recursively process the file 383 } else if (Process(Change->Id, Change->FileType)) { 384 // and set lineinfo back to this file, if the nested one was 385 // actually included 386 // `2' indicates returning to a file (after having included 387 // another file. 388 LineInfoExtra = " 2"; 389 } 390 } 391 // fix up lineinfo (since commented out directive changed line 392 // numbers) for inclusions that were skipped due to header guards 393 WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra); 394 break; 395 } 396 case tok::pp_pragma: { 397 StringRef Identifier = NextIdentifierName(RawLex, RawToken); 398 if (Identifier == "clang" || Identifier == "GCC") { 399 if (NextIdentifierName(RawLex, RawToken) == "system_header") { 400 // keep the directive in, commented out 401 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 402 NextToWrite, Line); 403 // update our own type 404 FileType = SM.getFileCharacteristic(RawToken.getLocation()); 405 WriteLineInfo(FileName, Line, FileType, EOL); 406 } 407 } else if (Identifier == "once") { 408 // keep the directive in, commented out 409 CommentOutDirective(RawLex, HashToken, FromFile, EOL, 410 NextToWrite, Line); 411 WriteLineInfo(FileName, Line, FileType, EOL); 412 } 413 break; 414 } 415 case tok::pp_if: 416 case tok::pp_elif: { 417 bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() == 418 tok::pp_elif); 419 // Rewrite special builtin macros to avoid pulling in host details. 420 do { 421 // Walk over the directive. 422 RawLex.LexFromRawLexer(RawToken); 423 if (RawToken.is(tok::raw_identifier)) 424 PP.LookUpIdentifierInfo(RawToken); 425 426 if (RawToken.is(tok::identifier)) { 427 bool HasFile; 428 SourceLocation Loc = RawToken.getLocation(); 429 430 // Rewrite __has_include(x) 431 if (RawToken.getIdentifierInfo()->isStr("__has_include")) { 432 if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile)) 433 continue; 434 // Rewrite __has_include_next(x) 435 } else if (RawToken.getIdentifierInfo()->isStr( 436 "__has_include_next")) { 437 const DirectoryLookup *Lookup = PP.GetCurDirLookup(); 438 if (Lookup) 439 ++Lookup; 440 441 if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken, 442 HasFile)) 443 continue; 444 } else { 445 continue; 446 } 447 // Replace the macro with (0) or (1), followed by the commented 448 // out macro for reference. 449 OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc), 450 EOL, Line); 451 OS << '(' << (int) HasFile << ")/*"; 452 OutputContentUpTo(FromFile, NextToWrite, 453 SM.getFileOffset(RawToken.getLocation()) + 454 RawToken.getLength(), 455 EOL, Line); 456 OS << "*/"; 457 } 458 } while (RawToken.isNot(tok::eod)); 459 if (elif) { 460 OutputContentUpTo(FromFile, NextToWrite, 461 SM.getFileOffset(RawToken.getLocation()) + 462 RawToken.getLength(), 463 EOL, Line, /*EnsureNewLine*/ true); 464 WriteLineInfo(FileName, Line, FileType, EOL); 465 } 466 break; 467 } 468 case tok::pp_endif: 469 case tok::pp_else: { 470 // We surround every #include by #if 0 to comment it out, but that 471 // changes line numbers. These are fixed up right after that, but 472 // the whole #include could be inside a preprocessor conditional 473 // that is not processed. So it is necessary to fix the line 474 // numbers one the next line after each #else/#endif as well. 475 RawLex.SetKeepWhitespaceMode(true); 476 do { 477 RawLex.LexFromRawLexer(RawToken); 478 } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof)); 479 OutputContentUpTo( 480 FromFile, NextToWrite, 481 SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(), 482 EOL, Line, /*EnsureNewLine*/ true); 483 WriteLineInfo(FileName, Line, FileType, EOL); 484 RawLex.SetKeepWhitespaceMode(false); 485 } 486 default: 487 break; 488 } 489 } 490 RawLex.setParsingPreprocessorDirective(false); 491 } 492 RawLex.LexFromRawLexer(RawToken); 493 } 494 OutputContentUpTo(FromFile, NextToWrite, 495 SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line, 496 /*EnsureNewline*/true); 497 return true; 498} 499 500/// InclusionRewriterInInput - Implement -frewrite-includes mode. 501void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, 502 const PreprocessorOutputOptions &Opts) { 503 SourceManager &SM = PP.getSourceManager(); 504 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, 505 Opts.ShowLineMarkers); 506 PP.addPPCallbacks(Rewrite); 507 // Ignore all pragmas, otherwise there will be warnings about unknown pragmas 508 // (because there's nothing to handle them). 509 PP.AddPragmaHandler(new EmptyPragmaHandler()); 510 // Ignore also all pragma in all namespaces created 511 // in Preprocessor::RegisterBuiltinPragmas(). 512 PP.AddPragmaHandler("GCC", new EmptyPragmaHandler()); 513 PP.AddPragmaHandler("clang", new EmptyPragmaHandler()); 514 515 // First let the preprocessor process the entire file and call callbacks. 516 // Callbacks will record which #include's were actually performed. 517 PP.EnterMainSourceFile(); 518 Token Tok; 519 // Only preprocessor directives matter here, so disable macro expansion 520 // everywhere else as an optimization. 521 // TODO: It would be even faster if the preprocessor could be switched 522 // to a mode where it would parse only preprocessor directives and comments, 523 // nothing else matters for parsing or processing. 524 PP.SetMacroExpansionOnlyInDirectives(); 525 do { 526 PP.Lex(Tok); 527 } while (Tok.isNot(tok::eof)); 528 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); 529 OS->flush(); 530} 531