Preprocessor.cpp revision 63ceaa32a2371e38d1f912080fe471285e6b6e56
1//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the Preprocessor interface. 11// 12//===----------------------------------------------------------------------===// 13// 14// Options to support: 15// -H - Print the name of each header file used. 16// -d[DNI] - Dump various things. 17// -fworking-directory - #line's with preprocessor's working dir. 18// -fpreprocessed 19// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20// -W* 21// -w 22// 23// Messages to emit: 24// "Multiple include guards may be useful for:\n" 25// 26//===----------------------------------------------------------------------===// 27 28#include "clang/Lex/Preprocessor.h" 29#include "clang/Lex/HeaderSearch.h" 30#include "clang/Lex/MacroInfo.h" 31#include "clang/Lex/Pragma.h" 32#include "clang/Lex/ScratchBuffer.h" 33#include "clang/Lex/LexDiagnostic.h" 34#include "clang/Basic/SourceManager.h" 35#include "clang/Basic/FileManager.h" 36#include "clang/Basic/TargetInfo.h" 37#include "llvm/ADT/APFloat.h" 38#include "llvm/ADT/SmallVector.h" 39#include "llvm/Support/MemoryBuffer.h" 40#include "llvm/Support/raw_ostream.h" 41#include <cstdio> 42using namespace clang; 43 44//===----------------------------------------------------------------------===// 45 46Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, 47 const TargetInfo &target, SourceManager &SM, 48 HeaderSearch &Headers, 49 IdentifierInfoLookup* IILookup, 50 bool OwnsHeaders) 51 : Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()), 52 SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts, IILookup), 53 BuiltinInfo(Target), CodeCompletionFile(0), CurPPLexer(0), CurDirLookup(0), 54 Callbacks(0) { 55 ScratchBuf = new ScratchBuffer(SourceMgr); 56 CounterValue = 0; // __COUNTER__ starts at 0. 57 OwnsHeaderSearch = OwnsHeaders; 58 59 // Clear stats. 60 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 61 NumIf = NumElse = NumEndif = 0; 62 NumEnteredSourceFiles = 0; 63 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 64 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 65 MaxIncludeStackDepth = 0; 66 NumSkipped = 0; 67 68 // Default to discarding comments. 69 KeepComments = false; 70 KeepMacroComments = false; 71 72 // Macro expansion is enabled. 73 DisableMacroExpansion = false; 74 InMacroArgs = false; 75 NumCachedTokenLexers = 0; 76 77 CachedLexPos = 0; 78 79 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 80 // This gets unpoisoned where it is allowed. 81 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 82 83 // Initialize the pragma handlers. 84 PragmaHandlers = new PragmaNamespace(0); 85 RegisterBuiltinPragmas(); 86 87 // Initialize builtin macros like __LINE__ and friends. 88 RegisterBuiltinMacros(); 89} 90 91Preprocessor::~Preprocessor() { 92 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 93 94 while (!IncludeMacroStack.empty()) { 95 delete IncludeMacroStack.back().TheLexer; 96 delete IncludeMacroStack.back().TheTokenLexer; 97 IncludeMacroStack.pop_back(); 98 } 99 100 // Free any macro definitions. 101 for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I = 102 Macros.begin(), E = Macros.end(); I != E; ++I) { 103 // We don't need to free the MacroInfo objects directly. These 104 // will be released when the BumpPtrAllocator 'BP' object gets 105 // destroyed. We still need to run the dstor, however, to free 106 // memory alocated by MacroInfo. 107 I->second->Destroy(BP); 108 I->first->setHasMacroDefinition(false); 109 } 110 111 // Free any cached macro expanders. 112 for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i) 113 delete TokenLexerCache[i]; 114 115 // Release pragma information. 116 delete PragmaHandlers; 117 118 // Delete the scratch buffer info. 119 delete ScratchBuf; 120 121 // Delete the header search info, if we own it. 122 if (OwnsHeaderSearch) 123 delete &HeaderInfo; 124 125 delete Callbacks; 126} 127 128void Preprocessor::setPTHManager(PTHManager* pm) { 129 PTH.reset(pm); 130 FileMgr.addStatCache(PTH->createStatCache()); 131} 132 133void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 134 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 135 << getSpelling(Tok) << "'"; 136 137 if (!DumpFlags) return; 138 139 llvm::errs() << "\t"; 140 if (Tok.isAtStartOfLine()) 141 llvm::errs() << " [StartOfLine]"; 142 if (Tok.hasLeadingSpace()) 143 llvm::errs() << " [LeadingSpace]"; 144 if (Tok.isExpandDisabled()) 145 llvm::errs() << " [ExpandDisabled]"; 146 if (Tok.needsCleaning()) { 147 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 148 llvm::errs() << " [UnClean='" << std::string(Start, Start+Tok.getLength()) 149 << "']"; 150 } 151 152 llvm::errs() << "\tLoc=<"; 153 DumpLocation(Tok.getLocation()); 154 llvm::errs() << ">"; 155} 156 157void Preprocessor::DumpLocation(SourceLocation Loc) const { 158 Loc.dump(SourceMgr); 159} 160 161void Preprocessor::DumpMacro(const MacroInfo &MI) const { 162 llvm::errs() << "MACRO: "; 163 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 164 DumpToken(MI.getReplacementToken(i)); 165 llvm::errs() << " "; 166 } 167 llvm::errs() << "\n"; 168} 169 170void Preprocessor::PrintStats() { 171 llvm::errs() << "\n*** Preprocessor Stats:\n"; 172 llvm::errs() << NumDirectives << " directives found:\n"; 173 llvm::errs() << " " << NumDefined << " #define.\n"; 174 llvm::errs() << " " << NumUndefined << " #undef.\n"; 175 llvm::errs() << " #include/#include_next/#import:\n"; 176 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 177 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 178 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 179 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 180 llvm::errs() << " " << NumEndif << " #endif.\n"; 181 llvm::errs() << " " << NumPragma << " #pragma.\n"; 182 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 183 184 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 185 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 186 << NumFastMacroExpanded << " on the fast path.\n"; 187 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 188 << " token paste (##) operations performed, " 189 << NumFastTokenPaste << " on the fast path.\n"; 190} 191 192bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 193 unsigned TruncateAtLine, 194 unsigned TruncateAtColumn) { 195 using llvm::MemoryBuffer; 196 197 CodeCompletionFile = File; 198 199 // Okay to clear out the code-completion point by passing NULL. 200 if (!CodeCompletionFile) 201 return false; 202 203 // Load the actual file's contents. 204 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File); 205 if (!Buffer) 206 return true; 207 208 // Find the byte position of the truncation point. 209 const char *Position = Buffer->getBufferStart(); 210 for (unsigned Line = 1; Line < TruncateAtLine; ++Line) { 211 for (; *Position; ++Position) { 212 if (*Position != '\r' && *Position != '\n') 213 continue; 214 215 // Eat \r\n or \n\r as a single line. 216 if ((Position[1] == '\r' || Position[1] == '\n') && 217 Position[0] != Position[1]) 218 ++Position; 219 ++Position; 220 break; 221 } 222 } 223 224 for (unsigned Column = 1; Column < TruncateAtColumn; ++Column, ++Position) { 225 if (!*Position) 226 break; 227 228 if (*Position == '\t') 229 Column += 7; 230 } 231 232 // Truncate the buffer. 233 if (Position != Buffer->getBufferEnd()) { 234 MemoryBuffer *TruncatedBuffer 235 = MemoryBuffer::getMemBufferCopy(Buffer->getBufferStart(), Position, 236 Buffer->getBufferIdentifier()); 237 SourceMgr.overrideFileContents(File, TruncatedBuffer); 238 } 239 240 return false; 241} 242 243bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const { 244 return CodeCompletionFile && FileLoc.isFileID() && 245 SourceMgr.getFileEntryForID(SourceMgr.getFileID(FileLoc)) 246 == CodeCompletionFile; 247} 248 249//===----------------------------------------------------------------------===// 250// Token Spelling 251//===----------------------------------------------------------------------===// 252 253/// getSpelling() - Return the 'spelling' of this token. The spelling of a 254/// token are the characters used to represent the token in the source file 255/// after trigraph expansion and escaped-newline folding. In particular, this 256/// wants to get the true, uncanonicalized, spelling of things like digraphs 257/// UCNs, etc. 258std::string Preprocessor::getSpelling(const Token &Tok, 259 const SourceManager &SourceMgr, 260 const LangOptions &Features) { 261 assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); 262 263 // If this token contains nothing interesting, return it directly. 264 const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation()); 265 if (!Tok.needsCleaning()) 266 return std::string(TokStart, TokStart+Tok.getLength()); 267 268 std::string Result; 269 Result.reserve(Tok.getLength()); 270 271 // Otherwise, hard case, relex the characters into the string. 272 for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); 273 Ptr != End; ) { 274 unsigned CharSize; 275 Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features)); 276 Ptr += CharSize; 277 } 278 assert(Result.size() != unsigned(Tok.getLength()) && 279 "NeedsCleaning flag set on something that didn't need cleaning!"); 280 return Result; 281} 282 283/// getSpelling() - Return the 'spelling' of this token. The spelling of a 284/// token are the characters used to represent the token in the source file 285/// after trigraph expansion and escaped-newline folding. In particular, this 286/// wants to get the true, uncanonicalized, spelling of things like digraphs 287/// UCNs, etc. 288std::string Preprocessor::getSpelling(const Token &Tok) const { 289 return getSpelling(Tok, SourceMgr, Features); 290} 291 292/// getSpelling - This method is used to get the spelling of a token into a 293/// preallocated buffer, instead of as an std::string. The caller is required 294/// to allocate enough space for the token, which is guaranteed to be at least 295/// Tok.getLength() bytes long. The actual length of the token is returned. 296/// 297/// Note that this method may do two possible things: it may either fill in 298/// the buffer specified with characters, or it may *change the input pointer* 299/// to point to a constant buffer with the data already in it (avoiding a 300/// copy). The caller is not allowed to modify the returned buffer pointer 301/// if an internal buffer is returned. 302unsigned Preprocessor::getSpelling(const Token &Tok, 303 const char *&Buffer) const { 304 assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); 305 306 // If this token is an identifier, just return the string from the identifier 307 // table, which is very quick. 308 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { 309 Buffer = II->getNameStart(); 310 return II->getLength(); 311 } 312 313 // Otherwise, compute the start of the token in the input lexer buffer. 314 const char *TokStart = 0; 315 316 if (Tok.isLiteral()) 317 TokStart = Tok.getLiteralData(); 318 319 if (TokStart == 0) 320 TokStart = SourceMgr.getCharacterData(Tok.getLocation()); 321 322 // If this token contains nothing interesting, return it directly. 323 if (!Tok.needsCleaning()) { 324 Buffer = TokStart; 325 return Tok.getLength(); 326 } 327 328 // Otherwise, hard case, relex the characters into the string. 329 char *OutBuf = const_cast<char*>(Buffer); 330 for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); 331 Ptr != End; ) { 332 unsigned CharSize; 333 *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); 334 Ptr += CharSize; 335 } 336 assert(unsigned(OutBuf-Buffer) != Tok.getLength() && 337 "NeedsCleaning flag set on something that didn't need cleaning!"); 338 339 return OutBuf-Buffer; 340} 341 342/// CreateString - Plop the specified string into a scratch buffer and return a 343/// location for it. If specified, the source location provides a source 344/// location for the token. 345void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, 346 SourceLocation InstantiationLoc) { 347 Tok.setLength(Len); 348 349 const char *DestPtr; 350 SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr); 351 352 if (InstantiationLoc.isValid()) 353 Loc = SourceMgr.createInstantiationLoc(Loc, InstantiationLoc, 354 InstantiationLoc, Len); 355 Tok.setLocation(Loc); 356 357 // If this is a literal token, set the pointer data. 358 if (Tok.isLiteral()) 359 Tok.setLiteralData(DestPtr); 360} 361 362 363/// AdvanceToTokenCharacter - Given a location that specifies the start of a 364/// token, return a new location that specifies a character within the token. 365SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, 366 unsigned CharNo) { 367 // Figure out how many physical characters away the specified instantiation 368 // character is. This needs to take into consideration newlines and 369 // trigraphs. 370 const char *TokPtr = SourceMgr.getCharacterData(TokStart); 371 372 // If they request the first char of the token, we're trivially done. 373 if (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)) 374 return TokStart; 375 376 unsigned PhysOffset = 0; 377 378 // The usual case is that tokens don't contain anything interesting. Skip 379 // over the uninteresting characters. If a token only consists of simple 380 // chars, this method is extremely fast. 381 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) { 382 if (CharNo == 0) 383 return TokStart.getFileLocWithOffset(PhysOffset); 384 ++TokPtr, --CharNo, ++PhysOffset; 385 } 386 387 // If we have a character that may be a trigraph or escaped newline, use a 388 // lexer to parse it correctly. 389 for (; CharNo; --CharNo) { 390 unsigned Size; 391 Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features); 392 TokPtr += Size; 393 PhysOffset += Size; 394 } 395 396 // Final detail: if we end up on an escaped newline, we want to return the 397 // location of the actual byte of the token. For example foo\<newline>bar 398 // advanced by 3 should return the location of b, not of \\. One compounding 399 // detail of this is that the escape may be made by a trigraph. 400 if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) 401 PhysOffset = Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; 402 403 return TokStart.getFileLocWithOffset(PhysOffset); 404} 405 406/// \brief Computes the source location just past the end of the 407/// token at this source location. 408/// 409/// This routine can be used to produce a source location that 410/// points just past the end of the token referenced by \p Loc, and 411/// is generally used when a diagnostic needs to point just after a 412/// token where it expected something different that it received. If 413/// the returned source location would not be meaningful (e.g., if 414/// it points into a macro), this routine returns an invalid 415/// source location. 416SourceLocation Preprocessor::getLocForEndOfToken(SourceLocation Loc) { 417 if (Loc.isInvalid() || !Loc.isFileID()) 418 return SourceLocation(); 419 420 unsigned Len = Lexer::MeasureTokenLength(Loc, getSourceManager(), Features); 421 return AdvanceToTokenCharacter(Loc, Len); 422} 423 424 425 426//===----------------------------------------------------------------------===// 427// Preprocessor Initialization Methods 428//===----------------------------------------------------------------------===// 429 430 431/// EnterMainSourceFile - Enter the specified FileID as the main source file, 432/// which implicitly adds the builtin defines etc. 433void Preprocessor::EnterMainSourceFile() { 434 // We do not allow the preprocessor to reenter the main file. Doing so will 435 // cause FileID's to accumulate information from both runs (e.g. #line 436 // information) and predefined macros aren't guaranteed to be set properly. 437 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 438 FileID MainFileID = SourceMgr.getMainFileID(); 439 440 // Enter the main file source buffer. 441 std::string ErrorStr; 442 bool Res = EnterSourceFile(MainFileID, 0, ErrorStr); 443 assert(!Res && "Entering main file should not fail!"); 444 445 // Tell the header info that the main file was entered. If the file is later 446 // #imported, it won't be re-entered. 447 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 448 HeaderInfo.IncrementIncludeCount(FE); 449 450 std::vector<char> PrologFile; 451 PrologFile.reserve(4080); 452 453 // FIXME: Don't make a copy. 454 PrologFile.insert(PrologFile.end(), Predefines.begin(), Predefines.end()); 455 456 // Memory buffer must end with a null byte! 457 PrologFile.push_back(0); 458 459 // Now that we have emitted the predefined macros, #includes, etc into 460 // PrologFile, preprocess it to populate the initial preprocessor state. 461 llvm::MemoryBuffer *SB = 462 llvm::MemoryBuffer::getMemBufferCopy(&PrologFile.front(),&PrologFile.back(), 463 "<built-in>"); 464 assert(SB && "Cannot fail to create predefined source buffer"); 465 FileID FID = SourceMgr.createFileIDForMemBuffer(SB); 466 assert(!FID.isInvalid() && "Could not create FileID for predefines?"); 467 468 // Start parsing the predefines. 469 Res = EnterSourceFile(FID, 0, ErrorStr); 470 assert(!Res && "Entering predefines should not fail!"); 471} 472 473 474//===----------------------------------------------------------------------===// 475// Lexer Event Handling. 476//===----------------------------------------------------------------------===// 477 478/// LookUpIdentifierInfo - Given a tok::identifier token, look up the 479/// identifier information for the token and install it into the token. 480IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier, 481 const char *BufPtr) const { 482 assert(Identifier.is(tok::identifier) && "Not an identifier!"); 483 assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!"); 484 485 // Look up this token, see if it is a macro, or if it is a language keyword. 486 IdentifierInfo *II; 487 if (BufPtr && !Identifier.needsCleaning()) { 488 // No cleaning needed, just use the characters from the lexed buffer. 489 II = getIdentifierInfo(llvm::StringRef(BufPtr, Identifier.getLength())); 490 } else { 491 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 492 llvm::SmallVector<char, 64> IdentifierBuffer; 493 IdentifierBuffer.resize(Identifier.getLength()); 494 const char *TmpBuf = &IdentifierBuffer[0]; 495 unsigned Size = getSpelling(Identifier, TmpBuf); 496 II = getIdentifierInfo(llvm::StringRef(TmpBuf, Size)); 497 } 498 Identifier.setIdentifierInfo(II); 499 return II; 500} 501 502 503/// HandleIdentifier - This callback is invoked when the lexer reads an 504/// identifier. This callback looks up the identifier in the map and/or 505/// potentially macro expands it or turns it into a named token (like 'for'). 506/// 507/// Note that callers of this method are guarded by checking the 508/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 509/// IdentifierInfo methods that compute these properties will need to change to 510/// match. 511void Preprocessor::HandleIdentifier(Token &Identifier) { 512 assert(Identifier.getIdentifierInfo() && 513 "Can't handle identifiers without identifier info!"); 514 515 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 516 517 // If this identifier was poisoned, and if it was not produced from a macro 518 // expansion, emit an error. 519 if (II.isPoisoned() && CurPPLexer) { 520 if (&II != Ident__VA_ARGS__) // We warn about __VA_ARGS__ with poisoning. 521 Diag(Identifier, diag::err_pp_used_poisoned_id); 522 else 523 Diag(Identifier, diag::ext_pp_bad_vaargs_use); 524 } 525 526 // If this is a macro to be expanded, do it. 527 if (MacroInfo *MI = getMacroInfo(&II)) { 528 if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) { 529 if (MI->isEnabled()) { 530 if (!HandleMacroExpandedIdentifier(Identifier, MI)) 531 return; 532 } else { 533 // C99 6.10.3.4p2 says that a disabled macro may never again be 534 // expanded, even if it's in a context where it could be expanded in the 535 // future. 536 Identifier.setFlag(Token::DisableExpand); 537 } 538 } 539 } 540 541 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 542 // then we act as if it is the actual operator and not the textual 543 // representation of it. 544 if (II.isCPlusPlusOperatorKeyword()) 545 Identifier.setIdentifierInfo(0); 546 547 // If this is an extension token, diagnose its use. 548 // We avoid diagnosing tokens that originate from macro definitions. 549 // FIXME: This warning is disabled in cases where it shouldn't be, 550 // like "#define TY typeof", "TY(1) x". 551 if (II.isExtensionToken() && !DisableMacroExpansion) 552 Diag(Identifier, diag::ext_token_used); 553} 554 555void Preprocessor::AddCommentHandler(CommentHandler *Handler) { 556 assert(Handler && "NULL comment handler"); 557 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 558 CommentHandlers.end() && "Comment handler already registered"); 559 CommentHandlers.push_back(Handler); 560} 561 562void Preprocessor::RemoveCommentHandler(CommentHandler *Handler) { 563 std::vector<CommentHandler *>::iterator Pos 564 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 565 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 566 CommentHandlers.erase(Pos); 567} 568 569void Preprocessor::HandleComment(SourceRange Comment) { 570 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 571 HEnd = CommentHandlers.end(); 572 H != HEnd; ++H) 573 (*H)->HandleComment(*this, Comment); 574} 575 576CommentHandler::~CommentHandler() { } 577