15f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===--- Lexer.cpp - C Language Family Lexer ------------------------------===// 25f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 35f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// The LLVM Compiler Infrastructure 45f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 50bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// This file is distributed under the University of Illinois Open Source 60bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// License. See LICENSE.TXT for details. 75f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 85f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 95f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 10d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner// This file implements the Lexer and Token interfaces. 115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "clang/Lex/Lexer.h" 15651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#include "UnicodeCharSets.h" 169893902eceba7f01dd1521349d33866f77254d78Jordan Rose#include "clang/Basic/CharInfo.h" 179dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner#include "clang/Basic/SourceManager.h" 1855fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Lex/CodeCompletionHandler.h" 1955fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Lex/LexDiagnostic.h" 2006dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith#include "clang/Lex/LiteralSupport.h" 2155fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Lex/Preprocessor.h" 22d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis#include "llvm/ADT/STLExtras.h" 
23c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose#include "llvm/ADT/StringExtras.h" 2455fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "llvm/ADT/StringSwitch.h" 25409a03671224d4e5bdab1594c43baf070148f830Chris Lattner#include "llvm/Support/Compiler.h" 26cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko#include "llvm/Support/ConvertUTF.h" 275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "llvm/Support/MemoryBuffer.h" 282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper#include <cstring> 295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerusing namespace clang; 305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 31dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner//===----------------------------------------------------------------------===// 32dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner// Token Class Implementation 33dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner//===----------------------------------------------------------------------===// 34dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 351eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. 36dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattnerbool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const { 37bec1c9d6f3feeec112cc8eeae90c1be29c6aaf13Douglas Gregor if (IdentifierInfo *II = getIdentifierInfo()) 38bec1c9d6f3feeec112cc8eeae90c1be29c6aaf13Douglas Gregor return II->getObjCKeywordID() == objcKey; 39bec1c9d6f3feeec112cc8eeae90c1be29c6aaf13Douglas Gregor return false; 40dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner} 41dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 42dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner/// getObjCKeywordID - Return the ObjC keyword kind. 
43dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattnertok::ObjCKeywordKind Token::getObjCKeywordID() const { 44dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner IdentifierInfo *specId = getIdentifierInfo(); 45dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; 46dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner} 47dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 4853702cd401b8fdca985aede7732c2f6a82ad9b1cChris Lattner 49dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner//===----------------------------------------------------------------------===// 50dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner// Lexer Class Implementation 51dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner//===----------------------------------------------------------------------===// 52dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 5399ba9e3bd70671f3441fb974895f226a83ce0e66David Blaikievoid Lexer::anchor() { } 5499ba9e3bd70671f3441fb974895f226a83ce0e66David Blaikie 551eb4433ac451dc16f4133a88af2d002ac26c58efMike Stumpvoid Lexer::InitLexer(const char *BufStart, const char *BufPtr, 5622d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner const char *BufEnd) { 5722d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner BufferStart = BufStart; 5822d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner BufferPtr = BufPtr; 5922d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner BufferEnd = BufEnd; 601eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 6122d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner assert(BufEnd[0] == 0 && 6222d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner "We assume that the input buffer has a null character at the end" 6322d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner " to simplify lexing!"); 641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 65156119df1d076b63609618976281961283f871dbEric Christopher // Check whether we have a BOM in the beginning of the buffer. 
If yes - act 66156119df1d076b63609618976281961283f871dbEric Christopher // accordingly. Right now we support only UTF-8 with and without BOM, so, just 67156119df1d076b63609618976281961283f871dbEric Christopher // skip the UTF-8 BOM if it's present. 68156119df1d076b63609618976281961283f871dbEric Christopher if (BufferStart == BufferPtr) { 69156119df1d076b63609618976281961283f871dbEric Christopher // Determine the size of the BOM. 705f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner StringRef Buf(BufferStart, BufferEnd - BufferStart); 71969f9d47338fc36ebb6d24ad3a51e45eda07fd58Eli Friedman size_t BOMLength = llvm::StringSwitch<size_t>(Buf) 72156119df1d076b63609618976281961283f871dbEric Christopher .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM 73156119df1d076b63609618976281961283f871dbEric Christopher .Default(0); 74156119df1d076b63609618976281961283f871dbEric Christopher 75156119df1d076b63609618976281961283f871dbEric Christopher // Skip the BOM. 76156119df1d076b63609618976281961283f871dbEric Christopher BufferPtr += BOMLength; 77156119df1d076b63609618976281961283f871dbEric Christopher } 78156119df1d076b63609618976281961283f871dbEric Christopher 7922d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner Is_PragmaLexer = false; 80d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith CurrentConflictMarkerState = CMK_None; 81156119df1d076b63609618976281961283f871dbEric Christopher 8222d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // Start of the file is a start of line. 
8322d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner IsAtStartOfLine = true; 84d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = true; 85d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 86d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingSpace = false; 87d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingEmptyMacro = false; 881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 8922d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // We are not after parsing a #. 9022d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner ParsingPreprocessorDirective = false; 911eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 9222d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // We are not after parsing #include. 9322d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner ParsingFilename = false; 941eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 9522d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // We are not in raw mode. Raw mode disables diagnostics and interpretation 9622d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // of tokens (e.g. identifiers, thus disabling macro expansion). It is used 9722d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block 9822d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // or otherwise skipping over tokens. 9922d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner LexingRawMode = false; 1001eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 10122d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // Default to not keeping comments. 
10222d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner ExtendedTokenMode = 0; 10322d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner} 10422d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner 1050770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner/// Lexer constructor - Create a new lexer object for the specified buffer 1060770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner/// with the specified preprocessor managing the lexing process. This lexer 1070770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner/// assumes that the associated file buffer and Preprocessor objects will 1080770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner/// outlive it, so it doesn't take ownership of either of them. 1096e2901407bff59aeb4cc301cc58b034723d0eb49Chris LattnerLexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP) 11088d3ac1341aa016cabd966c5b113a95ac05ea43fChris Lattner : PreprocessorLexer(&PP, FID), 11188d3ac1341aa016cabd966c5b113a95ac05ea43fChris Lattner FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)), 1124e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie LangOpts(PP.getLangOpts()) { 1131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1140770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(), 1150770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner InputFile->getBufferEnd()); 1161eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1176aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose resetExtendedTokenMode(); 1186aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose} 1196aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 1206aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rosevoid Lexer::resetExtendedTokenMode() { 1216aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose assert(PP && "Cannot reset token mode without a preprocessor"); 1226aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose if (LangOpts.TraditionalCPP) 1236aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 
SetKeepWhitespaceMode(true); 1246aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose else 1256aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose SetCommentRetentionState(PP->getCommentRetentionState()); 1260770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner} 127dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 128168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner/// Lexer constructor - Create a new raw lexer object. This object is only 129092bf67e5ca560d2fc6aa70be1f172b8b3a5ff96Dmitri Gribenko/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text 130590f0cc643274267d4d41125b62557e1d87886c3Chris Lattner/// range will outlive it, so it doesn't take ownership of it. 1314e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid BlaikieLexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts, 132de96c0f29c4cacabe6ea577c61db87c2a85aea6cChris Lattner const char *BufStart, const char *BufPtr, const char *BufEnd) 1334e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie : FileLoc(fileloc), LangOpts(langOpts) { 13422d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner 13522d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner InitLexer(BufStart, BufPtr, BufEnd); 1361eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 137168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner // We *are* in raw mode. 138168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner LexingRawMode = true; 1395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 1405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 141025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner/// Lexer constructor - Create a new raw lexer object. This object is only 142092bf67e5ca560d2fc6aa70be1f172b8b3a5ff96Dmitri Gribenko/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text 143025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner/// range will outlive it, so it doesn't take ownership of it. 
1446e2901407bff59aeb4cc301cc58b034723d0eb49Chris LattnerLexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile, 1454e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const SourceManager &SM, const LangOptions &langOpts) 1463ea9e33ea25e0c2b12db56418ba3f994eb662c04Pirama Arumuga Nainar : Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(), 1473ea9e33ea25e0c2b12db56418ba3f994eb662c04Pirama Arumuga Nainar FromFile->getBufferStart(), FromFile->getBufferEnd()) {} 148025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner 14942e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// Create_PragmaLexer: Lexer constructor - Create a new lexer object for 15042e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// _Pragma expansion. This has a variety of magic semantics that this method 15142e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// sets up. It returns a new'd Lexer that must be delete'd when done. 15242e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// 15342e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// On entrance to this routine, TokStartLoc is a macro location which has a 15442e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// spelling loc that indicates the bytes to be lexed for the token and an 155433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth/// expansion location that indicates where all lexed tokens should be 15642e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// "expanded from". 15742e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// 1586bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines/// TODO: It would really be nice to make _Pragma just be a wrapper around a 15942e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// normal lexer that remaps tokens as they fly by. This would require making 16042e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// Preprocessor::Lex virtual. 
Given that, we could just dump in a magic lexer 16142e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// interface that could handle this stuff. This would pull GetMappedTokenLoc 16242e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// out of the critical path of the lexer! 16342e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// 1641eb4433ac451dc16f4133a88af2d002ac26c58efMike StumpLexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, 165433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth SourceLocation ExpansionLocStart, 166433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth SourceLocation ExpansionLocEnd, 167bcc2a67e5180612417727cbdd8afd0f79fdf726dChris Lattner unsigned TokLen, Preprocessor &PP) { 16842e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner SourceManager &SM = PP.getSourceManager(); 16942e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner 17042e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // Create the lexer as if we were going to lex the file normally. 171a11d61793341fea195c29a0dab3fbd74f2b39a8cChris Lattner FileID SpellingFID = SM.getFileID(SpellingLoc); 1726e2901407bff59aeb4cc301cc58b034723d0eb49Chris Lattner const llvm::MemoryBuffer *InputFile = SM.getBuffer(SpellingFID); 1736e2901407bff59aeb4cc301cc58b034723d0eb49Chris Lattner Lexer *L = new Lexer(SpellingFID, InputFile, PP); 1741eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 17542e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // Now that the lexer is created, change the start/end locations so that we 17642e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // just lex the subsection of the file that we want. This is lexing from a 17742e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // scratch buffer. 
17842e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner const char *StrData = SM.getCharacterData(SpellingLoc); 1791eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 18042e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner L->BufferPtr = StrData; 18142e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner L->BufferEnd = StrData+TokLen; 1821fa495304c81e03f07f278a47b5efe9317104aabChris Lattner assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!"); 18342e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner 18442e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // Set the SourceLocation with the remapping information. This ensures that 18542e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // GetMappedTokenLoc will remap the tokens as they are lexed. 186bf340e452339e374ea6eef78c1f0a2abdd16c5a3Chandler Carruth L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID), 187bf340e452339e374ea6eef78c1f0a2abdd16c5a3Chandler Carruth ExpansionLocStart, 188bf340e452339e374ea6eef78c1f0a2abdd16c5a3Chandler Carruth ExpansionLocEnd, TokLen); 1891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19042e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // Ensure that the lexer thinks it is inside a directive, so that end \n will 19184021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne // return an EOD token. 19242e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner L->ParsingPreprocessorDirective = true; 1931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19442e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // This lexer really is for _Pragma. 
19542e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner L->Is_PragmaLexer = true; 19642e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner return L; 19742e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner} 19842e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner 199168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner 2005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// Stringify - Convert the specified string into a C string, with surrounding 2015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// ""'s, and with escaped \ and " characters. 202b6d6993e6e6d3daf4d9876794254d20a134e37c2Pirama Arumuga Nainarstd::string Lexer::Stringify(StringRef Str, bool Charify) { 2035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer std::string Result = Str; 2045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char Quote = Charify ? '\'' : '"'; 2055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (unsigned i = 0, e = Result.size(); i != e; ++i) { 2065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Result[i] == '\\' || Result[i] == Quote) { 2075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Result.insert(Result.begin()+i, '\\'); 2085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++i; ++e; 2095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return Result; 2125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 2135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 214d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner/// Stringify - Convert the specified string into a C string by escaping '\' 215d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner/// and " characters. This does not add surrounding ""'s to the string. 
2165f9e272e632e951b1efe824cd16acb4d96077930Chris Lattnervoid Lexer::Stringify(SmallVectorImpl<char> &Str) { 217d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner for (unsigned i = 0, e = Str.size(); i != e; ++i) { 218d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner if (Str[i] == '\\' || Str[i] == '"') { 219d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner Str.insert(Str.begin()+i, '\\'); 220d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner ++i; ++e; 221d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner } 222d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner } 223d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner} 224d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner 225b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner//===----------------------------------------------------------------------===// 226b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner// Token Spelling 227b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner//===----------------------------------------------------------------------===// 228b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner 22930cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith/// \brief Slow case of getSpelling. Extract the characters comprising the 23030cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith/// spelling of this token from the provided input buffer. 
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
                              const LangOptions &LangOpts, char *Spelling) {
  // Tok      - the token being spelled; its needsCleaning() flag must be set.
  // BufPtr   - first source byte of the token; Tok.getLength() bytes are read.
  // LangOpts - forwarded to Lexer::getCharAndSizeNoWarn.
  // Spelling - output buffer that receives the cleaned characters.
  //            NOTE(review): presumably must hold at least Tok.getLength()
  //            bytes - confirm against callers.
  // Returns the number of characters written to Spelling.
  assert(Tok.needsCleaning() && "getSpellingSlow called on simple token");

  size_t Length = 0;
  const char *BufEnd = BufPtr + Tok.getLength();

  if (tok::isStringLiteral(Tok.getKind())) {
    // Munch the encoding-prefix and opening double-quote.
    while (BufPtr < BufEnd) {
      // Each call yields one character and reports in Size how many source
      // bytes to advance past.
      unsigned Size;
      Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
      BufPtr += Size;

      // Stop right after the opening quote has been copied.
      if (Spelling[Length - 1] == '"')
        break;
    }

    // Raw string literals need special handling; trigraph expansion and line
    // splicing do not occur within their d-char-sequence nor within their
    // r-char-sequence.
    // A raw literal is recognized by the 'R' directly before the opening
    // quote in what has been copied so far.
    if (Length >= 2 &&
        Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
      // Search backwards from the end of the token to find the matching closing
      // quote.
      const char *RawEnd = BufEnd;
      do --RawEnd; while (*RawEnd != '"');
      // The +1 includes the closing quote itself in the verbatim span.
      size_t RawLength = RawEnd - BufPtr + 1;

      // Everything between the quotes is included verbatim in the spelling.
      memcpy(Spelling + Length, BufPtr, RawLength);
      Length += RawLength;
      BufPtr += RawLength;

      // The rest of the token is lexed normally.
    }
  }

  // Copy whatever remains of the token (or all of it, for tokens that are not
  // string literals) one cleaned character at a time.
  while (BufPtr < BufEnd) {
    unsigned Size;
    Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
    BufPtr += Size;
  }

  // A token flagged as needing cleaning must come out shorter than its source
  // length.
  assert(Length < Tok.getLength() &&
         "NeedsCleaning flag set on token that didn't need cleaning!");
  return Length;
}

/// getSpelling() - Return the 'spelling' of this token. The spelling of a
/// token are the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding. In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs,
/// UCNs, etc.
2855f9e272e632e951b1efe824cd16acb4d96077930Chris LattnerStringRef Lexer::getSpelling(SourceLocation loc, 28630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith SmallVectorImpl<char> &buffer, 28730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith const SourceManager &SM, 28830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith const LangOptions &options, 28930cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith bool *invalid) { 290834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall // Break down the source location. 291834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc); 292834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 293834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall // Try to the load the file buffer. 294834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall bool invalidTemp = false; 2955f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner StringRef file = SM.getBufferData(locInfo.first, &invalidTemp); 296834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall if (invalidTemp) { 297834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall if (invalid) *invalid = true; 2985f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner return StringRef(); 299834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall } 300834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 301834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall const char *tokenBegin = file.data() + locInfo.second; 302834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 303834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall // Lex from the start of the given location. 
304834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options, 305834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall file.begin(), tokenBegin, file.end()); 306834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall Token token; 307834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall lexer.LexFromRawLexer(token); 308834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 309834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall unsigned length = token.getLength(); 310834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 311834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall // Common case: no need for cleaning. 312834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall if (!token.needsCleaning()) 3135f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner return StringRef(tokenBegin, length); 314834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 31530cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // Hard case, we need to relex the characters into the string. 31630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith buffer.resize(length); 31730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data())); 3185f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner return StringRef(buffer.data(), buffer.size()); 319834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall} 320834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 321834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// getSpelling() - Return the 'spelling' of this token. The spelling of a 322834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// token are the characters used to represent the token in the source file 323834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// after trigraph expansion and escaped-newline folding. 
In particular, this 324834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// wants to get the true, uncanonicalized, spelling of things like digraphs 325834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// UCNs, etc. 326b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattnerstd::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, 3274e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts, bool *Invalid) { 328b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); 32930cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 330b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner bool CharDataInvalid = false; 33130cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 332b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner &CharDataInvalid); 333b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (Invalid) 334b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner *Invalid = CharDataInvalid; 335b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (CharDataInvalid) 336b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner return std::string(); 33730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 33830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // If this token contains nothing interesting, return it directly. 
339b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (!Tok.needsCleaning()) 34030cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith return std::string(TokStart, TokStart + Tok.getLength()); 34130cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 342b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner std::string Result; 34330cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith Result.resize(Tok.getLength()); 34430cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin())); 345b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner return Result; 346b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner} 347b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner 348b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// getSpelling - This method is used to get the spelling of a token into a 349b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// preallocated buffer, instead of as an std::string. The caller is required 350b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// to allocate enough space for the token, which is guaranteed to be at least 351b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// Tok.getLength() bytes long. The actual length of the token is returned. 352b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// 353b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// Note that this method may do two possible things: it may either fill in 354b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// the buffer specified with characters, or it may *change the input pointer* 355b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// to point to a constant buffer with the data already in it (avoiding a 356b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// copy). The caller is not allowed to modify the returned buffer pointer 357b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// if an internal buffer is returned. 
358b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattnerunsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, 359b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner const SourceManager &SourceMgr, 3604e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts, bool *Invalid) { 361b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); 362c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 3636bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines const char *TokStart = nullptr; 364c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // NOTE: this has to be checked *before* testing for an IdentifierInfo. 365c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara if (Tok.is(tok::raw_identifier)) 3666bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines TokStart = Tok.getRawIdentifier().data(); 367c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else if (!Tok.hasUCN()) { 368c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { 369c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Just return the string from the identifier table, which is very quick. 370c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Buffer = II->getNameStart(); 371c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return II->getLength(); 372c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 373b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner } 374c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 375c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // NOTE: this can be checked even after testing for an IdentifierInfo. 
376b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (Tok.isLiteral()) 377b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner TokStart = Tok.getLiteralData(); 378c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 3796bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines if (!TokStart) { 380c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // Compute the start of the token in the input lexer buffer. 381b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner bool CharDataInvalid = false; 382b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid); 383b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (Invalid) 384b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner *Invalid = CharDataInvalid; 385b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (CharDataInvalid) { 386b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner Buffer = ""; 387b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner return 0; 388b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner } 389b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner } 390c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 391b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner // If this token contains nothing interesting, return it directly. 392b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (!Tok.needsCleaning()) { 393b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner Buffer = TokStart; 394b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner return Tok.getLength(); 395b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner } 396c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 397b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner // Otherwise, hard case, relex the characters into the string. 
39830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer)); 399b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner} 400b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner 401b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner 4029a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner/// MeasureTokenLength - Relex the token at the specified location and return 4039a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner/// its length in bytes in the input file. If the token needs cleaning (e.g. 4049a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner/// includes a trigraph or an escaped newline) then this count includes bytes 4059a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner/// that are part of that. 4069a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattnerunsigned Lexer::MeasureTokenLength(SourceLocation Loc, 4072c78b873f4f3823ae859c15674cb3d76c8554113Chris Lattner const SourceManager &SM, 4082c78b873f4f3823ae859c15674cb3d76c8554113Chris Lattner const LangOptions &LangOpts) { 409d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis Token TheTok; 410d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis if (getRawToken(Loc, TheTok, SM, LangOpts)) 411d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return 0; 412d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return TheTok.getLength(); 413d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis} 414d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis 415d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis/// \brief Relex the token at the specified location. 416d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis/// \returns true if there was a failure, false on success. 
417d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidisbool Lexer::getRawToken(SourceLocation Loc, Token &Result, 418d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis const SourceManager &SM, 419f0dd853bb1e8f3e59b169e6d34a8556c6003c47cFariborz Jahanian const LangOptions &LangOpts, 420f0dd853bb1e8f3e59b169e6d34a8556c6003c47cFariborz Jahanian bool IgnoreWhiteSpace) { 4219a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // TODO: this could be special cased for common tokens like identifiers, ')', 4229a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // etc to make this faster, if it mattered. Just look at StrData[0] to handle 4231eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // all obviously single-char tokens. This could use 4249a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // Lexer::isObviouslySimpleCharacter for example to handle identifiers or 4259a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // something. 426de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner 427de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner // If this comes from a macro expansion, we really do want the macro name, not 428de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner // the token this macro expanded to. 
429402785357ab053dd53f4fdd858b9630a5e0f8badChandler Carruth Loc = SM.getExpansionLoc(Loc); 430363fdc29656cc03c1817268888f95e6343470aa8Chris Lattner std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); 431f715ca12bfc9fddfde75f98a197424434428b821Douglas Gregor bool Invalid = false; 4325f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); 433f715ca12bfc9fddfde75f98a197424434428b821Douglas Gregor if (Invalid) 434d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return true; 435f6ac97b101c8840efa92bf29166077ce4049e293Benjamin Kramer 436f6ac97b101c8840efa92bf29166077ce4049e293Benjamin Kramer const char *StrData = Buffer.data()+LocInfo.second; 4378350394c65b81bba3986dfe44ae17423873741deChris Lattner 438f0dd853bb1e8f3e59b169e6d34a8556c6003c47cFariborz Jahanian if (!IgnoreWhiteSpace && isWhitespace(StrData[0])) 439d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return true; 44033e9abd21083a0191a7676a04b497006d2da184dDouglas Gregor 4419a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // Create a lexer starting at the beginning of this token. 
442c3526d89ef9c31639ec8b25180cfb22354344241Sebastian Redl Lexer TheLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, 443c3526d89ef9c31639ec8b25180cfb22354344241Sebastian Redl Buffer.begin(), StrData, Buffer.end()); 44439de7409bffb6b725a8aa64f0ba77ab51e8c9eb3Chris Lattner TheLexer.SetCommentRetentionState(true); 445d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis TheLexer.LexFromRawLexer(Result); 446d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return false; 4479a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner} 4489a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner 4490e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidisstatic SourceLocation getBeginningOfFileToken(SourceLocation Loc, 4500e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis const SourceManager &SM, 4510e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis const LangOptions &LangOpts) { 4520e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis assert(Loc.isFileID()); 453a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); 4543de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor if (LocInfo.first.isInvalid()) 4553de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor return Loc; 4563de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor 457a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor bool Invalid = false; 4585f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); 459a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (Invalid) 460a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor return Loc; 461a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 462a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // Back up from the current location until we hit the beginning of a line 463a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // (or the buffer). We'll relex from that point. 
464a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor const char *BufStart = Buffer.data(); 4653de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor if (LocInfo.second >= Buffer.size()) 4663de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor return Loc; 4673de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor 468a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor const char *StrData = BufStart+LocInfo.second; 469a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (StrData[0] == '\n' || StrData[0] == '\r') 470a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor return Loc; 471a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 472a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor const char *LexStart = StrData; 473a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor while (LexStart != BufStart) { 474a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (LexStart[0] == '\n' || LexStart[0] == '\r') { 475a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor ++LexStart; 476a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor break; 477a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor } 478a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 479a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor --LexStart; 480a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor } 481a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 482a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // Create a lexer starting at the beginning of this token. 
483a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); 484a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end()); 485a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor TheLexer.SetCommentRetentionState(true); 486a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 487a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // Lex tokens until we find the token that contains the source location. 488a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor Token TheTok; 489a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor do { 490a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor TheLexer.LexFromRawLexer(TheTok); 491a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 492a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (TheLexer.getBufferLocation() > StrData) { 493a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // Lexing this token has taken the lexer past the source location we're 494a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // looking for. If the current token encompasses our source location, 495a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // return the beginning of that token. 496a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData) 497a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor return TheTok.getLocation(); 498a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 499a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // We ended up skipping over the source location entirely, which means 500a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // that it points into whitespace. We're done here. 
501a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor break; 502a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor } 503a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor } while (TheTok.getKind() != tok::eof); 504a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 505a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // We've passed our source location; just return the original source location. 506a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor return Loc; 507a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor} 508a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 5090e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios KyrtzidisSourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, 5100e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis const SourceManager &SM, 5110e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis const LangOptions &LangOpts) { 5120e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis if (Loc.isFileID()) 5130e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis return getBeginningOfFileToken(Loc, SM, LangOpts); 5140e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis 5150e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis if (!SM.isMacroArgExpansion(Loc)) 5160e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis return Loc; 5170e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis 5180e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis SourceLocation FileLoc = SM.getSpellingLoc(Loc); 5190e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); 5200e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); 521ae9f85b2c062ac20512a484cfa6e66239dd3d0d9Chandler Carruth std::pair<FileID, unsigned> BeginFileLocInfo 522ae9f85b2c062ac20512a484cfa6e66239dd3d0d9Chandler Carruth = SM.getDecomposedLoc(BeginFileLoc); 
5230e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis assert(FileLocInfo.first == BeginFileLocInfo.first && 5240e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis FileLocInfo.second >= BeginFileLocInfo.second); 525ae9f85b2c062ac20512a484cfa6e66239dd3d0d9Chandler Carruth return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); 5260e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis} 5270e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis 528f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregornamespace { 529f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor enum PreambleDirectiveKind { 530f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PDK_Skipped, 531f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PDK_StartIf, 532f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PDK_EndIf, 533f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PDK_Unknown 534f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor }; 535f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor} 536f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 537176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hinesstd::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, 538176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines const LangOptions &LangOpts, 539176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines unsigned MaxLines) { 540f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // Create a lexer starting at the beginning of the file. Note that we use a 541f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // "fake" file source location at offset 1 so that the lexer will track our 542f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // position within the file. 
543f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor const unsigned StartOffset = 1; 5441cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset); 545176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(), 546176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines Buffer.end()); 547355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis TheLexer.SetCommentRetentionState(true); 5481cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis 5491cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis // StartLoc will differ from FileLoc if there is a BOM that was skipped. 5501cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis SourceLocation StartLoc = TheLexer.getSourceLocation(); 5511cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis 552f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor bool InPreprocessorDirective = false; 553f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor Token TheTok; 554f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor Token IfStartTok; 555f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor unsigned IfCount = 0; 556355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis SourceLocation ActiveCommentLoc; 557c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis 558c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis unsigned MaxLineOffset = 0; 559c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis if (MaxLines) { 560176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines const char *CurPtr = Buffer.begin(); 561c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis unsigned CurLine = 0; 562176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines while (CurPtr != Buffer.end()) { 563c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis char ch = *CurPtr++; 564c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis if (ch == '\n') { 
565c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis ++CurLine; 566c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis if (CurLine == MaxLines) 567c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis break; 568c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis } 569c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis } 570176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines if (CurPtr != Buffer.end()) 571176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines MaxLineOffset = CurPtr - Buffer.begin(); 572c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis } 573df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor 574f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor do { 575f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor TheLexer.LexFromRawLexer(TheTok); 576f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 577f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (InPreprocessorDirective) { 578f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // If we've hit the end of the file, we're done. 579f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (TheTok.getKind() == tok::eof) { 580f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor break; 581f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 582f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 583f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // If we haven't hit the end of the preprocessor directive, skip this 584f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // token. 585f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (!TheTok.isAtStartOfLine()) 586f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 587f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 588f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // We've passed the end of the preprocessor directive, and will look 589f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // at this token again below. 
590f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor InPreprocessorDirective = false; 591f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 592f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 593df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // Keep track of the # of lines in the preamble. 594df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor if (TheTok.isAtStartOfLine()) { 595c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset; 596df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor 597df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // If we were asked to limit the number of lines in the preamble, 598df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // and we're about to exceed that limit, we're done. 599c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis if (MaxLineOffset && TokOffset >= MaxLineOffset) 600df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor break; 601df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor } 602df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor 603f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // Comments are okay; skip over them. 604355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis if (TheTok.getKind() == tok::comment) { 605355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis if (ActiveCommentLoc.isInvalid()) 606355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis ActiveCommentLoc = TheTok.getLocation(); 607f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 608355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis } 609f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 610f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) { 611f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // This is the start of a preprocessor directive. 
612f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor Token HashTok = TheTok; 613f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor InPreprocessorDirective = true; 614355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis ActiveCommentLoc = SourceLocation(); 615f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 61619207f1e5f51261a33492602501fb7ada50ea546Joerg Sonnenberger // Figure out which directive this is. Since we're lexing raw tokens, 617f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // we don't have an identifier table available. Instead, just look at 618f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // the raw identifier to recognize and categorize preprocessor directives. 619f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor TheLexer.LexFromRawLexer(TheTok); 620c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) { 6216bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines StringRef Keyword = TheTok.getRawIdentifier(); 622f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PreambleDirectiveKind PDK 623f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor = llvm::StringSwitch<PreambleDirectiveKind>(Keyword) 624f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("include", PDK_Skipped) 625f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("__include_macros", PDK_Skipped) 626f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("define", PDK_Skipped) 627f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("undef", PDK_Skipped) 628f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("line", PDK_Skipped) 629f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("error", PDK_Skipped) 630f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("pragma", PDK_Skipped) 631f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("import", PDK_Skipped) 632f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas 
Gregor .Case("include_next", PDK_Skipped) 633f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("warning", PDK_Skipped) 634f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("ident", PDK_Skipped) 635f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("sccs", PDK_Skipped) 636f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("assert", PDK_Skipped) 637f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("unassert", PDK_Skipped) 638f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("if", PDK_StartIf) 639f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("ifdef", PDK_StartIf) 640f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("ifndef", PDK_StartIf) 641f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("elif", PDK_Skipped) 642f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("else", PDK_Skipped) 643f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("endif", PDK_EndIf) 644f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Default(PDK_Unknown); 645f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 646f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor switch (PDK) { 647f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor case PDK_Skipped: 648f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 649f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 650f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor case PDK_StartIf: 651f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (IfCount == 0) 652f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor IfStartTok = HashTok; 653f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 654f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor ++IfCount; 655f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 656f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 657f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor case PDK_EndIf: 
658f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // Mismatched #endif. The preamble ends here. 659f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (IfCount == 0) 660f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor break; 661f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 662f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor --IfCount; 663f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 664f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 665f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor case PDK_Unknown: 666f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // We don't know what this directive is; stop at the '#'. 667f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor break; 668f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 669f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 670f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 671f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // We only end up here if we didn't recognize the preprocessor 672f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // directive or it was one that can't occur in the preamble at this 673f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // point. Roll back the current token to the location of the '#'. 674f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor InPreprocessorDirective = false; 675f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor TheTok = HashTok; 676f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 677f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 678df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // We hit a token that we don't recognize as being in the 679df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // "preprocessing only" part of the file, so we're no longer in 680df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // the preamble. 
681f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor break; 682f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } while (true); 683f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 684355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis SourceLocation End; 685355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis if (IfCount) 686355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis End = IfStartTok.getLocation(); 687355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis else if (ActiveCommentLoc.isValid()) 688355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis End = ActiveCommentLoc; // don't truncate a decl comment. 689355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis else 690355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis End = TheTok.getLocation(); 691355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis 692f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(), 693f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor IfCount? IfStartTok.isAtStartOfLine() 694f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor : TheTok.isAtStartOfLine()); 695f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor} 696f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 6977ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 6987ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// AdvanceToTokenCharacter - Given a location that specifies the start of a 6997ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// token, return a new location that specifies a character within the token. 
7007ef5c27eb6e8ebe58b52013246c06753c3613263Chris LattnerSourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, 7017ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner unsigned CharNo, 7027ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner const SourceManager &SM, 7034e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts) { 704433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth // Figure out how many physical characters away the specified expansion 7057ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // character is. This needs to take into consideration newlines and 7067ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // trigraphs. 7077ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner bool Invalid = false; 7087ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner const char *TokPtr = SM.getCharacterData(TokStart, &Invalid); 7097ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7107ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // If they request the first char of the token, we're trivially done. 7117ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr))) 7127ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner return TokStart; 7137ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7147ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner unsigned PhysOffset = 0; 7157ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7167ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // The usual case is that tokens don't contain anything interesting. Skip 7177ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // over the uninteresting characters. If a token only consists of simple 7187ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // chars, this method is extremely fast. 
7197ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner while (Lexer::isObviouslySimpleCharacter(*TokPtr)) { 7207ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner if (CharNo == 0) 721a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return TokStart.getLocWithOffset(PhysOffset); 7224967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar ++TokPtr; 7234967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar --CharNo; 7244967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar ++PhysOffset; 7257ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner } 7267ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7277ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // If we have a character that may be a trigraph or escaped newline, use a 7287ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // lexer to parse it correctly. 7297ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner for (; CharNo; --CharNo) { 7307ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner unsigned Size; 7314e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts); 7327ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner TokPtr += Size; 7337ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner PhysOffset += Size; 7347ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner } 7357ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7367ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // Final detail: if we end up on an escaped newline, we want to return the 7377ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // location of the actual byte of the token. For example foo\<newline>bar 7387ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // advanced by 3 should return the location of b, not of \\. One compounding 7397ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // detail of this is that the escape may be made by a trigraph. 
7407ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) 7417ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; 7427ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 743a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return TokStart.getLocWithOffset(PhysOffset); 7447ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner} 7457ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7467ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// \brief Computes the source location just past the end of the 7477ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// token at this source location. 7487ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// 7497ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// This routine can be used to produce a source location that 7507ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// points just past the end of the token referenced by \p Loc, and 7517ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// is generally used when a diagnostic needs to point just after a 7527ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// token where it expected something different that it received. If 7537ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// the returned source location would not be meaningful (e.g., if 7547ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// it points into a macro), this routine returns an invalid 7557ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// source location. 7567ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// 7577ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// \param Offset an offset from the end of the token, where the source 7587ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// location should refer to. 
The default offset (0) produces a source 7597ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// location pointing just past the end of the token; an offset of 1 produces 7607ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// a source location pointing to the last character in the token, etc. 7617ef5c27eb6e8ebe58b52013246c06753c3613263Chris LattnerSourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, 7627ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner const SourceManager &SM, 7634e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts) { 7647ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis if (Loc.isInvalid()) 7657ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner return SourceLocation(); 7667ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis 7677ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis if (Loc.isMacroID()) { 7684e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) 769433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth return SourceLocation(); // Points inside the macro expansion. 
7707ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis } 7717ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis 7724e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts); 7737ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner if (Len > Offset) 7747ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner Len = Len - Offset; 7757ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner else 7767ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner return Loc; 7777ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 778a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return Loc.getLocWithOffset(Len); 7797ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner} 7807ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7817a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis/// \brief Returns true if the given MacroID location points at the first 782433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth/// token of the macro expansion. 
783433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruthbool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, 784f62d43d2afe1960755a1b5813cae1e5983bcac1bDouglas Gregor const SourceManager &SM, 78569bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis const LangOptions &LangOpts, 78669bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis SourceLocation *MacroBegin) { 7877a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); 7887a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 789c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis SourceLocation expansionLoc; 790c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (!SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc)) 791c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis return false; 792c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis 79369bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (expansionLoc.isFileID()) { 79469bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis // No other macro expansions, this is the first. 79569bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (MacroBegin) 79669bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis *MacroBegin = expansionLoc; 79769bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis return true; 79869bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis } 7997a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 80069bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts, MacroBegin); 8017a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis} 8027a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 8037a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis/// \brief Returns true if the given MacroID location points at the last 804433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth/// token of the macro expansion. 
805433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruthbool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, 80669bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis const SourceManager &SM, 80769bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis const LangOptions &LangOpts, 80869bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis SourceLocation *MacroEnd) { 8097a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); 8107a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 8117a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis SourceLocation spellLoc = SM.getSpellingLoc(loc); 8127a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis unsigned tokLen = MeasureTokenLength(spellLoc, SM, LangOpts); 8137a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis if (tokLen == 0) 8147a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis return false; 8157a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 816c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis SourceLocation afterLoc = loc.getLocWithOffset(tokLen); 817c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis SourceLocation expansionLoc; 818c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (!SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc)) 819c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis return false; 820f8c50652f7b224e66b0b6098d1fba07e036019b4Argyrios Kyrtzidis 82169bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (expansionLoc.isFileID()) { 82269bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis // No other macro expansions. 
82369bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (MacroEnd) 82469bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis *MacroEnd = expansionLoc; 82569bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis return true; 82669bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis } 8277a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 82869bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts, MacroEnd); 8297a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis} 8307a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 831a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidisstatic CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, 832d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis const SourceManager &SM, 833d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis const LangOptions &LangOpts) { 834a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis SourceLocation Begin = Range.getBegin(); 835a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis SourceLocation End = Range.getEnd(); 836d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis assert(Begin.isFileID() && End.isFileID()); 837a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis if (Range.isTokenRange()) { 838a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts); 839a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis if (End.isInvalid()) 840a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return CharSourceRange(); 841a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis } 842d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 843d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis // Break down the source locations. 
844d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis FileID FID; 845d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis unsigned BeginOffs; 846651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines std::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin); 847d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (FID.isInvalid()) 848d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange(); 849d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 850d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis unsigned EndOffs; 851d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (!SM.isInFileID(End, FID, &EndOffs) || 852d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis BeginOffs > EndOffs) 853d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange(); 854d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 855d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange::getCharRange(Begin, End); 856d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis} 857d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 858a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios KyrtzidisCharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, 85911b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis const SourceManager &SM, 86011b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis const LangOptions &LangOpts) { 861a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis SourceLocation Begin = Range.getBegin(); 862a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis SourceLocation End = Range.getEnd(); 863d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (Begin.isInvalid() || End.isInvalid()) 86411b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis return CharSourceRange(); 86511b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 866d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (Begin.isFileID() 
&& End.isFileID()) 867a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return makeRangeFromFileLocs(Range, SM, LangOpts); 868d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 869d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (Begin.isMacroID() && End.isFileID()) { 87011b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis if (!isAtStartOfMacroExpansion(Begin, SM, LangOpts, &Begin)) 87111b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis return CharSourceRange(); 872a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range.setBegin(Begin); 873a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return makeRangeFromFileLocs(Range, SM, LangOpts); 874d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis } 87511b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 876d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (Begin.isFileID() && End.isMacroID()) { 877a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts, 878a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis &End)) || 879a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts, 880a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis &End))) 881d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange(); 882a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range.setEnd(End); 883a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return makeRangeFromFileLocs(Range, SM, LangOpts); 884d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis } 88511b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 886d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis assert(Begin.isMacroID() && End.isMacroID()); 887d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis SourceLocation MacroBegin, MacroEnd; 
888d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) && 889a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts, 890a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis &MacroEnd)) || 891a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts, 892a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis &MacroEnd)))) { 893a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range.setBegin(MacroBegin); 894a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range.setEnd(MacroEnd); 895a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return makeRangeFromFileLocs(Range, SM, LangOpts); 896a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis } 897d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 898c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis bool Invalid = false; 899c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin), 900c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis &Invalid); 901c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (Invalid) 902e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return CharSourceRange(); 903e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 904c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (BeginEntry.getExpansion().isMacroArgExpansion()) { 905c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End), 906c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis &Invalid); 907c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (Invalid) 908c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis return CharSourceRange(); 
90911b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 910c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (EndEntry.getExpansion().isMacroArgExpansion() && 911c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis BeginEntry.getExpansion().getExpansionLocStart() == 912c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis EndEntry.getExpansion().getExpansionLocStart()) { 913c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis Range.setBegin(SM.getImmediateSpellingLoc(Begin)); 914c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis Range.setEnd(SM.getImmediateSpellingLoc(End)); 915c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis return makeFileCharRange(Range, SM, LangOpts); 916c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis } 917d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis } 918d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 919d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange(); 92011b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis} 92111b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 922e64d9037658a1b95c79ea275af6167a110b3c563Argyrios KyrtzidisStringRef Lexer::getSourceText(CharSourceRange Range, 923e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis const SourceManager &SM, 924e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis const LangOptions &LangOpts, 925e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis bool *Invalid) { 926a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range = makeFileCharRange(Range, SM, LangOpts); 927a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis if (Range.isInvalid()) { 928e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = true; 929e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return StringRef(); 930e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis } 931e64d9037658a1b95c79ea275af6167a110b3c563Argyrios 
Kyrtzidis 932e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis // Break down the source location. 933e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis std::pair<FileID, unsigned> beginInfo = SM.getDecomposedLoc(Range.getBegin()); 934e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (beginInfo.first.isInvalid()) { 935e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = true; 936e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return StringRef(); 937e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis } 938e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 939e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis unsigned EndOffs; 940e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) || 941e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis beginInfo.second > EndOffs) { 942e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = true; 943e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return StringRef(); 944e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis } 945e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 946e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis // Try to the load the file buffer. 
947e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis bool invalidTemp = false; 948e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp); 949e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (invalidTemp) { 950e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = true; 951e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return StringRef(); 952e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis } 953e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 954e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = false; 955e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return file.substr(beginInfo.second, EndOffs - beginInfo.second); 956e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis} 957e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 958c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna ZaksStringRef Lexer::getImmediateMacroName(SourceLocation Loc, 959c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks const SourceManager &SM, 960c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks const LangOptions &LangOpts) { 961c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks assert(Loc.isMacroID() && "Only reasonble to call this on macros"); 9627f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 9637f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // Find the location of the immediate macro expansion. 
9647f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis while (1) { 9657f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis FileID FID = SM.getFileID(Loc); 9667f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID); 9677f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); 9687f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis Loc = Expansion.getExpansionLocStart(); 9697f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis if (!Expansion.isMacroArgExpansion()) 9707f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis break; 9717f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 9727f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // For macro arguments we need to check that the argument did not come 9737f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // from an inner macro, e.g: "MAC1( MAC2(foo) )" 9747f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 9757f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // Loc points to the argument id of the macro definition, move to the 9767f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // macro expansion. 977c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks Loc = SM.getImmediateExpansionRange(Loc).first; 9787f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis SourceLocation SpellLoc = Expansion.getSpellingLoc(); 9797f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis if (SpellLoc.isFileID()) 9807f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis break; // No inner macro. 9817f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 9827f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // If spelling location resides in the same FileID as macro expansion 9837f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // location, it means there is no inner macro. 
9847f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis FileID MacroFID = SM.getFileID(Loc); 9857f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis if (SM.isInFileID(SpellLoc, MacroFID)) 9867f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis break; 9877f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 9887f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // Argument came from inner macro. 9897f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis Loc = SpellLoc; 9907f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis } 991c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks 992c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // Find the spelling location of the start of the non-argument expansion 993c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // range. This is where the macro name was spelled in order to begin 994c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // expanding this macro. 9957f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis Loc = SM.getSpellingLoc(Loc); 996c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks 997c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // Dig out the buffer where the macro name was spelled and the extents of the 998c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // name so that we can render it into the expansion note. 
999c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc); 1000c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts); 1001c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first); 1002c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength); 1003c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks} 1004c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks 10054967a710c84587c654b56c828382219c3937dacbPirama Arumuga NainarStringRef Lexer::getImmediateMacroNameForDiagnostics( 10064967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { 10074967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar assert(Loc.isMacroID() && "Only reasonble to call this on macros"); 10084967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar // Walk past macro argument expanions. 10094967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar while (SM.isMacroArgExpansion(Loc)) 10104967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar Loc = SM.getImmediateExpansionRange(Loc).first; 10114967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar 10124967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar // If the macro's spelling has no FileID, then it's actually a token paste 10134967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar // or stringization (or similar) and not a macro at all. 
10144967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar if (!SM.getFileEntryForID(SM.getFileID(SM.getSpellingLoc(Loc)))) 10154967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar return StringRef(); 10164967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar 10174967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar // Find the spelling location of the start of the non-argument expansion 10184967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar // range. This is where the macro name was spelled in order to begin 10194967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar // expanding this macro. 10204967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).first); 10214967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar 10224967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar // Dig out the buffer where the macro name was spelled and the extents of the 10234967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar // name so that we can render it into the expansion note. 
10244967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc); 10254967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts); 10264967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first); 10274967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength); 10284967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar} 10294967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar 1030d880b3aa6d594d1a7f2d307c29378c6f59b216ffJordan Rosebool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) { 10319893902eceba7f01dd1521349d33866f77254d78Jordan Rose return isIdentifierBody(c, LangOpts.DollarIdents); 1032d880b3aa6d594d1a7f2d307c29378c6f59b216ffJordan Rose} 1033d880b3aa6d594d1a7f2d307c29378c6f59b216ffJordan Rose 10345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 10365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Diagnostics forwarding code. 10375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 10385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1039409a03671224d4e5bdab1594c43baf070148f830Chris Lattner/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the 1040433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth/// lexer buffer was all expanded at a single point, perform the mapping. 
1041409a03671224d4e5bdab1594c43baf070148f830Chris Lattner/// This is currently only used for _Pragma implementation, so it is the slow 1042409a03671224d4e5bdab1594c43baf070148f830Chris Lattner/// path of the hot getSourceLocation method. Do not allow it to be inlined. 104314bd96571ef6f0e97dc79ec4d01b547d60e8fa68Chandler Carruthstatic LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc( 104414bd96571ef6f0e97dc79ec4d01b547d60e8fa68Chandler Carruth Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen); 1045409a03671224d4e5bdab1594c43baf070148f830Chris Lattnerstatic SourceLocation GetMappedTokenLoc(Preprocessor &PP, 1046409a03671224d4e5bdab1594c43baf070148f830Chris Lattner SourceLocation FileLoc, 1047de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner unsigned CharNo, unsigned TokLen) { 1048433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth assert(FileLoc.isMacroID() && "Must be a macro expansion"); 10491eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1050409a03671224d4e5bdab1594c43baf070148f830Chris Lattner // Otherwise, we're lexing "mapped tokens". This is used for things like 1051433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth // _Pragma handling. Combine the expansion location of FileLoc with the 1052df7c17a8d02fe09a3466786bae3e40fc3252687aChris Lattner // spelling location. 1053e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner SourceManager &SM = PP.getSourceManager(); 10541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1055433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth // Create a new SLoc which is expanded from Expansion(FileLoc) but whose 1056df7c17a8d02fe09a3466786bae3e40fc3252687aChris Lattner // characters come from spelling(FileLoc)+Offset. 
1057e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc); 1058a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis SpellingLoc = SpellingLoc.getLocWithOffset(CharNo); 10591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1060e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner // Figure out the expansion loc range, which is the range covered by the 1061e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner // original _Pragma(...) sequence. 1062e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner std::pair<SourceLocation,SourceLocation> II = 1063999f739404edf2078cf9f9c28b4dc45c19765842Chandler Carruth SM.getImmediateExpansionRange(FileLoc); 10641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1065bf340e452339e374ea6eef78c1f0a2abdd16c5a3Chandler Carruth return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen); 1066409a03671224d4e5bdab1594c43baf070148f830Chris Lattner} 1067409a03671224d4e5bdab1594c43baf070148f830Chris Lattner 10685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// getSourceLocation - Return a source location identifier for the specified 10695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// offset in the current file. 1070de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris LattnerSourceLocation Lexer::getSourceLocation(const char *Loc, 1071de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner unsigned TokLen) const { 1072448cec4c1c3705f6f49ffdefb58a7329942a2dd8Chris Lattner assert(Loc >= BufferStart && Loc <= BufferEnd && 10735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer "Location out of range for this buffer!"); 10749dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner 10759dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner // In the normal case, we're just lexing from a simple file buffer, return 10769dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner // the file id from FileLoc with the offset specified. 
1077448cec4c1c3705f6f49ffdefb58a7329942a2dd8Chris Lattner unsigned CharNo = Loc-BufferStart; 10789dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner if (FileLoc.isFileID()) 1079a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return FileLoc.getLocWithOffset(CharNo); 10801eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 10812b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner // Otherwise, this is the _Pragma lexer case, which pretends that all of the 10822b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner // tokens are lexed from where the _Pragma was defined. 1083168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner assert(PP && "This doesn't work on raw lexers"); 1084de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner return GetMappedTokenLoc(*PP, FileLoc, CharNo, TokLen); 10855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 10865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// Diag - Forwarding function for diagnostics. This translate a source 10885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// position in the current buffer into a SourceLocation object for rendering. 10893cbfe2c4159e0a219ae660d50625c013aa4afbd0Chris LattnerDiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const { 10903692b09faa9fe346f39bc922db6dce48cdcc3f63Chris Lattner return PP->Diag(getSourceLocation(Loc), DiagID); 10915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 10925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 10945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Trigraph and Escaped Newline Handling Code. 
10955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 10965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, 10985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// return the decoded trigraph letter it corresponds to, or '\0' if nothing. 10995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic char GetTrigraphCharForLetter(char Letter) { 11005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer switch (Letter) { 11015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer default: return 0; 11025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '=': return '#'; 11035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ')': return ']'; 11045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '(': return '['; 11055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '!': return '|'; 11065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\'': return '^'; 11075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '>': return '}'; 11085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '/': return '\\'; 11095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '<': return '{'; 11105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '-': return '~'; 11115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 11125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 11135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 11145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// DecodeTrigraphChar - If the specified character is a legal trigraph when 11155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// prefixed with ??, emit a trigraph warning. If trigraphs are enabled, 11165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// return the result character. 
Finally, emit a warning about trigraph use 11175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// whether trigraphs are enabled or not. 11185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic char DecodeTrigraphChar(const char *CP, Lexer *L) { 11195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char Res = GetTrigraphCharForLetter(*CP); 11203692b09faa9fe346f39bc922db6dce48cdcc3f63Chris Lattner if (!Res || !L) return Res; 11211eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 11224e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!L->getLangOpts().Trigraphs) { 112374d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 112474d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CP-2, diag::trigraph_ignored); 11253692b09faa9fe346f39bc922db6dce48cdcc3f63Chris Lattner return 0; 11265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 11271eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 112874d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 11295f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1); 11305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return Res; 11315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 11325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 113324f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner/// getEscapedNewLineSize - Return the size of the specified escaped newline, 113424f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" or a 11351eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump/// trigraph equivalent on entry to this function. 
113624f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattnerunsigned Lexer::getEscapedNewLineSize(const char *Ptr) { 113724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner unsigned Size = 0; 113824f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner while (isWhitespace(Ptr[Size])) { 113924f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner ++Size; 11401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 114124f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r') 114224f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner continue; 114324f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner 114424f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // If this is a \r\n or \n\r, skip the other half. 114524f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') && 114624f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Ptr[Size-1] != Ptr[Size]) 114724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner ++Size; 11481eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 114924f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner return Size; 11501eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 11511eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 115224f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Not an escaped newline, must be a \t or something else. 115324f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner return 0; 115424f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner} 115524f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner 1156033749571f8d4c804eeb357c70b06424aa24503bChris Lattner/// SkipEscapedNewLines - If P points to an escaped newline (or a series of 1157033749571f8d4c804eeb357c70b06424aa24503bChris Lattner/// them), skip over them and return the first non-escaped-newline found, 1158033749571f8d4c804eeb357c70b06424aa24503bChris Lattner/// otherwise return P. 
1159033749571f8d4c804eeb357c70b06424aa24503bChris Lattnerconst char *Lexer::SkipEscapedNewLines(const char *P) { 1160033749571f8d4c804eeb357c70b06424aa24503bChris Lattner while (1) { 1161033749571f8d4c804eeb357c70b06424aa24503bChris Lattner const char *AfterEscape; 1162033749571f8d4c804eeb357c70b06424aa24503bChris Lattner if (*P == '\\') { 1163033749571f8d4c804eeb357c70b06424aa24503bChris Lattner AfterEscape = P+1; 1164033749571f8d4c804eeb357c70b06424aa24503bChris Lattner } else if (*P == '?') { 1165033749571f8d4c804eeb357c70b06424aa24503bChris Lattner // If not a trigraph for escape, bail out. 1166033749571f8d4c804eeb357c70b06424aa24503bChris Lattner if (P[1] != '?' || P[2] != '/') 1167033749571f8d4c804eeb357c70b06424aa24503bChris Lattner return P; 1168033749571f8d4c804eeb357c70b06424aa24503bChris Lattner AfterEscape = P+3; 1169033749571f8d4c804eeb357c70b06424aa24503bChris Lattner } else { 1170033749571f8d4c804eeb357c70b06424aa24503bChris Lattner return P; 1171033749571f8d4c804eeb357c70b06424aa24503bChris Lattner } 11721eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1173033749571f8d4c804eeb357c70b06424aa24503bChris Lattner unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape); 1174033749571f8d4c804eeb357c70b06424aa24503bChris Lattner if (NewLineSize == 0) return P; 1175033749571f8d4c804eeb357c70b06424aa24503bChris Lattner P = AfterEscape+NewLineSize; 1176033749571f8d4c804eeb357c70b06424aa24503bChris Lattner } 1177033749571f8d4c804eeb357c70b06424aa24503bChris Lattner} 1178033749571f8d4c804eeb357c70b06424aa24503bChris Lattner 1179aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks/// \brief Checks that the given token is the first token that occurs after the 1180aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks/// given location (this excludes comments and whitespace). Returns the location 1181aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks/// immediately after the specified token. 
If the token is not found or the 1182aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks/// location is inside a macro, the returned source location will be invalid. 1183aca25bccefe56121b686706afc84c8cb5d46e65bAnna ZaksSourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, 1184aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks tok::TokenKind TKind, 1185aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks const SourceManager &SM, 1186aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks const LangOptions &LangOpts, 1187aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks bool SkipTrailingWhitespaceAndNewLine) { 1188aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks if (Loc.isMacroID()) { 118969bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) 1190aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks return SourceLocation(); 1191aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks } 1192aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); 1193aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1194aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Break down the source location. 1195aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); 1196aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1197aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Try to load the file buffer. 
1198aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks bool InvalidTemp = false; 1199cfa88f893915ceb8ae4ce2f17c46c24a4d67502fDmitri Gribenko StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); 1200aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks if (InvalidTemp) 1201aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks return SourceLocation(); 1202aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1203aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks const char *TokenBegin = File.data() + LocInfo.second; 1204aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1205aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Lex from the start of the given location. 1206aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(), 1207aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks TokenBegin, File.end()); 1208aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Find the token. 1209aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks Token Tok; 1210aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks lexer.LexFromRawLexer(Tok); 1211aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks if (Tok.isNot(TKind)) 1212aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks return SourceLocation(); 1213aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks SourceLocation TokenLoc = Tok.getLocation(); 1214aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1215aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Calculate how much whitespace needs to be skipped if any. 
1216aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks unsigned NumWhitespaceChars = 0; 1217aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks if (SkipTrailingWhitespaceAndNewLine) { 1218aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks const char *TokenEnd = SM.getCharacterData(TokenLoc) + 1219aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks Tok.getLength(); 1220aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks unsigned char C = *TokenEnd; 1221aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks while (isHorizontalWhitespace(C)) { 1222aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks C = *(++TokenEnd); 1223aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks NumWhitespaceChars++; 1224aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks } 122535a2b798efd61fec425553f387d76be9c522f184Eli Friedman 122635a2b798efd61fec425553f387d76be9c522f184Eli Friedman // Skip \r, \n, \r\n, or \n\r 122735a2b798efd61fec425553f387d76be9c522f184Eli Friedman if (C == '\n' || C == '\r') { 122835a2b798efd61fec425553f387d76be9c522f184Eli Friedman char PrevC = C; 122935a2b798efd61fec425553f387d76be9c522f184Eli Friedman C = *(++TokenEnd); 1230aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks NumWhitespaceChars++; 123135a2b798efd61fec425553f387d76be9c522f184Eli Friedman if ((C == '\n' || C == '\r') && C != PrevC) 123235a2b798efd61fec425553f387d76be9c522f184Eli Friedman NumWhitespaceChars++; 123335a2b798efd61fec425553f387d76be9c522f184Eli Friedman } 1234aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks } 1235aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1236a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars); 1237aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks} 123824f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner 12395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer, 12405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// get its size, 
and return it. This is tricky in several cases: 12415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 1. If currently at the start of a trigraph, we warn about the trigraph, 12425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// then either return the trigraph (skipping 3 chars) or the '?', 12435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// depending on whether trigraphs are enabled or not. 12445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 2. If this is an escaped newline (potentially with whitespace between 12455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// the backslash and newline), implicitly skip the newline and return 12465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// the char after it. 12475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 12485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// This handles the slow/uncommon case of the getCharAndSize method. Here we 12495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// know that we can accumulate into Size, and that we have already incremented 12505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// Ptr by Size bytes. 12515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 12525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should 12535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// be updated to match. 12545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 12555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerchar Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, 1256d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Token *Tok) { 12575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have a slash, look for an escaped newline. 
12585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Ptr[0] == '\\') { 12595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Size; 12605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Ptr; 12615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerSlash: 12625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Common case, backslash-char where the char is not whitespace. 12635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (!isWhitespace(Ptr[0])) return '\\'; 12641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 12655636a3b6ece2c1f413464b72545e08eb0b7f06e4Chris Lattner // See if we have optional whitespace characters between the slash and 12665636a3b6ece2c1f413464b72545e08eb0b7f06e4Chris Lattner // newline. 126724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { 126824f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Remember that this token needs to be cleaned. 126924f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if (Tok) Tok->setFlag(Token::NeedsCleaning); 127024f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner 127124f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Warn if there was whitespace between the backslash and newline. 12725636a3b6ece2c1f413464b72545e08eb0b7f06e4Chris Lattner if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode()) 127324f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Diag(Ptr, diag::backslash_newline_space); 12741eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 127524f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Found backslash<whitespace><newline>. Parse the char after it. 
127624f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Size += EscapedNewLineSize; 127724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Ptr += EscapedNewLineSize; 1278f132dcaae82ebfc44c4fe0e84bf0b1f95e9d1251Argyrios Kyrtzidis 127904a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // If the char that we finally got was a \n, then we must have had 128004a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // something like \<newline><newline>. We don't want to consume the 128104a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // second newline. 128204a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0') 128304a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis return ' '; 1284f132dcaae82ebfc44c4fe0e84bf0b1f95e9d1251Argyrios Kyrtzidis 128524f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Use slow version to accumulate a correct size field. 128624f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner return getCharAndSizeSlow(Ptr, Size, Tok); 128724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner } 12881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 12895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, this is not an escaped newline, just return the slash. 12905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return '\\'; 12915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 12921eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 12935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is a trigraph, process it. 12945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Ptr[0] == '?' && Ptr[1] == '?') { 12955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is actually a legal trigraph (not something like "??x"), emit 12965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // a trigraph warning. If so, and if trigraphs are enabled, return it. 
12976bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : nullptr)) { 12985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Remember that this token needs to be cleaned. 1299d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner if (Tok) Tok->setFlag(Token::NeedsCleaning); 13005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 13015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Ptr += 3; 13025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Size += 3; 13035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '\\') goto Slash; 13045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return C; 13055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 13065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 13071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is neither, return a single character. 13095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Size; 13105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return *Ptr; 13115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 13125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 13135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 13145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the 13155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size, 13165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// and that we have already incremented Ptr by Size bytes. 13175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 13185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// NOTE: When this method is updated, getCharAndSizeSlow (above) should 13195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// be updated to match. 
13205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerchar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, 13214e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts) { 13225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have a slash, look for an escaped newline. 13235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Ptr[0] == '\\') { 13245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Size; 13255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Ptr; 13265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerSlash: 13275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Common case, backslash-char where the char is not whitespace. 13285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (!isWhitespace(Ptr[0])) return '\\'; 13291eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // See if we have optional whitespace characters followed by a newline. 133124f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { 133224f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Found backslash<whitespace><newline>. Parse the char after it. 133324f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Size += EscapedNewLineSize; 133424f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Ptr += EscapedNewLineSize; 13351eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 133604a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // If the char that we finally got was a \n, then we must have had 133704a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // something like \<newline><newline>. We don't want to consume the 133804a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // second newline. 
133904a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0') 134004a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis return ' '; 1341f132dcaae82ebfc44c4fe0e84bf0b1f95e9d1251Argyrios Kyrtzidis 134224f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Use slow version to accumulate a correct size field. 13434e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts); 134424f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner } 13451eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, this is not an escaped newline, just return the slash. 13475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return '\\'; 13485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 13491eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is a trigraph, process it. 13514e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (LangOpts.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') { 13525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is actually a legal trigraph (not something like "??x"), return 13535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // it. 13545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (char C = GetTrigraphCharForLetter(Ptr[2])) { 13555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Ptr += 3; 13565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Size += 3; 13575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '\\') goto Slash; 13585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return C; 13595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 13605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 13611eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is neither, return a single character. 
13635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Size; 13645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return *Ptr; 13655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 13665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 13675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 13685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Helper methods for lexing. 13695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 13705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1371f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor/// \brief Routine that indiscriminately skips bytes in the source file. 1372f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregorvoid Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) { 1373f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor BufferPtr += Bytes; 1374f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor if (BufferPtr > BufferEnd) 1375f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor BufferPtr = BufferEnd; 1376d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // FIXME: What exactly does the StartOfLine bit mean? There are two 1377d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // possible meanings for the "start" of the line: the first token on the 1378d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // unexpanded line, or the first token on the expanded line. 
1379f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor IsAtStartOfLine = StartOfLine; 1380d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = StartOfLine; 1381f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor} 1382f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor 1383ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rosestatic bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { 138487d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar if (LangOpts.AsmPreprocessor) { 138587d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar return false; 138687d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar } else if (LangOpts.CPlusPlus11 || LangOpts.C11) { 1387263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C11AllowedIDChars( 1388263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C11AllowedIDCharRanges); 1389263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return C11AllowedIDChars.contains(C); 1390263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else if (LangOpts.CPlusPlus) { 1391263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( 1392263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko CXX03AllowedIDCharRanges); 1393263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return CXX03AllowedIDChars.contains(C); 1394263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else { 1395263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C99AllowedIDChars( 1396263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C99AllowedIDCharRanges); 1397263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return C99AllowedIDChars.contains(C); 1398263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } 1399c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose} 
1400c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1401ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rosestatic bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { 1402ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose assert(isAllowedIDChar(C, LangOpts)); 140387d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar if (LangOpts.AsmPreprocessor) { 140487d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar return false; 140587d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar } else if (LangOpts.CPlusPlus11 || LangOpts.C11) { 1406263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars( 1407263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C11DisallowedInitialIDCharRanges); 1408263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return !C11DisallowedInitialIDChars.contains(C); 1409263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else if (LangOpts.CPlusPlus) { 1410ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose return true; 1411263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else { 1412263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( 1413263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C99DisallowedInitialIDCharRanges); 1414263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return !C99DisallowedInitialIDChars.contains(C); 1415263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } 1416ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose} 1417ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 1418ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rosestatic inline CharSourceRange makeCharRange(Lexer &L, const char *Begin, 1419ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose const char *End) { 1420ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose return 
CharSourceRange::getCharRange(L.getSourceLocation(Begin), 1421ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose L.getSourceLocation(End)); 1422ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose} 1423ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 1424ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rosestatic void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, 1425ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose CharSourceRange Range, bool IsFirst) { 1426ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose // Check C99 compatibility. 1427c568f1e98938584c0ef0b12ae5018ff7d90a4072Stephen Hines if (!Diags.isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) { 1428ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose enum { 1429ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose CannotAppearInIdentifier = 0, 1430ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose CannotStartIdentifier 1431ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose }; 1432ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 1433263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C99AllowedIDChars( 1434263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C99AllowedIDCharRanges); 1435263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( 1436263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C99DisallowedInitialIDCharRanges); 1437263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko if (!C99AllowedIDChars.contains(C)) { 1438ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) 1439ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << Range 1440ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << CannotAppearInIdentifier; 1441263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) { 
1442ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) 1443ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << Range 1444ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << CannotStartIdentifier; 1445ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 1446c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 1447c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1448ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose // Check C++98 compatibility. 1449c568f1e98938584c0ef0b12ae5018ff7d90a4072Stephen Hines if (!Diags.isIgnored(diag::warn_cxx98_compat_unicode_id, Range.getBegin())) { 1450263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( 1451263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko CXX03AllowedIDCharRanges); 1452263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko if (!CXX03AllowedIDChars.contains(C)) { 1453ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id) 1454ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << Range; 1455ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 1456ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 1457651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 1458651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 1459651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesbool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, 1460651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Token &Result) { 1461651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines const char *UCNPtr = CurPtr + Size; 14626bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/nullptr); 1463651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts)) 1464651f13cea278ec967336033dd032faef0e9fc2ecStephen 
Hines return false; 1465651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 1466651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (!isLexingRawMode()) 1467651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, 1468651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines makeCharRange(*this, CurPtr, UCNPtr), 1469651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /*IsFirst=*/false); 1470651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 1471651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Result.setFlag(Token::HasUCN); 1472651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') || 1473651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U')) 1474651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines CurPtr = UCNPtr; 1475651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines else 1476651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines while (CurPtr != UCNPtr) 1477651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines (void)getAndAdvanceChar(CurPtr, Result); 1478651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return true; 1479651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 1480651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 1481651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesbool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { 1482651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines const char *UnicodePtr = CurPtr; 1483651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines UTF32 CodePoint; 1484651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines ConversionResult Result = 1485651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr, 1486651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines (const UTF8 *)BufferEnd, 1487651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines &CodePoint, 
1488651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines strictConversion); 1489651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (Result != conversionOK || 1490651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) 1491651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return false; 1492651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 1493651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (!isLexingRawMode()) 1494651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, 1495651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines makeCharRange(*this, CurPtr, UnicodePtr), 1496651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines /*IsFirst=*/false); 1497651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 1498651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines CurPtr = UnicodePtr; 1499651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return true; 1500651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 1501c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1502d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexIdentifier(Token &Result, const char *CurPtr) { 15035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$] 15045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer unsigned Size; 15055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer unsigned char C = *CurPtr++; 1506cd991dbb12f24368753cef086c7ad3ec203c9ea6Chris Lattner while (isIdentifierBody(C)) 15075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 1508cd991dbb12f24368753cef086c7ad3ec203c9ea6Chris Lattner 15095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer --CurPtr; // Back up over the skipped character. 15105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 15115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Fast path, no $,\,? in identifier found. 
'\' might be an escaped newline 15125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN. 1513cd991dbb12f24368753cef086c7ad3ec203c9ea6Chris Lattner // 15149893902eceba7f01dd1521349d33866f77254d78Jordan Rose // TODO: Could merge these checks into an InfoTable flag to make the 15159893902eceba7f01dd1521349d33866f77254d78Jordan Rose // comparison cheaper 1516c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (isASCII(C) && C != '\\' && C != '?' && 1517c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose (C != '$' || !LangOpts.DollarIdents)) { 15185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerFinishIdentifier: 15195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *IdStart = BufferPtr; 1520c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara FormTokenWithChars(Result, CurPtr, tok::raw_identifier); 1521c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara Result.setRawIdentifierData(IdStart); 15221eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 15235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are in raw mode, return this identifier raw. There is no need to 15245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // look up identifier information or attempt to macro expand it. 1525c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara if (LexingRawMode) 1526d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 15271eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1528c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // Fill in Result.IdentifierInfo and update the token kind, 1529c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // looking up the identifier in the identifier table. 
1530c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); 15311eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 15325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Finally, now that we know we have an identifier, pass this off to the 15335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // preprocessor, which may macro expand it or something. 1534d1186fa38166a581b51975f0382a45fc3a0733d0Chris Lattner if (II->isHandleIdentifierCase()) 1535d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return PP->HandleIdentifier(Result); 15366aa52ec6b969faabf3764baf79d89810b8249a7eDouglas Gregor 1537d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 15385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 15391eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 15405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, $,\,? in identifier found. Enter slower path. 15411eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 15425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 15435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (1) { 15445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '$') { 15455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we hit a $ and they are not supported in identifiers, we are done. 15464e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!LangOpts.DollarIdents) goto FinishIdentifier; 15471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 15485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, emit a diagnostic and continue. 
154974d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 155074d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(CurPtr, diag::ext_dollar_in_identifier); 15515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, Size, Result); 15525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 15535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer continue; 1554c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1555651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines } else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) { 1556c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose C = getCharAndSize(CurPtr, Size); 1557c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose continue; 1558651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines } else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) { 1559c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose C = getCharAndSize(CurPtr, Size); 1560c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose continue; 1561c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } else if (!isIdentifierBody(C)) { 15625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer goto FinishIdentifier; 15635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 15645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 15655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, this character is good, consume it. 
15665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, Size, Result); 15675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 15685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 1569c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose while (isIdentifierBody(C)) { 15705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, Size, Result); 15715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 15725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 15735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 15745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 15755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1576a75ec43d625753b4439b0d6f70bd988444c74617Douglas Gregor/// isHexaLiteral - Return true if Start points to a hex constant. 15774a551000bee716ac8b1bbe16134a53f0ad221a5aChris Lattner/// in microsoft mode (where this is supposed to be several different tokens). 
1578e506f8a41063410c75945ebb804758bd0202947fEli Friedmanbool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) { 15796ab55ebab20086f725c4017b48ef8d7691ef870fChris Lattner unsigned Size; 15804e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts); 15816ab55ebab20086f725c4017b48ef8d7691ef870fChris Lattner if (C1 != '0') 15826ab55ebab20086f725c4017b48ef8d7691ef870fChris Lattner return false; 15834e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts); 15846ab55ebab20086f725c4017b48ef8d7691ef870fChris Lattner return (C2 == 'x' || C2 == 'X'); 1585a75ec43d625753b4439b0d6f70bd988444c74617Douglas Gregor} 15865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 15875253c7ff266ae79308050c9f43d60dd1a67c5fb9Nate Begeman/// LexNumericConstant - Lex the remainder of a integer or floating point 15885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// constant. From[-1] is the first character lexed. Return the end of the 15895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// constant. 
1590d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { 15915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer unsigned Size; 15925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char C = getCharAndSize(CurPtr, Size); 15935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char PrevCh = 0; 1594651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines while (isPreprocessingNumberBody(C)) { 15955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, Size, Result); 15965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer PrevCh = C; 15975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 15985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 15991eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 16005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we fell out, check for a sign, due to 1e+12. If we have one, continue. 1601b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) { 1602b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner // If we are in Microsoft mode, don't continue if the constant is hex. 1603b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner // For example, MSVC will accept the following as 3 tokens: 0x1234567e+1 16044e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts)) 1605b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); 1606b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner } 16075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 16085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have a hex FP constant, continue. 
1609d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) { 16104967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar // Outside C99 and C++17, we accept hexadecimal floating point numbers as a 1611d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith // not-quite-conforming extension. Only do so if this looks like it's 1612d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith // actually meant to be a hexfloat, and not if it has a ud-suffix. 1613d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith bool IsHexFloat = true; 1614d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith if (!LangOpts.C99) { 1615d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith if (!isHexaLiteral(BufferPtr, LangOpts)) 1616d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith IsHexFloat = false; 16174967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar else if (!getLangOpts().CPlusPlus1z && 16184967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar std::find(BufferPtr, CurPtr, '_') != CurPtr) 1619d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith IsHexFloat = false; 1620d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith } 1621d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith if (IsHexFloat) 1622d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); 1623d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith } 16241eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1625859b6227694033dd6eaf3991a2b80877a406c382Richard Smith // If we have a digit separator, continue. 
1626176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines if (C == '\'' && getLangOpts().CPlusPlus14) { 1627859b6227694033dd6eaf3991a2b80877a406c382Richard Smith unsigned NextSize; 1628859b6227694033dd6eaf3991a2b80877a406c382Richard Smith char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts()); 16299a17677bb8a332e873137187aba91f64d3fd16a0Richard Smith if (isIdentifierBody(Next)) { 1630859b6227694033dd6eaf3991a2b80877a406c382Richard Smith if (!isLexingRawMode()) 1631859b6227694033dd6eaf3991a2b80877a406c382Richard Smith Diag(CurPtr, diag::warn_cxx11_compat_digit_separator); 1632859b6227694033dd6eaf3991a2b80877a406c382Richard Smith CurPtr = ConsumeChar(CurPtr, Size, Result); 1633651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines CurPtr = ConsumeChar(CurPtr, NextSize, Result); 1634859b6227694033dd6eaf3991a2b80877a406c382Richard Smith return LexNumericConstant(Result, CurPtr); 1635859b6227694033dd6eaf3991a2b80877a406c382Richard Smith } 1636859b6227694033dd6eaf3991a2b80877a406c382Richard Smith } 1637859b6227694033dd6eaf3991a2b80877a406c382Richard Smith 1638651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // If we have a UCN or UTF-8 character (perhaps in a ud-suffix), continue. 1639651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) 1640651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return LexNumericConstant(Result, CurPtr); 1641651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) 1642651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return LexNumericConstant(Result, CurPtr); 1643651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 16445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 
164547246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner const char *TokStart = BufferPtr; 16469e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::numeric_constant); 164747246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner Result.setLiteralData(TokStart); 1648d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 16495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 16505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 16515cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes 1652e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith/// in C++11, or warn on a ud-suffix in C++98. 16534ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smithconst char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, 16544ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith bool IsStringLiteral) { 16554e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie assert(getLangOpts().CPlusPlus); 16565cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 1657651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // Maximally munch an identifier. 
16585cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith unsigned Size; 16595cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith char C = getCharAndSize(CurPtr, Size); 1660651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines bool Consumed = false; 1661651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 1662651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (!isIdentifierHead(C)) { 1663651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) 1664651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Consumed = true; 1665651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) 1666651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Consumed = true; 1667651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines else 16682fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith return CurPtr; 1669651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines } 16702fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith 1671651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (!getLangOpts().CPlusPlus11) { 1672651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (!isLexingRawMode()) 1673651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Diag(CurPtr, 1674651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines C == '_' ? 
diag::warn_cxx11_compat_user_defined_literal 1675651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines : diag::warn_cxx11_compat_reserved_user_defined_literal) 1676651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); 1677651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return CurPtr; 1678651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines } 1679651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 1680651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix 1681651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // that does not start with an underscore is ill-formed. As a conforming 1682651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // extension, we treat all such suffixes as if they had whitespace before 1683651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // them. We assume a suffix beginning with a UCN or UTF-8 character is more 1684651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // likely to be a ud-suffix than a macro, however, and accept that. 1685651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (!Consumed) { 16864ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith bool IsUDSuffix = false; 16874ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith if (C == '_') 16884ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith IsUDSuffix = true; 1689176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines else if (IsStringLiteral && getLangOpts().CPlusPlus14) { 169006dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // In C++1y, we need to look ahead a few characters to see if this is a 169106dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // valid suffix for a string literal or a numeric literal (this could be 169206dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // the 'operator""if' defining a numeric literal operator). 
16936fde25e14e8d89080fb9f32b7c0d65f869bb06eaRichard Smith const unsigned MaxStandardSuffixLength = 3; 169406dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith char Buffer[MaxStandardSuffixLength] = { C }; 169506dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith unsigned Consumed = Size; 169606dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith unsigned Chars = 1; 169706dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith while (true) { 169806dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith unsigned NextSize; 169906dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize, 170006dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith getLangOpts()); 170106dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith if (!isIdentifierBody(Next)) { 170206dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // End of suffix. Check whether this is on the whitelist. 170306dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith IsUDSuffix = (Chars == 1 && Buffer[0] == 's') || 170406dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith NumericLiteralParser::isValidUDSuffix( 170506dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith getLangOpts(), StringRef(Buffer, Chars)); 170606dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith break; 170706dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith } 170806dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith 170906dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith if (Chars == MaxStandardSuffixLength) 171006dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // Too long: can't be a standard suffix. 
171106dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith break; 171206dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith 171306dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith Buffer[Chars++] = Next; 171406dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith Consumed += NextSize; 171506dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith } 17164ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith } 17174ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith 17184ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith if (!IsUDSuffix) { 17192fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith if (!isLexingRawMode()) 1720651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Diag(CurPtr, getLangOpts().MSVCCompat 1721651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines ? diag::ext_ms_reserved_user_defined_literal 1722651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines : diag::ext_reserved_user_defined_literal) 1723e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); 1724e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith return CurPtr; 1725e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith } 1726e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith 1727651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines CurPtr = ConsumeChar(CurPtr, Size, Result); 17285cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith } 1729651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 1730651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Result.setFlag(Token::HasUDSuffix); 1731651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines while (true) { 1732651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines C = getCharAndSize(CurPtr, Size); 1733651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (isIdentifierBody(C)) { CurPtr = ConsumeChar(CurPtr, Size, Result); } 1734651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {} 
1735651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {} 1736651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines else break; 1737651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines } 1738651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 17395cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith return CurPtr; 17405cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith} 17415cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 17425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexStringLiteral - Lex the remainder of a string literal, after having lexed 17435cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor/// either " or L" or u8" or u" or U". 1744d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, 17455cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::TokenKind Kind) { 17466bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines // Does this string contain the \0 character? 17476bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines const char *NulCharacter = nullptr; 17481eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1749661a99690bc133bbaa029da925481d4a860dec90Richard Smith if (!isLexingRawMode() && 1750661a99690bc133bbaa029da925481d4a860dec90Richard Smith (Kind == tok::utf8_string_literal || 1751661a99690bc133bbaa029da925481d4a860dec90Richard Smith Kind == tok::utf16_string_literal || 1752d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith Kind == tok::utf32_string_literal)) 1753d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith Diag(BufferPtr, getLangOpts().CPlusPlus 1754d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith ? 
diag::warn_cxx98_compat_unicode_literal 1755d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith : diag::warn_c99_compat_unicode_literal); 1756661a99690bc133bbaa029da925481d4a860dec90Richard Smith 17575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char C = getAndAdvanceChar(CurPtr, Result); 17585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (C != '"') { 1759571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner // Skip escaped characters. Escaped newlines will already be processed by 1760571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner // getAndAdvanceChar. 1761571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner if (C == '\\') 17625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getAndAdvanceChar(CurPtr, Result); 176333611e0d5ab1372608a7649b1877cd4300621c71Douglas Gregor 1764571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner if (C == '\n' || C == '\r' || // Newline. 176533611e0d5ab1372608a7649b1877cd4300621c71Douglas Gregor (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. 
17664e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) 176787d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1; 17689e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1769d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 17705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 1771571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner 17727d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (C == 0) { 17737d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (isCodeCompletionPoint(CurPtr-1)) { 17747d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 17757d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1776d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman cutOffLexing(); 1777d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 17787d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 17797d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 1780571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner NulCharacter = CurPtr-1; 17817d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 17825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getAndAdvanceChar(CurPtr, Result); 17835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 17841eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 17855cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith // If we are in C++11, lex the optional ud-suffix. 
17864e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (getLangOpts().CPlusPlus) 17874ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith CurPtr = LexUDSuffix(Result, CurPtr, true); 17885cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 17895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If a nul character existed in the string, warn about it. 179074d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (NulCharacter && !isLexingRawMode()) 179187d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar Diag(NulCharacter, diag::null_in_char_or_string) << 1; 17925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 17935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of the token as well as the BufferPtr instance var. 179447246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner const char *TokStart = BufferPtr; 17955cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor FormTokenWithChars(Result, CurPtr, Kind); 179647246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner Result.setLiteralData(TokStart); 1797d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 17985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 17995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 18002fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// LexRawStringLiteral - Lex the remainder of a raw string literal, after 18012fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// having lexed R", LR", u8R", uR", or UR". 
1802d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, 18032fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::TokenKind Kind) { 18042fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3: 18052fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Between the initial and final double quote characters of the raw string, 18062fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // any transformations performed in phases 1 and 2 (trigraphs, 18072fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // universal-character-names, and line splicing) are reverted. 18082fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 1809661a99690bc133bbaa029da925481d4a860dec90Richard Smith if (!isLexingRawMode()) 1810661a99690bc133bbaa029da925481d4a860dec90Richard Smith Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal); 1811661a99690bc133bbaa029da925481d4a860dec90Richard Smith 18122fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper unsigned PrefixLen = 0; 18132fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18142fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) 18152fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ++PrefixLen; 18162fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18172fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // If the last character was not a '(', then we didn't lex a valid delimiter. 
18182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (CurPtr[PrefixLen] != '(') { 18192fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (!isLexingRawMode()) { 18202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper const char *PrefixEnd = &CurPtr[PrefixLen]; 18212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (PrefixLen == 16) { 18222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Diag(PrefixEnd, diag::err_raw_delim_too_long); 18232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } else { 18242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Diag(PrefixEnd, diag::err_invalid_char_raw_delim) 18252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper << StringRef(PrefixEnd, 1); 18262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18292fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Search for the next '"' in hopes of salvaging the lexer. Unfortunately, 18302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // it's possible the '"' was intended to be part of the raw string, but 18312fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // there's not much we can do about that. 
18322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper while (1) { 18332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper char C = *CurPtr++; 18342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18352fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (C == '"') 18362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper break; 18372fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (C == 0 && CurPtr-1 == BufferEnd) { 18382fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper --CurPtr; 18392fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper break; 18402fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18412fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18422fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18432fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper FormTokenWithChars(Result, CurPtr, tok::unknown); 1844d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18452fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18462fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18472fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Save prefix and move CurPtr past it 18482fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper const char *Prefix = CurPtr; 18492fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper CurPtr += PrefixLen + 1; // skip over prefix and '(' 18502fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18512fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper while (1) { 18522fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper char C = *CurPtr++; 18532fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18542fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (C == ')') { 18552fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Check for prefix match and closing quote. 
18562fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') { 18572fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper CurPtr += PrefixLen + 1; // skip over prefix and '"' 18582fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper break; 18592fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18602fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file. 18612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (!isLexingRawMode()) 18622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Diag(BufferPtr, diag::err_unterminated_raw_string) 18632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper << StringRef(Prefix, PrefixLen); 18642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1865d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18662fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18672fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18682fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18695cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith // If we are in C++11, lex the optional ud-suffix. 18704e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (getLangOpts().CPlusPlus) 18714ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith CurPtr = LexUDSuffix(Result, CurPtr, true); 18725cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 18732fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Update the location of token as well as BufferPtr. 
18742fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper const char *TokStart = BufferPtr; 18752fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper FormTokenWithChars(Result, CurPtr, Kind); 18762fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Result.setLiteralData(TokStart); 1877d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18782fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper} 18792fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexAngledStringLiteral - Lex the remainder of an angled string literal, 18815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// after having lexed the '<' character. This is used for #include filenames. 1882d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { 18836bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines // Does this string contain the \0 character? 18846bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines const char *NulCharacter = nullptr; 18859cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner const char *AfterLessPos = CurPtr; 18865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char C = getAndAdvanceChar(CurPtr, Result); 18875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (C != '>') { 18885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Skip escaped characters. 1889b6d6993e6e6d3daf4d9876794254d20a134e37c2Pirama Arumuga Nainar if (C == '\\' && CurPtr < BufferEnd) { 18905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Skip the escaped character. 189160b202c5eb6fb2d608bfef05523f40fdf5091d48Dmitri Gribenko getAndAdvanceChar(CurPtr, Result); 18925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (C == '\n' || C == '\r' || // Newline. 18937d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis (C == 0 && (CurPtr-1 == BufferEnd || // End of file. 
18947d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis isCodeCompletionPoint(CurPtr-1)))) { 18959cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner // If the filename is unterminated, then it must just be a lone < 18969cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner // character. Return this as such. 18979cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner FormTokenWithChars(Result, AfterLessPos, tok::less); 1898d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (C == 0) { 19005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer NulCharacter = CurPtr-1; 19015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 19025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getAndAdvanceChar(CurPtr, Result); 19035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 19041eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If a nul character existed in the string, warn about it. 190674d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (NulCharacter && !isLexingRawMode()) 190787d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar Diag(NulCharacter, diag::null_in_char_or_string) << 1; 19081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 
191047246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner const char *TokStart = BufferPtr; 19119e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::angle_string_literal); 191247246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner Result.setLiteralData(TokStart); 1913d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 19145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 19155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 19165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 19175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexCharConstant - Lex the remainder of a character constant, after having 1918176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines/// lexed either ' or L' or u8' or u' or U'. 1919d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexCharConstant(Token &Result, const char *CurPtr, 19205cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::TokenKind Kind) { 19216bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines // Does this character contain the \0 character? 19226bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines const char *NulCharacter = nullptr; 19235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1924176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines if (!isLexingRawMode()) { 1925176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant) 1926176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines Diag(BufferPtr, getLangOpts().CPlusPlus 1927176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines ? 
diag::warn_cxx98_compat_unicode_literal 1928176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines : diag::warn_c99_compat_unicode_literal); 1929176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines else if (Kind == tok::utf8_char_constant) 1930176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal); 1931176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines } 1932661a99690bc133bbaa029da925481d4a860dec90Richard Smith 19335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char C = getAndAdvanceChar(CurPtr, Result); 19345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '\'') { 19354e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) 1936b6ebd4490235c9ea6016530d623c46d0b9ce565bRichard Smith Diag(BufferPtr, diag::ext_empty_character); 19379e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 1938d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 19395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 19401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1941d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner while (C != '\'') { 1942d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner // Skip escaped characters. 19436d926ae667cca926e77ddce1734514911706ed0aNico Weber if (C == '\\') 19446d926ae667cca926e77ddce1734514911706ed0aNico Weber C = getAndAdvanceChar(CurPtr, Result); 19456d926ae667cca926e77ddce1734514911706ed0aNico Weber 19466d926ae667cca926e77ddce1734514911706ed0aNico Weber if (C == '\n' || C == '\r' || // Newline. 19476d926ae667cca926e77ddce1734514911706ed0aNico Weber (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. 
19484e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) 194987d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0; 1950d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1951d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 19526d926ae667cca926e77ddce1734514911706ed0aNico Weber } 19536d926ae667cca926e77ddce1734514911706ed0aNico Weber 19546d926ae667cca926e77ddce1734514911706ed0aNico Weber if (C == 0) { 19557d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (isCodeCompletionPoint(CurPtr-1)) { 19567d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 19577d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1958d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman cutOffLexing(); 1959d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 19607d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 19617d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 1962d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner NulCharacter = CurPtr-1; 1963d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner } 1964d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner C = getAndAdvanceChar(CurPtr, Result); 19655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 19661eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19675cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith // If we are in C++11, lex the optional ud-suffix. 
19684e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (getLangOpts().CPlusPlus) 19694ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith CurPtr = LexUDSuffix(Result, CurPtr, false); 19705cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 1971d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner // If a nul character existed in the character, warn about it. 197274d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (NulCharacter && !isLexingRawMode()) 197387d948ecccffea9e9e37d0d053b246e2d6d6c47bPirama Arumuga Nainar Diag(NulCharacter, diag::null_in_char_or_string) << 0; 19745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 19755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 197647246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner const char *TokStart = BufferPtr; 19775cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor FormTokenWithChars(Result, CurPtr, Kind); 197847246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner Result.setLiteralData(TokStart); 1979d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 19805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 19815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 19825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// SkipWhitespace - Efficiently skip over a series of whitespace characters. 19835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// Update BufferPtr to point to the next non-whitespace character and return. 1984d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner/// 1985d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner/// This method forms a token and returns true if KeepWhitespaceMode is enabled. 
1986d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner/// 1987d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, 1988d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool &TokAtPhysicalStartOfLine) { 19895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Whitespace - Skip it, then return the token after the whitespace. 19906aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose bool SawNewline = isVerticalWhitespace(CurPtr[-1]); 19916aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 19928f1900376906c51aabf5fd18a39524e2318276baRichard Smith unsigned char Char = *CurPtr; 19938f1900376906c51aabf5fd18a39524e2318276baRichard Smith 19948f1900376906c51aabf5fd18a39524e2318276baRichard Smith // Skip consecutive spaces efficiently. 19955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (1) { 19965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Skip horizontal whitespace very aggressively. 19975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (isHorizontalWhitespace(Char)) 19985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = *++CurPtr; 19991eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2000ddd3e8b90a92c39f620bda7a0945320c0b9e60dbDaniel Dunbar // Otherwise if we have something other than whitespace, we're done. 20016aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose if (!isVerticalWhitespace(Char)) 20025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 20031eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 20045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ParsingPreprocessorDirective) { 20055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // End of preprocessor directive line, let LexTokenInternal handle this. 
20065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 2007d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner return false; 20085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 20091eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 20108f1900376906c51aabf5fd18a39524e2318276baRichard Smith // OK, but handle newline. 20116aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose SawNewline = true; 20125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = *++CurPtr; 20135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 20145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2015d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // If the client wants us to return whitespace, return it now. 2016d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner if (isKeepWhitespaceMode()) { 20179e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 2018d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SawNewline) { 20196aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose IsAtStartOfLine = true; 2020d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = true; 2021d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 20226aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose // FIXME: The next token will not have LeadingSpace set. 2023d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner return true; 2024d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner } 20251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 20266aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose // If this isn't immediately after a newline, there is leading space. 
20276aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose char PrevChar = CurPtr[-1]; 20286aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose bool HasLeadingSpace = !isVerticalWhitespace(PrevChar); 20296aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 20306aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace); 2031d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SawNewline) { 20326aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose Result.setFlag(Token::StartOfLine); 2033d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman TokAtPhysicalStartOfLine = true; 2034d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 20356aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 20365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 2037d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner return false; 20385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 20395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2040bb23628148f555a4cf71f98c27096a7a804c085cNico Weber/// We have just read the // characters from input. Skip until we find the 2041bb23628148f555a4cf71f98c27096a7a804c085cNico Weber/// newline character thats terminate the comment. Then update BufferPtr and 2042bb23628148f555a4cf71f98c27096a7a804c085cNico Weber/// return. 2043046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// 2044046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// If we're in KeepCommentMode or any CommentHandler has inserted 2045046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// some tokens, this will store the first token and return true. 
2046d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::SkipLineComment(Token &Result, const char *CurPtr, 2047d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool &TokAtPhysicalStartOfLine) { 2048bb23628148f555a4cf71f98c27096a7a804c085cNico Weber // If Line comments aren't explicitly enabled for this language, emit an 20495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // extension warning. 2050bb23628148f555a4cf71f98c27096a7a804c085cNico Weber if (!LangOpts.LineComment && !isLexingRawMode()) { 2051bb23628148f555a4cf71f98c27096a7a804c085cNico Weber Diag(BufferPtr, diag::ext_line_comment); 20521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 20535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Mark them enabled so we only emit one warning for this translation 20545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // unit. 2055bb23628148f555a4cf71f98c27096a7a804c085cNico Weber LangOpts.LineComment = true; 20565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 20571eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 20585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Scan over the body of the comment. The common case, when scanning, is that 20595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // the comment contains normal ascii characters with nothing interesting in 20605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // them. As such, optimize for this case with the inner loop. 20615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char C; 20625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer do { 20635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr; 20645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Skip over characters in the fast loop. 20655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (C != 0 && // Potentially EOF. 20665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C != '\n' && C != '\r') // Newline or DOS-style newline. 
20675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *++CurPtr; 20685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 20691daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer const char *NextLine = CurPtr; 20701daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer if (C != 0) { 20711daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer // We found a newline, see if it's escaped. 20721daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer const char *EscapePtr = CurPtr-1; 2073651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines bool HasSpace = false; 2074651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines while (isHorizontalWhitespace(*EscapePtr)) { // Skip whitespace. 20751daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer --EscapePtr; 2076651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines HasSpace = true; 2077651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines } 20781daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer 20791daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer if (*EscapePtr == '\\') // Escaped newline. 20801daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer CurPtr = EscapePtr; 20811daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' && 20821daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer EscapePtr[-2] == '?') // Trigraph-escaped newline. 20831daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer CurPtr = EscapePtr-2; 20841daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer else 20851daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer break; // This is a newline, we're done. 2086651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 2087651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // If there was space between the backslash and newline, warn about it. 
2088651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (HasSpace && !isLexingRawMode()) 2089651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Diag(EscapePtr, diag::backslash_newline_space); 20901daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer } 20911eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 20925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to 2093bc3e9842212b78bcf49682641307e4f3419ecc5eChris Lattner // properly decode the character. Read it in raw mode to avoid emitting 2094bc3e9842212b78bcf49682641307e4f3419ecc5eChris Lattner // diagnostics about things like trigraphs. If we see an escaped newline, 2095bc3e9842212b78bcf49682641307e4f3419ecc5eChris Lattner // we'll handle it below. 20965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *OldPtr = CurPtr; 2097bc3e9842212b78bcf49682641307e4f3419ecc5eChris Lattner bool OldRawMode = isLexingRawMode(); 2098bc3e9842212b78bcf49682641307e4f3419ecc5eChris Lattner LexingRawMode = true; 20995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getAndAdvanceChar(CurPtr, Result); 2100bc3e9842212b78bcf49682641307e4f3419ecc5eChris Lattner LexingRawMode = OldRawMode; 2101ead616c5d8681a41b11273327813e61bda01907aChris Lattner 21021daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer // If we only read only one character, then no special handling is needed. 21031daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer // We're done and can skip forward to the newline. 
21041daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer if (C != 0 && CurPtr == OldPtr+1) { 21051daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer CurPtr = NextLine; 21061daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer break; 21071daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer } 21081daa58ea8973854b62e79f0939fa37cb6fed00e6Benjamin Kramer 21095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we read multiple characters, and one of those characters was a \r or 21105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // \n, then we had an escaped newline within the comment. Emit diagnostic 21115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // unless the next line is also a // comment. 21125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') { 21135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (; OldPtr != CurPtr; ++OldPtr) 21145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (OldPtr[0] == '\n' || OldPtr[0] == '\r') { 21155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Okay, we found a // comment that ends in a newline, if the next 21165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // line is also a // comment, but has spaces, don't emit a diagnostic. 21175d6ae288bc661572e52ea71fc14b249eb5e2d9e9Benjamin Kramer if (isWhitespace(C)) { 21185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *ForwardPtr = CurPtr; 21195d6ae288bc661572e52ea71fc14b249eb5e2d9e9Benjamin Kramer while (isWhitespace(*ForwardPtr)) // Skip whitespace. 
21205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++ForwardPtr; 21215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/') 21225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 21235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 21241eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 212574d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 2126bb23628148f555a4cf71f98c27096a7a804c085cNico Weber Diag(OldPtr-1, diag::ext_multi_line_line_comment); 21275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 21285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 21295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 21301eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 213155817afdf9d453a443262a733f6caf6692dca118Douglas Gregor if (CurPtr == BufferEnd+1) { 213255817afdf9d453a443262a733f6caf6692dca118Douglas Gregor --CurPtr; 213355817afdf9d453a443262a733f6caf6692dca118Douglas Gregor break; 213455817afdf9d453a443262a733f6caf6692dca118Douglas Gregor } 21357d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 21367d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { 21377d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 21387d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis cutOffLexing(); 21397d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis return false; 21407d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 21417d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 21425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } while (C != '\n' && C != '\r'); 21435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 21443d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner // Found but did not consume the newline. 
Notify comment handlers about the 21453d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner // comment unless we're in a #if 0 block. 21463d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner if (PP && !isLexingRawMode() && 21473d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr), 21483d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner getSourceLocation(CurPtr)))) { 2149046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner BufferPtr = CurPtr; 2150046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner return true; // A token has to be returned. 2151046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner } 21521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are returning comments as tokens, return this comment as a token. 2154fa95a019da00b926d64ff83358ba73bbc6ae1e37Chris Lattner if (inKeepCommentMode()) 2155bb23628148f555a4cf71f98c27096a7a804c085cNico Weber return SaveLineComment(Result, CurPtr); 21565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 21575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are inside a preprocessor directive and we see the end of line, 215884021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne // return immediately, so that the lexer can return this as an EOD token. 21595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ParsingPreprocessorDirective || CurPtr == BufferEnd) { 21605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 21612d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 21625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 21631eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, eat the \n character. We don't care if this is a \n\r or 21657a4f004a77421af876152281b92c4f8ea24afb2dChris Lattner // \r\n sequence. 
This is an efficiency hack (because we know the \n can't 2166d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // contribute to another token), it isn't needed for correctness. Note that 2167d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // this is ok even in KeepWhitespaceMode, because we would have returned the 2168d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner /// comment above in that mode. 21695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++CurPtr; 21701eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // The next returned token is at the start of the line. 2172d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.setFlag(Token::StartOfLine); 2173d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman TokAtPhysicalStartOfLine = true; 21745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // No leading whitespace seen so far. 2175d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.clearFlag(Token::LeadingSpace); 21765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 21772d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 21785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 21795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2180bb23628148f555a4cf71f98c27096a7a804c085cNico Weber/// If in save-comment mode, package up this Line comment in an appropriate 2181bb23628148f555a4cf71f98c27096a7a804c085cNico Weber/// way and return it. 2182bb23628148f555a4cf71f98c27096a7a804c085cNico Weberbool Lexer::SaveLineComment(Token &Result, const char *CurPtr) { 21839e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner // If we're not in a preprocessor directive, just return the // comment 21849e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner // directly. 
21859e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::comment); 21861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21878c0b3787e7ccc7978b42dfbb84da2b802c743a5dDavid Blaikie if (!ParsingPreprocessorDirective || LexingRawMode) 21889e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner return true; 21891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2190bb23628148f555a4cf71f98c27096a7a804c085cNico Weber // If this Line-style comment is in a macro definition, transmogrify it into 21919e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner // a C-style block comment. 2192453091cc2082e207ea2c2dda645a9bc01b37fb0cDouglas Gregor bool Invalid = false; 2193453091cc2082e207ea2c2dda645a9bc01b37fb0cDouglas Gregor std::string Spelling = PP->getSpelling(Result, &Invalid); 2194453091cc2082e207ea2c2dda645a9bc01b37fb0cDouglas Gregor if (Invalid) 2195453091cc2082e207ea2c2dda645a9bc01b37fb0cDouglas Gregor return true; 2196453091cc2082e207ea2c2dda645a9bc01b37fb0cDouglas Gregor 2197bb23628148f555a4cf71f98c27096a7a804c085cNico Weber assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?"); 21989e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Spelling[1] = '*'; // Change prefix to "/*". 21999e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Spelling += "*/"; // add suffix. 
22001eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22019e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Result.setKind(tok::comment); 2202374b3837d676133fcc1eb70a25c8baf8ec4a5c4aDmitri Gribenko PP->CreateString(Spelling, Result, 2203a08529cc3f00e0b47a3c028823634129ac46847bAbramo Bagnara Result.getLocation(), Result.getLocation()); 22042d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return true; 22055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 22065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 22075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline 220880d7c52653800d3338cca2c4388348010d2b1921David Blaikie/// character (either \\n or \\r) is part of an escaped newline sequence. Issue 220980d7c52653800d3338cca2c4388348010d2b1921David Blaikie/// a diagnostic if so. We know that the newline is inside of a block comment. 22101eb4433ac451dc16f4133a88af2d002ac26c58efMike Stumpstatic bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, 22115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Lexer *L) { 22125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); 22131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Back up off the newline. 22155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer --CurPtr; 22161eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is a two-character newline sequence, skip the other character. 22185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[0] == '\n' || CurPtr[0] == '\r') { 22195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // \n\n or \r\r -> not escaped newline. 
22205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[0] == CurPtr[1]) 22215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return false; 22225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // \n\r or \r\n -> skip the newline. 22235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer --CurPtr; 22245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 22251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have horizontal whitespace, skip over it. We allow whitespace 22275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // between the slash and newline. 22285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer bool HasSpace = false; 22295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) { 22305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer --CurPtr; 22315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer HasSpace = true; 22325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 22331eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have a slash, we know this is an escaped newline. 22355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (*CurPtr == '\\') { 22365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[-1] != '*') return false; 22375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 22385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // It isn't a slash, is it the ?? / trigraph? 22395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' || 22405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr[-3] != '*') 22415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return false; 22421eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // This is the trigraph ending the comment. 
Emit a stern warning! 22445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr -= 2; 22455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 22465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If no trigraphs are enabled, warn that we ignored this trigraph and 22475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // ignore this * character. 22484e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!L->getLangOpts().Trigraphs) { 224974d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 225074d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CurPtr, diag::trigraph_ignored_block_comment); 22515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return false; 22525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 225374d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 225474d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CurPtr, diag::trigraph_ends_block_comment); 22555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 22561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Warn about having an escaped newline between the */ characters. 225874d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 225974d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CurPtr, diag::escaped_newline_block_comment_end); 22601eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If there was space between the backslash and newline, warn about it. 
226274d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (HasSpace && !L->isLexingRawMode()) 226374d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CurPtr, diag::backslash_newline_space); 22641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return true; 22665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 22675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 22685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#ifdef __SSE2__ 22695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include <emmintrin.h> 22705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#elif __ALTIVEC__ 22715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include <altivec.h> 22725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#undef bool 22735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#endif 22745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2275ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// We have just read from input the / and * characters that started a comment. 2276ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// Read until we find the * and / characters that terminate the comment. 2277ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// Note that we don't bother decoding trigraphs or escaped newlines in block 2278ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// comments, because they cannot cause the comment to end. The only thing 2279ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// that can happen is the comment could end with an escaped newline between 2280ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// the terminating * and /. 22812d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner/// 2282046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// If we're in KeepCommentMode or any CommentHandler has inserted 2283046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// some tokens, this will store the first token and return true. 
2284d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, 2285d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool &TokAtPhysicalStartOfLine) { 22865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Scan one character past where we should, looking for a '/' character. Once 2287fc8f0e14ad142ed811e90fbd9a30e419e301c717Chris Lattner // we find it, check to see if it was preceded by a *. This common 22885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // optimization helps people who like to put a lot of * characters in their 22895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // comments. 22908146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner 22918146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // The first character we get with newlines and trigraphs skipped to handle 22928146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // the degenerate /*/ case below correctly if the * has an escaped newline 22938146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // after it. 22948146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner unsigned CharSize; 22958146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner unsigned char C = getCharAndSize(CurPtr, CharSize); 22968146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner CurPtr += CharSize; 22975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == 0 && CurPtr == BufferEnd+1) { 22987d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (!isLexingRawMode()) 22990af574270d3be2b0e73a3379dfaa633746f8fc6fChris Lattner Diag(BufferPtr, diag::err_unterminated_block_comment); 230031f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner --CurPtr; 23011eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 230231f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner // KeepWhitespaceMode should return this broken comment as a token. 
Since 230331f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner // it isn't a well formed comment, just return it as an 'unknown' token. 230431f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner if (isKeepWhitespaceMode()) { 23059e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 230631f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner return true; 230731f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner } 23081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 230931f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner BufferPtr = CurPtr; 23102d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 23115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23121eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23138146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // Check to see if the first character after the '/*' is another /. If so, 23148146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // then this slash does not end the block comment, it is part of it. 23158146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner if (C == '/') 23168146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner C = *CurPtr++; 23171eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (1) { 23195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Skip over all non-interesting characters until we find end of buffer or a 23205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // (probably ending) '/' character. 23217d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (CurPtr + 24 < BufferEnd && 23227d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis // If there is a code-completion point avoid the fast scan because it 23237d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis // doesn't check for '\0'. 
23247d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) { 23255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // While not aligned to a 16-byte boundary. 23265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0) 23275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 23281eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '/') goto FoundSlash; 23305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 23315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#ifdef __SSE2__ 23326bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines __m128i Slashes = _mm_set1_epi8('/'); 23336bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines while (CurPtr+16 <= BufferEnd) { 23346bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr, 23356bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines Slashes)); 23363f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer if (cmp != 0) { 23376300f5b4382d5128c9346a4d4f05e7bac2e3d771Benjamin Kramer // Adjust the pointer to point directly after the first slash. It's 23386300f5b4382d5128c9346a4d4f05e7bac2e3d771Benjamin Kramer // not necessary to set C here, it will be overwritten at the end of 23396300f5b4382d5128c9346a4d4f05e7bac2e3d771Benjamin Kramer // the outer loop. 23409779fdd271bb6a938bdee93f901e4ef7b1a88610Michael J. 
Spencer CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1; 23413f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer goto FoundSlash; 23423f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer } 23436bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines CurPtr += 16; 23443f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer } 23455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#elif __ALTIVEC__ 23465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer __vector unsigned char Slashes = { 23471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump '/', '/', '/', '/', '/', '/', '/', '/', 23485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer '/', '/', '/', '/', '/', '/', '/', '/' 23495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer }; 23505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (CurPtr+16 <= BufferEnd && 2351176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines !vec_any_eq(*(const vector unsigned char*)CurPtr, Slashes)) 23525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr += 16; 23531eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump#else 23545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Scan for '/' quickly. Many block comments are very large. 23555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (CurPtr[0] != '/' && 23565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr[1] != '/' && 23575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr[2] != '/' && 23585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr[3] != '/' && 23595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr+4 < BufferEnd) { 23605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr += 4; 23615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#endif 23631eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // It has to be one of the bytes scanned, increment to it and read one. 
23655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 23665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23671eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Loop to scan the remainder. 23695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (C != '/' && C != '\0') 23705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 23711eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '/') { 23733f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer FoundSlash: 23745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[-2] == '*') // We found the final */. We're done! 23755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 23761eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) { 23785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) { 23795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // We found the final */, though it had an escaped newline between the 23805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // * and /. We're done! 23815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 23825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[0] == '*' && CurPtr[1] != '/') { 23855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is a /* inside of the comment, emit a warning. Don't do this 23865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // if this is a /*/, which will end the comment. This misses cases with 23875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // embedded escaped newlines, but oh well. 
238874d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 238974d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(CurPtr-1, diag::warn_nested_block_comment); 23905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (C == 0 && CurPtr == BufferEnd+1) { 23927d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (!isLexingRawMode()) 239374d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(BufferPtr, diag::err_unterminated_block_comment); 23945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Note: the user probably forgot a */. We could continue immediately 23955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // after the /*, but this would involve lexing a lot of what really is the 23965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // comment, which surely would confuse the parser. 239731f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner --CurPtr; 23981eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 239931f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner // KeepWhitespaceMode should return this broken comment as a token. Since 240031f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner // it isn't a well formed comment, just return it as an 'unknown' token. 
240131f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner if (isKeepWhitespaceMode()) { 24029e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 240331f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner return true; 240431f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner } 24051eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 240631f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner BufferPtr = CurPtr; 24072d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 24087d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { 24097d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 24107d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis cutOffLexing(); 24117d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis return false; 24125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24137d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 24145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 24155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24161eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 24173d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner // Notify comment handlers about the comment unless we're in a #if 0 block. 24183d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner if (PP && !isLexingRawMode() && 24193d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr), 24203d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner getSourceLocation(CurPtr)))) { 2421046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner BufferPtr = CurPtr; 2422046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner return true; // A token has to be returned. 
2423046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner } 24242e22253e03e175144aeb9d13350a12fd83f858beDouglas Gregor 24255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are returning comments as tokens, return this comment as a token. 2426fa95a019da00b926d64ff83358ba73bbc6ae1e37Chris Lattner if (inKeepCommentMode()) { 24279e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::comment); 24282d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return true; 24295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 24315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // It is common for the tokens immediately after a /**/ comment to be 24325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // whitespace. Instead of going through the big switch, handle it 2433d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // efficiently now. This is safe even in KeepWhitespaceMode because we would 2434d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // have already returned above with the comment as a token. 24355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (isHorizontalWhitespace(*CurPtr)) { 2436d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine); 24372d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 24385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 24405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, just return so that the next character will be lexed as a token. 
24415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 2442d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.setFlag(Token::LeadingSpace); 24432d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 24445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 24455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 24465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 24475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Primary Lexing Entry Points 24485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 24495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 24505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// ReadToEndOfLine - Read the rest of the current preprocessor line as an 24515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// uninterpreted string. This switches the lexer out of directive mode. 24523093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramervoid Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) { 24535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer assert(ParsingPreprocessorDirective && ParsingFilename == false && 24545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer "Must be in a preprocessing directive!"); 2455d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Token Tmp; 24565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 24575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // CurPtr - Cache BufferPtr in an automatic variable. 
24585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *CurPtr = BufferPtr; 24595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (1) { 24605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char Char = getAndAdvanceChar(CurPtr, Tmp); 24615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer switch (Char) { 24625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer default: 24633093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer if (Result) 24643093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer Result->push_back(Char); 24655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 24665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 0: // Null. 24675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Found end of file? 24685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr-1 != BufferEnd) { 24697d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (isCodeCompletionPoint(CurPtr-1)) { 24707d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 24717d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis cutOffLexing(); 24723093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer return; 24737d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 24747d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 24755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Nope, normal character, continue. 24763093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer if (Result) 24773093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer Result->push_back(Char); 24785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 24795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // FALL THROUGH. 
24815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\r': 24825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\n': 24835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Okay, we found the end of the line. First, back up past the \0, \r, \n. 24845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer assert(CurPtr[-1] == Char && "Trigraphs for newline?"); 24855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr-1; 24861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 248784021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne // Next, lex the character, which should handle the EOD transition. 24885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Lex(Tmp); 248955817afdf9d453a443262a733f6caf6692dca118Douglas Gregor if (Tmp.is(tok::code_completion)) { 24907d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (PP) 24917d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 249255817afdf9d453a443262a733f6caf6692dca118Douglas Gregor Lex(Tmp); 249355817afdf9d453a443262a733f6caf6692dca118Douglas Gregor } 249484021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne assert(Tmp.is(tok::eod) && "Unexpected token!"); 24951eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 24963093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer // Finally, we're done; 24973093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer return; 24985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 25005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 25015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 25025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexEndOfFile - CurPtr points to the end of this file. Handle this 25035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// condition, reporting diagnostics and handling other edge cases as required. 
25045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// This returns true if Result contains a token, false if PP.Lex should be 25055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// called again. 2506d217773f106856a11879ec79dc468efefaf2ee75Chris Lattnerbool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { 25075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we hit the end of the file while parsing a preprocessor directive, 25085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // end the preprocessor directive first. The next token returned will 25095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // then be the end of file. 25105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ParsingPreprocessorDirective) { 25115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Done parsing the "line". 25125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ParsingPreprocessorDirective = false; 25135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 251484021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne FormTokenWithChars(Result, CurPtr, tok::eod); 25151eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 25165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Restore comment saving mode, in case it was disabled for directive. 2517651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines if (PP) 2518651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines resetExtendedTokenMode(); 25195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return true; // Have a token. 25201eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 252186d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor 252286d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor // If we are in raw mode, return this event as an EOF token. Let the caller 252386d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor // that put us in raw mode handle the event. 
252486d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor if (isLexingRawMode()) { 252586d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor Result.startToken(); 252686d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor BufferPtr = BufferEnd; 252786d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor FormTokenWithChars(Result, BufferEnd, tok::eof); 252886d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor return true; 252986d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor } 253081b747b7fcc91c2fba9a3183d8fac80adbfc1d3eDouglas Gregor 2531f44e854ed1e3aa86d2ed6d615ccd109d50ddcff9Douglas Gregor // Issue diagnostics for unterminated #if and missing newline. 2532f44e854ed1e3aa86d2ed6d615ccd109d50ddcff9Douglas Gregor 25335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are in a #if directive, emit an error. 25345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (!ConditionalStack.empty()) { 25357d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (PP->getCodeCompletionFileLoc() != FileLoc) 25362d474ba9e8ae43a1a5a9f72718c0d79092b9453fDouglas Gregor PP->Diag(ConditionalStack.back().IfLoc, 25372d474ba9e8ae43a1a5a9f72718c0d79092b9453fDouglas Gregor diag::err_pp_unterminated_conditional); 25385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ConditionalStack.pop_back(); 25395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 25401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2541b25e5d79d9d1967df058a242e96a62d0d0ace074Chris Lattner // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue 2542b25e5d79d9d1967df058a242e96a62d0d0ace074Chris Lattner // a pedwarn. 
25437865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) { 25447865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose DiagnosticsEngine &Diags = PP->getDiagnostics(); 25457865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose SourceLocation EndLoc = getSourceLocation(BufferEnd); 25467865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose unsigned DiagID; 25477865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose 25487865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose if (LangOpts.CPlusPlus11) { 25497865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose // C++11 [lex.phases] 2.2 p2 25507865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose // Prefer the C++98 pedantic compatibility warning over the generic, 25517865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose // non-extension, user-requested "missing newline at EOF" warning. 2552c568f1e98938584c0ef0b12ae5018ff7d90a4072Stephen Hines if (!Diags.isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) { 25537865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose DiagID = diag::warn_cxx98_compat_no_newline_eof; 25547865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } else { 25557865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose DiagID = diag::warn_no_newline_eof; 25567865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } 25577865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } else { 25587865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose DiagID = diag::ext_no_newline_eof; 25597865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } 25607865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose 25617865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose Diag(BufferEnd, DiagID) 25627865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose << FixItHint::CreateInsertion(EndLoc, "\n"); 25637865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } 25641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 25655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 
25665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 25675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Finally, let the preprocessor handle this. 25680cdd1fe3ec29b5cbff9a728966ace5c5b5d614f7Jordan Rose return PP->HandleEndOfFile(Result, isPragmaLexer()); 25695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 25705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 25715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from 25725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// the specified lexer will return a tok::l_paren token, 0 if it is something 25735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// else and 2 if there are no more tokens in the buffer controlled by the 25745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// lexer. 25755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerunsigned Lexer::isNextPPTokenLParen() { 25765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); 25771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 25785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Switch to 'skipping' mode. This will ensure that we can lex a token 25795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // without emitting diagnostics, disables macro expansion, and will cause EOF 25805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // to return an EOF token instead of popping the include stack. 25815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer LexingRawMode = true; 25821eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 25835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Save state that can be changed while lexing so that we can restore it. 
25845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *TmpBufferPtr = BufferPtr; 2585a864cf7c1d774a0f790bfc46befc87d9dbf1f65cChris Lattner bool inPPDirectiveMode = ParsingPreprocessorDirective; 2586d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool atStartOfLine = IsAtStartOfLine; 2587d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; 2588d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool leadingSpace = HasLeadingSpace; 25891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2590d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Token Tok; 2591d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Lex(Tok); 25921eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 25935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Restore state that may have changed. 25945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = TmpBufferPtr; 2595a864cf7c1d774a0f790bfc46befc87d9dbf1f65cChris Lattner ParsingPreprocessorDirective = inPPDirectiveMode; 2596d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingSpace = leadingSpace; 2597d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtStartOfLine = atStartOfLine; 2598d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = atPhysicalStartOfLine; 25991eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 26005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Restore the lexer back to non-skipping mode. 
26015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer LexingRawMode = false; 26021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 260322f6bbcafa8871f4f20c4402d9cbc5c024fee99aChris Lattner if (Tok.is(tok::eof)) 26045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return 2; 260522f6bbcafa8871f4f20c4402d9cbc5c024fee99aChris Lattner return Tok.is(tok::l_paren); 26065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 26075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2608ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// \brief Find the end of a version control conflict marker. 2609d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smithstatic const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd, 2610d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith ConflictMarkerKind CMK) { 2611d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>"; 2612d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith size_t TermLen = CMK == CMK_Perforce ? 5 : 7; 26134967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen); 2614d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith size_t Pos = RestOfBuffer.find(Terminator); 26155f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner while (Pos != StringRef::npos) { 261634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Must occur at start of line. 
26170e2c34f92f00628d48968dfea096d36381f494cbStephen Hines if (Pos == 0 || 26180e2c34f92f00628d48968dfea096d36381f494cbStephen Hines (RestOfBuffer[Pos - 1] != '\r' && RestOfBuffer[Pos - 1] != '\n')) { 2619d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith RestOfBuffer = RestOfBuffer.substr(Pos+TermLen); 2620d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith Pos = RestOfBuffer.find(Terminator); 262134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner continue; 262234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 262334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return RestOfBuffer.data()+Pos; 262434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 26256bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines return nullptr; 262634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner} 262734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 262834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner/// IsStartOfConflictMarker - If the specified pointer is the start of a version 262934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner/// control conflict marker like '<<<<<<<', recognize it as such, emit an error 263034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner/// and recover nicely. This returns true if it is a conflict marker and false 263134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner/// if not. 263234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattnerbool Lexer::IsStartOfConflictMarker(const char *CurPtr) { 263334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Only a conflict marker if it starts at the beginning of a line. 
263434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr != BufferStart && 263534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr[-1] != '\n' && CurPtr[-1] != '\r') 263634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 263734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 2638d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // Check to see if we have <<<<<<< or >>>>. 26394967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") && 26404967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar !StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> ")) 264134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 264234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 264334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If we have a situation where we don't care about conflict markers, ignore 264434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // it. 2645d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith if (CurrentConflictMarkerState || isLexingRawMode()) 264634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 264734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 2648d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce; 2649d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith 2650d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // Check to see if there is an ending marker somewhere in the buffer at the 2651d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // start of a line to terminate this conflict marker. 2652d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith if (FindConflictEnd(CurPtr, BufferEnd, Kind)) { 265334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // We found a match. We are really in a conflict marker. 
265434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Diagnose this, and ignore to the end of line. 265534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Diag(CurPtr, diag::err_conflict_marker); 2656d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith CurrentConflictMarkerState = Kind; 265734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 265834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Skip ahead to the end of line. We know this exists because the 265934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // end-of-conflict marker starts with \r or \n. 266034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner while (*CurPtr != '\r' && *CurPtr != '\n') { 266134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner assert(CurPtr != BufferEnd && "Didn't find end of line"); 266234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner ++CurPtr; 266334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 266434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner BufferPtr = CurPtr; 266534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return true; 266634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 266734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 266834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // No end of conflict marker found. 266934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 267034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner} 267134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 267234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 2673d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if 2674d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it 2675d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith/// is the end of a conflict marker. 
Handle it by ignoring up until the end of 2676d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith/// the line. This returns true if it is a conflict marker and false if not. 267734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattnerbool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { 267834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Only a conflict marker if it starts at the beginning of a line. 267934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr != BufferStart && 268034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr[-1] != '\n' && CurPtr[-1] != '\r') 268134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 268234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 268334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If we have a situation where we don't care about conflict markers, ignore 268434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // it. 2685d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith if (!CurrentConflictMarkerState || isLexingRawMode()) 268634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 268734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 2688d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // Check to see if we have the marker (4 characters in a row). 2689d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith for (unsigned i = 1; i != 4; ++i) 269034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr[i] != CurPtr[0]) 269134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 269234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 269334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If we do have it, search for the end of the conflict marker. This could 269434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // fail if it got skipped with a '#if 0' or something. Note that CurPtr might 269534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // be the end of conflict marker. 
2696d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith if (const char *End = FindConflictEnd(CurPtr, BufferEnd, 2697d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith CurrentConflictMarkerState)) { 269834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = End; 269934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 270034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Skip ahead to the end of line. 270134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n') 270234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner ++CurPtr; 270334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 270434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner BufferPtr = CurPtr; 270534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 270634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // No longer in the conflict marker. 2707d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith CurrentConflictMarkerState = CMK_None; 270834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return true; 270934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 271034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 271134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 271234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner} 271334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 27147d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidisbool Lexer::isCodeCompletionPoint(const char *CurPtr) const { 27157d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (PP && PP->isCodeCompletionEnabled()) { 2716a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart); 27177d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis return Loc == PP->getCodeCompletionLoc(); 27187d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 27197d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 
27207d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis return false; 27217d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis} 27227d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 2723c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Roseuint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, 2724c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Token *Result) { 2725c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose unsigned CharSize; 2726c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose char Kind = getCharAndSize(StartPtr, CharSize); 2727c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2728c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose unsigned NumHexDigits; 2729c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Kind == 'u') 2730c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose NumHexDigits = 4; 2731c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else if (Kind == 'U') 2732c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose NumHexDigits = 8; 2733c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else 2734c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return 0; 2735c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2736bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose if (!LangOpts.CPlusPlus && !LangOpts.C99) { 27378094bac4e987caf90e8fd719c24545add8dafcb6Jordan Rose if (Result && !isLexingRawMode()) 27388094bac4e987caf90e8fd719c24545add8dafcb6Jordan Rose Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89); 2739bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose return 0; 2740bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose } 2741bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose 2742c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose const char *CurPtr = StartPtr + CharSize; 2743c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose const char *KindLoc = &CurPtr[-1]; 2744c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2745c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose uint32_t CodePoint = 0; 
2746c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose for (unsigned i = 0; i < NumHexDigits; ++i) { 2747c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose char C = getCharAndSize(CurPtr, CharSize); 2748c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2749c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose unsigned Value = llvm::hexDigitValue(C); 2750c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Value == -1U) { 2751c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Result && !isLexingRawMode()) { 2752c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (i == 0) { 2753c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Diag(BufferPtr, diag::warn_ucn_escape_no_digits) 2754c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose << StringRef(KindLoc, 1); 2755c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } else { 2756c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Diag(BufferPtr, diag::warn_ucn_escape_incomplete); 2757b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose 2758b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose // If the user wrote \U1234, suggest a fixit to \u. 
2759b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose if (i == 4 && NumHexDigits == 8) { 2760ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1); 2761b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose Diag(KindLoc, diag::note_ucn_four_not_eight) 2762b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose << FixItHint::CreateReplacement(URange, "u"); 2763b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose } 2764c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2765c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2766bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose 2767c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return 0; 2768c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2769c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2770c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose CodePoint <<= 4; 2771c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose CodePoint += Value; 2772c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2773c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose CurPtr += CharSize; 2774c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2775c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2776c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Result) { 2777c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Result->setFlag(Token::HasUCN); 2778b6c08a64145485a7c233761220b8d82b74aa7546NAKAMURA Takumi if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2) 2779c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose StartPtr = CurPtr; 2780c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else 2781c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose while (StartPtr != CurPtr) 2782c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose (void)getAndAdvanceChar(StartPtr, *Result); 2783c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } else { 2784c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose StartPtr = CurPtr; 
2785c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2786c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2787fbfd6426e256f8be081260609d8fa88ae94ffe14Justin Bogner // Don't apply C family restrictions to UCNs in assembly mode 2788fbfd6426e256f8be081260609d8fa88ae94ffe14Justin Bogner if (LangOpts.AsmPreprocessor) 2789fbfd6426e256f8be081260609d8fa88ae94ffe14Justin Bogner return CodePoint; 2790fbfd6426e256f8be081260609d8fa88ae94ffe14Justin Bogner 2791c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // C99 6.4.3p2: A universal character name shall not specify a character whose 2792c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // short identifier is less than 00A0 other than 0024 ($), 0040 (@), or 2793c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // 0060 (`), nor one in the range D800 through DFFF inclusive.) 2794c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // C++11 [lex.charset]p2: If the hexadecimal value for a 2795c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // universal-character-name corresponds to a surrogate code point (in the 2796c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // range 0xD800-0xDFFF, inclusive), the program is ill-formed. Additionally, 2797c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // if the hexadecimal value for a universal-character-name outside the 2798c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // c-char-sequence, s-char-sequence, or r-char-sequence of a character or 2799c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // string literal corresponds to a control character (in either of the 2800c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a character in the 2801c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // basic source character set, the program is ill-formed. 
2802c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (CodePoint < 0xA0) { 2803c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60) 2804c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return CodePoint; 2805c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2806c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // We don't use isLexingRawMode() here because we need to warn about bad 2807c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // UCNs even when skipping preprocessing tokens in a #if block. 2808c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Result && PP) { 2809c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (CodePoint < 0x20 || CodePoint >= 0x7F) 2810c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Diag(BufferPtr, diag::err_ucn_control_character); 2811c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else { 2812c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose char C = static_cast<char>(CodePoint); 2813c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1); 2814c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2815c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2816c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2817c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return 0; 2818ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 2819ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) { 2820c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't. 2821ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose // We don't use isLexingRawMode() here because we need to diagnose bad 2822c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // UCNs even when skipping preprocessing tokens in a #if block. 
2823ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (Result && PP) { 2824ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11) 2825ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diag(BufferPtr, diag::warn_ucn_escape_surrogate); 2826ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose else 2827ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diag(BufferPtr, diag::err_ucn_escape_invalid); 2828ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 2829c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return 0; 2830c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2831c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2832c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return CodePoint; 2833c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose} 2834c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2835d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C, 2836d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman const char *CurPtr) { 2837263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars( 2838263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko UnicodeWhitespaceCharRanges); 283974c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose if (!isLexingRawMode() && !PP->isPreprocessedOutput() && 2840263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko UnicodeWhitespaceChars.contains(C)) { 284174c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose Diag(BufferPtr, diag::ext_unicode_whitespace) 2842ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << makeCharRange(*this, BufferPtr, CurPtr); 2843fc12060ed595fd23d731b8a86adb21ddbb8c7bfbJordan Rose 2844fc12060ed595fd23d731b8a86adb21ddbb8c7bfbJordan Rose Result.setFlag(Token::LeadingSpace); 2845d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 2846fc12060ed595fd23d731b8a86adb21ddbb8c7bfbJordan Rose } 
2847d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return false; 2848d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman} 2849fc12060ed595fd23d731b8a86adb21ddbb8c7bfbJordan Rose 2850d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { 2851ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) { 2852ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (!isLexingRawMode() && !ParsingPreprocessorDirective && 2853ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose !PP->isPreprocessedOutput()) { 2854ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C, 2855ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose makeCharRange(*this, BufferPtr, CurPtr), 2856ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose /*IsFirst=*/true); 2857ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 2858ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 2859c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose MIOpt.ReadToken(); 2860c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return LexIdentifier(Result, CurPtr); 2861c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2862c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 28630ed439487491e09faffdbabfacb1d050292c7723Jordan Rose if (!isLexingRawMode() && !ParsingPreprocessorDirective && 28640ed439487491e09faffdbabfacb1d050292c7723Jordan Rose !PP->isPreprocessedOutput() && 2865ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose !isASCII(*BufferPtr) && !isAllowedIDChar(C, LangOpts)) { 2866c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Non-ASCII characters tend to creep into source code unintentionally. 2867c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Instead of letting the parser complain about the unknown token, 2868c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // just drop the character. 
2869c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Note that we can /only/ do this when the non-ASCII character is actually 2870c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // spelled as Unicode, not written as a UCN. The standard requires that 2871c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // we not throw away any possible preprocessor tokens, but there's a 2872c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // loophole in the mapping of Unicode characters to basic character set 2873c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // characters that allows us to map these particular characters to, say, 2874c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // whitespace. 287574c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose Diag(BufferPtr, diag::err_non_ascii) 2876ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr)); 2877c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2878c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose BufferPtr = CurPtr; 2879d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return false; 2880c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2881c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2882c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Otherwise, we have an explicit UCN or a character that's unlikely to show 2883c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // up by accident. 
2884c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose MIOpt.ReadToken(); 2885c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose FormTokenWithChars(Result, CurPtr, tok::unknown); 2886d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 2887d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman} 2888d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2889d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanvoid Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) { 2890d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtStartOfLine = Result.isAtStartOfLine(); 2891d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingSpace = Result.hasLeadingSpace(); 2892d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingEmptyMacro = Result.hasLeadingEmptyMacro(); 2893d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // Note that this doesn't affect IsAtPhysicalStartOfLine. 2894c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose} 2895c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2896d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::Lex(Token &Result) { 2897d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // Start a new token. 2898d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Result.startToken(); 2899d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2900d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // Set up misc whitespace flags for LexTokenInternal. 
2901d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (IsAtStartOfLine) { 2902d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Result.setFlag(Token::StartOfLine); 2903d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtStartOfLine = false; 2904d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 2905d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2906d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (HasLeadingSpace) { 2907d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Result.setFlag(Token::LeadingSpace); 2908d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingSpace = false; 2909d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 2910d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2911d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (HasLeadingEmptyMacro) { 2912d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Result.setFlag(Token::LeadingEmptyMacro); 2913d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingEmptyMacro = false; 2914d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 2915d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2916d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; 2917d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = false; 29180f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman bool isRawLex = isLexingRawMode(); 29190f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman (void) isRawLex; 29200f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); 29210f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman // (After the LexTokenInternal call, the lexer might be destroyed.) 
29220f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman assert((returnedToken || !isRawLex) && "Raw lex must succeed"); 29230f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman return returnedToken; 2924d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman} 29255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 29265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexTokenInternal - This implements a simple C family lexer. It is an 29275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// extremely performance critical piece of code. This assumes that the buffer 2928efb173ddd95325c7fd3c15070392b27c07a49a85Chris Lattner/// has a null character at the end of the file. This returns a preprocessing 2929efb173ddd95325c7fd3c15070392b27c07a49a85Chris Lattner/// token, not a normal token, as such, it is an internal interface. It assumes 2930efb173ddd95325c7fd3c15070392b27c07a49a85Chris Lattner/// that the Flags of result have been cleared before calling this. 2931d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { 29325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerLexNextToken: 29335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // New token, can't need cleaning yet. 2934d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.clearFlag(Token::NeedsCleaning); 29356bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines Result.setIdentifierInfo(nullptr); 29361eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // CurPtr - Cache BufferPtr in an automatic variable. 29385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *CurPtr = BufferPtr; 29395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 29405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Small amounts of horizontal whitespace is very common between tokens. 
29415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if ((*CurPtr == ' ') || (*CurPtr == '\t')) { 29425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++CurPtr; 29435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while ((*CurPtr == ' ') || (*CurPtr == '\t')) 29445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++CurPtr; 29451eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2946d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // If we are keeping whitespace and other tokens, just return what we just 2947d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // skipped. The next lexer invocation will return the token after the 2948d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // whitespace. 2949d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner if (isKeepWhitespaceMode()) { 29509e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 29516aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose // FIXME: The next token will not have LeadingSpace set. 2952d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 2953d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner } 29541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 2956d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.setFlag(Token::LeadingSpace); 29575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 29581eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below. 29601eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Read a character, advancing over it. 
29625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char Char = getAndAdvanceChar(CurPtr, Result); 29639e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner tok::TokenKind Kind; 29641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer switch (Char) { 29665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 0: // Null. 29675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Found end of file? 2968d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (CurPtr-1 == BufferEnd) 2969d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return LexEndOfFile(Result, CurPtr-1); 29701eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29717d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis // Check if we are performing code completion. 29727d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (isCodeCompletionPoint(CurPtr-1)) { 29737d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis // Return the code-completion token. 
29747d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis Result.startToken(); 29757d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis FormTokenWithChars(Result, CurPtr, tok::code_completion); 2976d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 29777d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 29787d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 297974d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 298074d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(CurPtr-1, diag::null_in_file); 2981d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.setFlag(Token::LeadingSpace); 2982d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 2983d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 29841eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2985d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We know the lexer hasn't changed, so just try again with this lexer. 2986d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 2987d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 2988a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner 2989a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner case 26: // DOS & CP/M EOF: "^Z". 2990a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner // If we're in Microsoft extensions mode, treat this as end of file. 
29914967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar if (LangOpts.MicrosoftExt) { 29924967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar if (!isLexingRawMode()) 29934967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft); 2994d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return LexEndOfFile(Result, CurPtr-1); 29954967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar } 2996d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2997a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner // If Microsoft extensions are disabled, this is just random garbage. 2998a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner Kind = tok::unknown; 2999a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner break; 3000a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner 30015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\n': 30025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\r': 30035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are inside a preprocessor directive and we see the end of line, 300484021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne // we know we are done with the directive, so return an EOD token. 30055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ParsingPreprocessorDirective) { 30065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Done parsing the "line". 30075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ParsingPreprocessorDirective = false; 30081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 30095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Restore comment saving mode, in case it was disabled for directive. 
30101a8354659a6007bbae3b5d9161a56ecc8f61a219David Blaikie if (PP) 30116aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose resetExtendedTokenMode(); 30121eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 30135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Since we consumed a newline, we are back at the start of a line. 30145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer IsAtStartOfLine = true; 3015d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = true; 30161eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 301784021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne Kind = tok::eod; 30185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 30195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 30206aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 30215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // No leading whitespace seen so far. 3022d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.clearFlag(Token::LeadingSpace); 30231eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 3024d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 3025d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 3026d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 3027d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 3028d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 
3029d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 30305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ' ': 30315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\t': 30325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\f': 30335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\v': 30348133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner SkipHorizontalWhitespace: 3035d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.setFlag(Token::LeadingSpace); 3036d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 3037d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 30388133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner 30398133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner SkipIgnoredUnits: 30408133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner CurPtr = BufferPtr; 30411eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 30428133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner // If the next token is obviously a // or /* */ comment, skip it efficiently 30438133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner // too (without going through the big switch stmt). 30448402c73dd880e8af46c826d873681820aebe32ebChris Lattner if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && 3045a601579fa0c87b742797c1249e39cb54ec83083fEli Friedman LangOpts.LineComment && 3046a601579fa0c87b742797c1249e39cb54ec83083fEli Friedman (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { 3047d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) 3048d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // There is a token to return. 
30498133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner goto SkipIgnoredUnits; 3050fa95a019da00b926d64ff83358ba73bbc6ae1e37Chris Lattner } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { 3051d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) 3052d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // There is a token to return. 30538133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner goto SkipIgnoredUnits; 30548133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner } else if (isHorizontalWhitespace(*CurPtr)) { 30558133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner goto SkipHorizontalWhitespace; 30568133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner } 3057d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 3058d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 3059d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 3060a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner 30613a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // C99 6.4.4.1: Integer Constants. 30623a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // C99 6.4.4.2: Floating Constants. 30633a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case '0': case '1': case '2': case '3': case '4': 30643a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case '5': case '6': case '7': case '8': case '9': 30653a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // Notify MIOpt that we read a non-whitespace/non-comment token. 
30663a5707766850f9ee9daa35299794328b5caf96dcChris Lattner MIOpt.ReadToken(); 30673a5707766850f9ee9daa35299794328b5caf96dcChris Lattner return LexNumericConstant(Result, CurPtr); 30681eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 30690093e12513c5c896434915d5e9126f51b780aa61Richard Smith case 'u': // Identifier (uber) or C11/C++11 UTF-8 or UTF-16 string literal 30705cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // Notify MIOpt that we read a non-whitespace/non-comment token. 30715cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor MIOpt.ReadToken(); 30725cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30730093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (LangOpts.CPlusPlus11 || LangOpts.C11) { 30745cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor Char = getCharAndSize(CurPtr, SizeTmp); 30755cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30765cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // UTF-16 string literal 30775cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor if (Char == '"') 30785cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), 30795cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::utf16_string_literal); 30805cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30815cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // UTF-16 character constant 30825cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor if (Char == '\'') 30835cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), 30845cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::utf16_char_constant); 30855cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30862fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // UTF-16 raw string literal 30870093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (Char == 'R' && LangOpts.CPlusPlus11 && 30880093e12513c5c896434915d5e9126f51b780aa61Richard 
Smith getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') 30892fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 30902fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 30912fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 30922fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::utf16_string_literal); 30932fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 30942fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (Char == '8') { 30952fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2); 30962fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 30972fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // UTF-8 string literal 30982fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (Char2 == '"') 30992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexStringLiteral(Result, 31002fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 31012fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 31022fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::utf8_string_literal); 3103176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines if (Char2 == '\'' && LangOpts.CPlusPlus1z) 3104176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines return LexCharConstant( 3105176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 3106176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines SizeTmp2, Result), 3107176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines tok::utf8_char_constant); 31082fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 31090093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (Char2 == 'R' && LangOpts.CPlusPlus11) { 31102fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper unsigned SizeTmp3; 31112fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper char Char3 = 
getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); 31122fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // UTF-8 raw string literal 31132fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (Char3 == '"') { 31142fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 31152fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 31162fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 31172fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp3, Result), 31182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::utf8_string_literal); 31192fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 31202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 31212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 31225cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor } 31235cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 31245cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // treat u like the start of an identifier. 31255cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexIdentifier(Result, CurPtr); 31265cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 31270093e12513c5c896434915d5e9126f51b780aa61Richard Smith case 'U': // Identifier (Uber) or C11/C++11 UTF-32 string literal 31285cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // Notify MIOpt that we read a non-whitespace/non-comment token. 
31295cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor MIOpt.ReadToken(); 31305cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 31310093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (LangOpts.CPlusPlus11 || LangOpts.C11) { 31325cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor Char = getCharAndSize(CurPtr, SizeTmp); 31335cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 31345cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // UTF-32 string literal 31355cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor if (Char == '"') 31365cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), 31375cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::utf32_string_literal); 31385cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 31395cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // UTF-32 character constant 31405cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor if (Char == '\'') 31415cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), 31425cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::utf32_char_constant); 31432fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 31442fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // UTF-32 raw string literal 31450093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (Char == 'R' && LangOpts.CPlusPlus11 && 31460093e12513c5c896434915d5e9126f51b780aa61Richard Smith getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') 31472fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 31482fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 31492fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 31502fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::utf32_string_literal); 31515cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor } 
31525cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 31535cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // treat U like the start of an identifier. 31545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexIdentifier(Result, CurPtr); 31555cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 31562fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper case 'R': // Identifier or C++0x raw string literal 31572fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Notify MIOpt that we read a non-whitespace/non-comment token. 31582fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper MIOpt.ReadToken(); 31592fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 316080ad52f327b532bded5c5b0ee38779d841c6cd35Richard Smith if (LangOpts.CPlusPlus11) { 31612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Char = getCharAndSize(CurPtr, SizeTmp); 31622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 31632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (Char == '"') 31642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 31652fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(CurPtr, SizeTmp, Result), 31662fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::string_literal); 31672fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 31682fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 31692fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // treat R like the start of an identifier. 31702fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexIdentifier(Result, CurPtr); 31712fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 31723a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz"). 31735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 
31745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 31755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 31765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 31775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Wide string literal. 31785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '"') 31795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), 31805cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::wide_string_literal); 31815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 31822fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Wide raw string literal. 318380ad52f327b532bded5c5b0ee38779d841c6cd35Richard Smith if (LangOpts.CPlusPlus11 && Char == 'R' && 31842fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') 31852fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 31862fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 31872fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 31882fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::wide_string_literal); 31892fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 31905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Wide character constant. 31915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '\'') 31925cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), 31935cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::wide_char_constant); 31945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // FALL THROUGH, treating L like the start of an identifier. 31951eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 31965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // C99 6.4.2: Identifiers. 
31975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': 31985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N': 31992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/ 32005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'V': case 'W': case 'X': case 'Y': case 'Z': 32015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': 32025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 32035cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ 32045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'v': case 'w': case 'x': case 'y': case 'z': 32055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '_': 32065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 32075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 32085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return LexIdentifier(Result, CurPtr); 32093a5707766850f9ee9daa35299794328b5caf96dcChris Lattner 32103a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case '$': // $ in identifiers. 32114e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (LangOpts.DollarIdents) { 321274d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 321374d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(CurPtr-1, diag::ext_dollar_in_identifier); 32143a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // Notify MIOpt that we read a non-whitespace/non-comment token. 
32153a5707766850f9ee9daa35299794328b5caf96dcChris Lattner MIOpt.ReadToken(); 32163a5707766850f9ee9daa35299794328b5caf96dcChris Lattner return LexIdentifier(Result, CurPtr); 32173a5707766850f9ee9daa35299794328b5caf96dcChris Lattner } 32181eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 32199e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::unknown; 32203a5707766850f9ee9daa35299794328b5caf96dcChris Lattner break; 32211eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 32225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // C99 6.4.4: Character Constants. 32235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\'': 32245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 32255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 32265cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexCharConstant(Result, CurPtr, tok::char_constant); 32275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 32285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // C99 6.4.5: String Literals. 32295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '"': 32305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 32315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 32325cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexStringLiteral(Result, CurPtr, tok::string_literal); 32335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 32345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // C99 6.4.6: Punctuators. 
32355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '?': 32369e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::question; 32375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '[': 32399e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_square; 32405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ']': 32429e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_square; 32435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '(': 32459e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_paren; 32465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ')': 32489e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_paren; 32495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '{': 32519e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_brace; 32525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '}': 32549e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_brace; 32555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '.': 32575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 32585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char >= '0' && Char <= '9') { 32595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 
32605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 32615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 32625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); 32634e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.CPlusPlus && Char == '*') { 32649e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::periodstar; 32655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr += SizeTmp; 32665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '.' && 32675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') { 32689e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::ellipsis; 32695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 32705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer SizeTmp2, Result); 32715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 32729e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::period; 32735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 32745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '&': 32765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 32775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '&') { 32789e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::ampamp; 32795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '=') { 32819e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::ampequal; 32825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid 
Spencer } else { 32849e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::amp; 32855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 32865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump case '*': 32885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (getCharAndSize(CurPtr, SizeTmp) == '=') { 32899e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::starequal; 32905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 32929e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::star; 32935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 32945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '+': 32965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 32975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '+') { 32985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32999e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::plusplus; 33005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '=') { 33015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33029e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::plusequal; 33035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 33049e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::plus; 33055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 33065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 33075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '-': 33085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 
33099e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner if (Char == '-') { // -- 33105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33119e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::minusminus; 33124e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (Char == '>' && LangOpts.CPlusPlus && 33139e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { // C++ ->* 33145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 33155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer SizeTmp2, Result); 33169e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::arrowstar; 33179e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner } else if (Char == '>') { // -> 33185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33199e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::arrow; 33209e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner } else if (Char == '=') { // -= 33215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33229e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::minusequal; 33235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 33249e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::minus; 33255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 33265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 33275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '~': 33289e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::tilde; 33295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 33305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '!': 33315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (getCharAndSize(CurPtr, SizeTmp) == '=') { 
33329e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::exclaimequal; 33335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 33359e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::exclaim; 33365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 33375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 33385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '/': 33395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // 6.4.9: Comments 33405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 3341bb23628148f555a4cf71f98c27096a7a804c085cNico Weber if (Char == '/') { // Line comment. 3342bb23628148f555a4cf71f98c27096a7a804c085cNico Weber // Even if Line comments are disabled (e.g. in C89 mode), we generally 33438402c73dd880e8af46c826d873681820aebe32ebChris Lattner // want to lex this as a comment. There is one problem with this though, 33448402c73dd880e8af46c826d873681820aebe32ebChris Lattner // that in one particular corner case, this can change the behavior of the 33458402c73dd880e8af46c826d873681820aebe32ebChris Lattner // resultant program. For example, in "foo //**/ bar", C89 would lex 3346bb23628148f555a4cf71f98c27096a7a804c085cNico Weber // this as "foo / bar" and languages with Line comments would lex it as 33478402c73dd880e8af46c826d873681820aebe32ebChris Lattner // "foo". Check to see if the character after the second slash is a '*'. 33488402c73dd880e8af46c826d873681820aebe32ebChris Lattner // If so, we will lex that as a "/" instead of the start of a comment. 3349693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose // However, we never do this if we are just preprocessing. 
3350a601579fa0c87b742797c1249e39cb54ec83083fEli Friedman bool TreatAsComment = LangOpts.LineComment && 3351a601579fa0c87b742797c1249e39cb54ec83083fEli Friedman (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP); 3352693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose if (!TreatAsComment) 3353693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose if (!(PP && PP->isPreprocessedOutput())) 3354693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*'; 3355693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose 3356693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose if (TreatAsComment) { 3357d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), 3358d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman TokAtPhysicalStartOfLine)) 3359d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // There is a token to return. 33601eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 33618402c73dd880e8af46c826d873681820aebe32ebChris Lattner // It is common for the tokens immediately after a // comment to be 33628402c73dd880e8af46c826d873681820aebe32ebChris Lattner // whitespace (indentation for the next line). Instead of going through 33638402c73dd880e8af46c826d873681820aebe32ebChris Lattner // the big switch, handle it efficiently now. 33648402c73dd880e8af46c826d873681820aebe32ebChris Lattner goto SkipIgnoredUnits; 33658402c73dd880e8af46c826d873681820aebe32ebChris Lattner } 33668402c73dd880e8af46c826d873681820aebe32ebChris Lattner } 33671eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 33688402c73dd880e8af46c826d873681820aebe32ebChris Lattner if (Char == '*') { // /**/ comment. 
3369d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), 3370d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman TokAtPhysicalStartOfLine)) 3371d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // There is a token to return. 3372d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 3373d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 3374d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 3375d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 33768402c73dd880e8af46c826d873681820aebe32ebChris Lattner } 33771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 33788402c73dd880e8af46c826d873681820aebe32ebChris Lattner if (Char == '=') { 33795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33809e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::slashequal; 33815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 33829e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::slash; 33835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 33845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 33855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '%': 33865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 33875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 33889e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::percentequal; 33895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33904e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.Digraphs && Char == '>') { 33919e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_brace; // '%>' -> '}' 33925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid 
Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33934e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.Digraphs && Char == ':') { 33945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 33965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') { 33979e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::hashhash; // '%:%:' -> '##' 33985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 33995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer SizeTmp2, Result); 34004e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize 34015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 340274d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 340366d5ce11b9426f6a59f61a03cbd8dbf047cc9350Ted Kremenek Diag(BufferPtr, diag::ext_charize_microsoft); 34049e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::hashat; 3405e91e93225db2e66906878513c6ef4dd6a7ee2b6aChris Lattner } else { // '%:' -> '#' 34065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // We parsed a # character. If this occurs at the start of the line, 34075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // it's actually the start of a preprocessing directive. Callback to 34085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // the preprocessor to handle it. 34096bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines // TODO: -fpreprocessed mode?? 
3410d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) 34113185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis goto HandleDirective; 34121eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 3413e91e93225db2e66906878513c6ef4dd6a7ee2b6aChris Lattner Kind = tok::hash; 34145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 34155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 34169e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::percent; 34175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 34185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '<': 34205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 34215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ParsingFilename) { 34229cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner return LexAngledStringLiteral(Result, CurPtr); 34235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '<') { 342434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); 342534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (After == '=') { 342634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Kind = tok::lesslessequal; 342734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 342834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner SizeTmp2, Result); 342934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) { 343034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If this is actually a '<<<<<<<' version control conflict marker, 343134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // recognize it as such and recover nicely. 
343234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner goto LexNextToken; 3433d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) { 3434d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // If this is '<<<<' and we're in a Perforce-style conflict marker, 3435d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // ignore it. 3436d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith goto LexNextToken; 34374e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.CUDA && After == '<') { 34381b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne Kind = tok::lesslessless; 34391b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 34401b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne SizeTmp2, Result); 344134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } else { 344234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 344334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Kind = tok::lessless; 344434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 34455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '=') { 34465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34479e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::lessequal; 34484e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '[' 344980ad52f327b532bded5c5b0ee38779d841c6cd35Richard Smith if (LangOpts.CPlusPlus11 && 345087a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') { 345187a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // C++0x [lex.pptoken]p3: 345287a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // Otherwise, if the next three characters are <:: and the subsequent 
345387a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // character is neither : nor >, the < is treated as a preprocessor 345487a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // token by itself and not as the first character of the alternative 345587a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // token <:. 345687a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith unsigned SizeTmp3; 345787a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); 345887a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith if (After != ':' && After != '>') { 345987a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith Kind = tok::less; 3460661a99690bc133bbaa029da925481d4a860dec90Richard Smith if (!isLexingRawMode()) 3461661a99690bc133bbaa029da925481d4a860dec90Richard Smith Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon); 346287a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith break; 346387a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith } 346487a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith } 346587a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith 34665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34679e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_square; 34684e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{' 34695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34709e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_brace; 34715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 34729e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::less; 34735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 34745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '>': 
34765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 34775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 34785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34799e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::greaterequal; 34805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '>') { 348134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); 348234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (After == '=') { 348334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 348434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner SizeTmp2, Result); 348534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Kind = tok::greatergreaterequal; 3486d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) { 3487d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // If this is actually a '>>>>' conflict marker, recognize it as such 3488d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // and recover nicely. 3489d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith goto LexNextToken; 349034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) { 349134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If this is '>>>>>>>' and we're in a conflict marker, ignore it. 
349234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner goto LexNextToken; 34934e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.CUDA && After == '>') { 34941b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne Kind = tok::greatergreatergreater; 34951b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 34961b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne SizeTmp2, Result); 349734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } else { 349834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 349934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Kind = tok::greatergreater; 350034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 350134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 35025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 35039e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::greater; 35045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 35055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 35065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '^': 35075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 35085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 35095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35109e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::caretequal; 35114967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar } else if (LangOpts.OpenCL && Char == '^') { 35124967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35134967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar Kind = tok::caretcaret; 35145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 35159e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = 
tok::caret; 35165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 35175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 35185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '|': 35195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 35205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 35219e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::pipeequal; 35225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '|') { 352434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If this is '|||||||' and we're in a conflict marker, ignore it. 352534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1)) 352634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner goto LexNextToken; 35279e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::pipepipe; 35285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 35309e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::pipe; 35315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 35325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 35335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ':': 35345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 35354e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (LangOpts.Digraphs && Char == '>') { 35369e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_square; // ':>' -> ']' 35375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35384e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.CPlusPlus && Char == ':') { 
35399e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::coloncolon; 35405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35411eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } else { 35429e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::colon; 35435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 35445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 35455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ';': 35469e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::semi; 35475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 35485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '=': 35495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 35505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 3551d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // If this is '====' and we're in a conflict marker, ignore it. 
355234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1)) 355334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner goto LexNextToken; 355434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 35559e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::equalequal; 35565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35571eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } else { 35589e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::equal; 35595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 35605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 35615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ',': 35629e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::comma; 35635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 35645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '#': 35655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 35665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '#') { 35679e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::hashhash; 35685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35694e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize 35709e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::hashat; 357174d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 357266d5ce11b9426f6a59f61a03cbd8dbf047cc9350Ted Kremenek Diag(BufferPtr, diag::ext_charize_microsoft); 35735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 35755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // We parsed a # character. 
If this occurs at the start of the line, 35765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // it's actually the start of a preprocessing directive. Callback to 35775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // the preprocessor to handle it. 35786bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines // TODO: -fpreprocessed mode?? 3579d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) 35803185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis goto HandleDirective; 35811eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 3582e91e93225db2e66906878513c6ef4dd6a7ee2b6aChris Lattner Kind = tok::hash; 35835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 35845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 35855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 35863a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case '@': 35873a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // Objective C support. 
35884e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (CurPtr[-1] == '@' && LangOpts.ObjC1) 35899e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::at; 35903a5707766850f9ee9daa35299794328b5caf96dcChris Lattner else 35919e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::unknown; 35923a5707766850f9ee9daa35299794328b5caf96dcChris Lattner break; 35931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 3594c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // UCNs (C99 6.4.3, C++11 [lex.charset]p2) 35955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\\': 3596d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { 3597d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { 3598d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 3599d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 3600d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 3601d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 3602d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 
3603d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 3604d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 3605d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 3606c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return LexUnicode(Result, CodePoint, CurPtr); 3607d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 3608c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 36099e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::unknown; 36105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 3611c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 3612c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose default: { 3613c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (isASCII(Char)) { 3614c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Kind = tok::unknown; 3615c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose break; 3616c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 3617c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 3618c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose UTF32 CodePoint; 3619c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 3620c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // We can't just reset CurPtr to BufferPtr because BufferPtr may point to 3621c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // an escaped newline. 
3622c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose --CurPtr; 3623cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko ConversionResult Status = 3624cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko llvm::convertUTF8Sequence((const UTF8 **)&CurPtr, 3625cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko (const UTF8 *)BufferEnd, 3626cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko &CodePoint, 3627cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko strictConversion); 3628d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (Status == conversionOK) { 3629d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { 3630d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 3631d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 3632d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 3633d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 3634d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 
3635d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 3636d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 3637c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return LexUnicode(Result, CodePoint, CurPtr); 3638d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 3639c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 36400ed439487491e09faffdbabfacb1d050292c7723Jordan Rose if (isLexingRawMode() || ParsingPreprocessorDirective || 36410ed439487491e09faffdbabfacb1d050292c7723Jordan Rose PP->isPreprocessedOutput()) { 364220afc2977cd0a6bacbe6218a633cd59a24463e2fJordan Rose ++CurPtr; 364374c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose Kind = tok::unknown; 364474c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose break; 364574c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose } 364674c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose 3647c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Non-ASCII characters tend to creep into source code unintentionally. 3648c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Instead of letting the parser complain about the unknown token, 3649ae82c2b7b62d742f56638fa3cfb0f550ddcaf315Jordan Rose // just diagnose the invalid UTF-8, then drop the character. 365074c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose Diag(CurPtr, diag::err_invalid_utf8); 3651c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 3652c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose BufferPtr = CurPtr+1; 3653d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We're pretending the character didn't exist, so just try again with 3654d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // this lexer. 3655d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 
3656c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose goto LexNextToken; 3657c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 36585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 36591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 36605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 36615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 36625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 36635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 36649e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, Kind); 3665d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 36663185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis 36673185d4ac30378995ef70421e2848f77524c2b5d5Argyrios KyrtzidisHandleDirective: 36683185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis // We parsed a # character and it's the start of a preprocessing directive. 36693185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis 36703185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis FormTokenWithChars(Result, CurPtr, tok::hash); 36713185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis PP->HandleDirective(Result); 36723185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis 36733b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis if (PP->hadModuleLoaderFatalFailure()) { 36743b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis // With a fatal failure in the module loader, we abort parsing. 
36753b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis assert(Result.is(tok::eof) && "Preprocessor did not set tok:eof"); 3676d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 36773b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis } 36783b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis 3679d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We parsed the directive; lex a token with the new state. 3680d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return false; 36815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 3682