Lexer.cpp revision 859b6227694033dd6eaf3991a2b80877a406c382
15f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===--- Lexer.cpp - C Language Family Lexer ------------------------------===// 25f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 35f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// The LLVM Compiler Infrastructure 45f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 50bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// This file is distributed under the University of Illinois Open Source 60bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// License. See LICENSE.TXT for details. 75f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 85f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 95f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 10d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner// This file implements the Lexer and Token interfaces. 115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// TODO: GCC Diagnostics emitted by the lexer: 155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// PEDWARN: (form feed|vertical tab) in preprocessing directive 165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Universal characters, unicode, char mapping: 185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// WARNING: `%.*s' is not in NFKC 195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// WARNING: `%.*s' is not in NFC 205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Other: 225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// TODO: Options to support: 235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 
-fexec-charset,-fwide-exec-charset 245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// 255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "clang/Lex/Lexer.h" 289893902eceba7f01dd1521349d33866f77254d78Jordan Rose#include "clang/Basic/CharInfo.h" 299dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner#include "clang/Basic/SourceManager.h" 3055fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Lex/CodeCompletionHandler.h" 3155fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Lex/LexDiagnostic.h" 3206dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith#include "clang/Lex/LiteralSupport.h" 3355fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Lex/Preprocessor.h" 34d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis#include "llvm/ADT/STLExtras.h" 35c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose#include "llvm/ADT/StringExtras.h" 3655fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "llvm/ADT/StringSwitch.h" 37409a03671224d4e5bdab1594c43baf070148f830Chris Lattner#include "llvm/Support/Compiler.h" 38cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko#include "llvm/Support/ConvertUTF.h" 395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "llvm/Support/MemoryBuffer.h" 40ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose#include "UnicodeCharSets.h" 412fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper#include <cstring> 425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerusing namespace clang; 435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 44dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner//===----------------------------------------------------------------------===// 45dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner// Token Class Implementation 
46dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner//===----------------------------------------------------------------------===// 47dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 481eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. 49dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattnerbool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const { 50bec1c9d6f3feeec112cc8eeae90c1be29c6aaf13Douglas Gregor if (IdentifierInfo *II = getIdentifierInfo()) 51bec1c9d6f3feeec112cc8eeae90c1be29c6aaf13Douglas Gregor return II->getObjCKeywordID() == objcKey; 52bec1c9d6f3feeec112cc8eeae90c1be29c6aaf13Douglas Gregor return false; 53dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner} 54dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 55dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner/// getObjCKeywordID - Return the ObjC keyword kind. 56dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattnertok::ObjCKeywordKind Token::getObjCKeywordID() const { 57dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner IdentifierInfo *specId = getIdentifierInfo(); 58dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner return specId ? 
specId->getObjCKeywordID() : tok::objc_not_keyword; 59dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner} 60dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 6153702cd401b8fdca985aede7732c2f6a82ad9b1cChris Lattner 62dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner//===----------------------------------------------------------------------===// 63dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner// Lexer Class Implementation 64dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner//===----------------------------------------------------------------------===// 65dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 6699ba9e3bd70671f3441fb974895f226a83ce0e66David Blaikievoid Lexer::anchor() { } 6799ba9e3bd70671f3441fb974895f226a83ce0e66David Blaikie 681eb4433ac451dc16f4133a88af2d002ac26c58efMike Stumpvoid Lexer::InitLexer(const char *BufStart, const char *BufPtr, 6922d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner const char *BufEnd) { 7022d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner BufferStart = BufStart; 7122d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner BufferPtr = BufPtr; 7222d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner BufferEnd = BufEnd; 731eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 7422d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner assert(BufEnd[0] == 0 && 7522d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner "We assume that the input buffer has a null character at the end" 7622d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner " to simplify lexing!"); 771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 78156119df1d076b63609618976281961283f871dbEric Christopher // Check whether we have a BOM in the beginning of the buffer. If yes - act 79156119df1d076b63609618976281961283f871dbEric Christopher // accordingly. Right now we support only UTF-8 with and without BOM, so, just 80156119df1d076b63609618976281961283f871dbEric Christopher // skip the UTF-8 BOM if it's present. 
81156119df1d076b63609618976281961283f871dbEric Christopher if (BufferStart == BufferPtr) { 82156119df1d076b63609618976281961283f871dbEric Christopher // Determine the size of the BOM. 835f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner StringRef Buf(BufferStart, BufferEnd - BufferStart); 84969f9d47338fc36ebb6d24ad3a51e45eda07fd58Eli Friedman size_t BOMLength = llvm::StringSwitch<size_t>(Buf) 85156119df1d076b63609618976281961283f871dbEric Christopher .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM 86156119df1d076b63609618976281961283f871dbEric Christopher .Default(0); 87156119df1d076b63609618976281961283f871dbEric Christopher 88156119df1d076b63609618976281961283f871dbEric Christopher // Skip the BOM. 89156119df1d076b63609618976281961283f871dbEric Christopher BufferPtr += BOMLength; 90156119df1d076b63609618976281961283f871dbEric Christopher } 91156119df1d076b63609618976281961283f871dbEric Christopher 9222d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner Is_PragmaLexer = false; 93d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith CurrentConflictMarkerState = CMK_None; 94156119df1d076b63609618976281961283f871dbEric Christopher 9522d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // Start of the file is a start of line. 9622d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner IsAtStartOfLine = true; 97d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = true; 98d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 99d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingSpace = false; 100d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingEmptyMacro = false; 1011eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 10222d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // We are not after parsing a #. 
10322d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner ParsingPreprocessorDirective = false; 1041eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 10522d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // We are not after parsing #include. 10622d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner ParsingFilename = false; 1071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 10822d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // We are not in raw mode. Raw mode disables diagnostics and interpretation 10922d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // of tokens (e.g. identifiers, thus disabling macro expansion). It is used 11022d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block 11122d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // or otherwise skipping over tokens. 11222d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner LexingRawMode = false; 1131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 11422d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner // Default to not keeping comments. 11522d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner ExtendedTokenMode = 0; 11622d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner} 11722d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner 1180770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner/// Lexer constructor - Create a new lexer object for the specified buffer 1190770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner/// with the specified preprocessor managing the lexing process. This lexer 1200770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner/// assumes that the associated file buffer and Preprocessor objects will 1210770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner/// outlive it, so it doesn't take ownership of either of them. 
1226e2901407bff59aeb4cc301cc58b034723d0eb49Chris LattnerLexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP) 12388d3ac1341aa016cabd966c5b113a95ac05ea43fChris Lattner : PreprocessorLexer(&PP, FID), 12488d3ac1341aa016cabd966c5b113a95ac05ea43fChris Lattner FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)), 1254e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie LangOpts(PP.getLangOpts()) { 1261eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1270770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(), 1280770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner InputFile->getBufferEnd()); 1291eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1306aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose resetExtendedTokenMode(); 1316aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose} 1326aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 1336aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rosevoid Lexer::resetExtendedTokenMode() { 1346aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose assert(PP && "Cannot reset token mode without a preprocessor"); 1356aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose if (LangOpts.TraditionalCPP) 1366aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose SetKeepWhitespaceMode(true); 1376aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose else 1386aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose SetCommentRetentionState(PP->getCommentRetentionState()); 1390770dabb1ae81a2a9c2e7199262067103062a0b3Chris Lattner} 140dbf388b54bb1688ee4470c61235494cbc34b8f3cChris Lattner 141168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner/// Lexer constructor - Create a new raw lexer object. This object is only 142092bf67e5ca560d2fc6aa70be1f172b8b3a5ff96Dmitri Gribenko/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text 143590f0cc643274267d4d41125b62557e1d87886c3Chris Lattner/// range will outlive it, so it doesn't take ownership of it. 
1444e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid BlaikieLexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts, 145de96c0f29c4cacabe6ea577c61db87c2a85aea6cChris Lattner const char *BufStart, const char *BufPtr, const char *BufEnd) 1464e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie : FileLoc(fileloc), LangOpts(langOpts) { 14722d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner 14822d91ca8d7c134eac5cc6a4869e6a84c461ad624Chris Lattner InitLexer(BufStart, BufPtr, BufEnd); 1491eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 150168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner // We *are* in raw mode. 151168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner LexingRawMode = true; 1525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 1535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 154025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner/// Lexer constructor - Create a new raw lexer object. This object is only 155092bf67e5ca560d2fc6aa70be1f172b8b3a5ff96Dmitri Gribenko/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text 156025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner/// range will outlive it, so it doesn't take ownership of it. 1576e2901407bff59aeb4cc301cc58b034723d0eb49Chris LattnerLexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile, 1584e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const SourceManager &SM, const LangOptions &langOpts) 1594e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie : FileLoc(SM.getLocForStartOfFile(FID)), LangOpts(langOpts) { 160025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner 1611eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump InitLexer(FromFile->getBufferStart(), FromFile->getBufferStart(), 162025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner FromFile->getBufferEnd()); 1631eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 164025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner // We *are* in raw mode. 
165025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner LexingRawMode = true; 166025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner} 167025c3a66402fb713c2d9bf5dc174ff264765379aChris Lattner 16842e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// Create_PragmaLexer: Lexer constructor - Create a new lexer object for 16942e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// _Pragma expansion. This has a variety of magic semantics that this method 17042e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// sets up. It returns a new'd Lexer that must be delete'd when done. 17142e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// 17242e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// On entrance to this routine, TokStartLoc is a macro location which has a 17342e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// spelling loc that indicates the bytes to be lexed for the token and an 174433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth/// expansion location that indicates where all lexed tokens should be 17542e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// "expanded from". 17642e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// 17742e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// FIXME: It would really be nice to make _Pragma just be a wrapper around a 17842e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// normal lexer that remaps tokens as they fly by. This would require making 17942e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// Preprocessor::Lex virtual. Given that, we could just dump in a magic lexer 18042e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// interface that could handle this stuff. This would pull GetMappedTokenLoc 18142e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// out of the critical path of the lexer! 
18242e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner/// 1831eb4433ac451dc16f4133a88af2d002ac26c58efMike StumpLexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, 184433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth SourceLocation ExpansionLocStart, 185433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth SourceLocation ExpansionLocEnd, 186bcc2a67e5180612417727cbdd8afd0f79fdf726dChris Lattner unsigned TokLen, Preprocessor &PP) { 18742e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner SourceManager &SM = PP.getSourceManager(); 18842e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner 18942e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // Create the lexer as if we were going to lex the file normally. 190a11d61793341fea195c29a0dab3fbd74f2b39a8cChris Lattner FileID SpellingFID = SM.getFileID(SpellingLoc); 1916e2901407bff59aeb4cc301cc58b034723d0eb49Chris Lattner const llvm::MemoryBuffer *InputFile = SM.getBuffer(SpellingFID); 1926e2901407bff59aeb4cc301cc58b034723d0eb49Chris Lattner Lexer *L = new Lexer(SpellingFID, InputFile, PP); 1931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19442e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // Now that the lexer is created, change the start/end locations so that we 19542e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // just lex the subsection of the file that we want. This is lexing from a 19642e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // scratch buffer. 
19742e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner const char *StrData = SM.getCharacterData(SpellingLoc); 1981eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19942e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner L->BufferPtr = StrData; 20042e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner L->BufferEnd = StrData+TokLen; 2011fa495304c81e03f07f278a47b5efe9317104aabChris Lattner assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!"); 20242e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner 20342e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // Set the SourceLocation with the remapping information. This ensures that 20442e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // GetMappedTokenLoc will remap the tokens as they are lexed. 205bf340e452339e374ea6eef78c1f0a2abdd16c5a3Chandler Carruth L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID), 206bf340e452339e374ea6eef78c1f0a2abdd16c5a3Chandler Carruth ExpansionLocStart, 207bf340e452339e374ea6eef78c1f0a2abdd16c5a3Chandler Carruth ExpansionLocEnd, TokLen); 2081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 20942e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // Ensure that the lexer thinks it is inside a directive, so that end \n will 21084021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne // return an EOD token. 21142e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner L->ParsingPreprocessorDirective = true; 2121eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21342e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner // This lexer really is for _Pragma. 
21442e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner L->Is_PragmaLexer = true; 21542e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner return L; 21642e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner} 21742e00d19b0dac64732eb5449d52a076282fcbf77Chris Lattner 218168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner 2195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// Stringify - Convert the specified string into a C string, with surrounding 2205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// ""'s, and with escaped \ and " characters. 2215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstd::string Lexer::Stringify(const std::string &Str, bool Charify) { 2225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer std::string Result = Str; 2235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char Quote = Charify ? '\'' : '"'; 2245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (unsigned i = 0, e = Result.size(); i != e; ++i) { 2255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Result[i] == '\\' || Result[i] == Quote) { 2265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Result.insert(Result.begin()+i, '\\'); 2275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++i; ++e; 2285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return Result; 2315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 2325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 233d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner/// Stringify - Convert the specified string into a C string by escaping '\' 234d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner/// and " characters. This does not add surrounding ""'s to the string. 
2355f9e272e632e951b1efe824cd16acb4d96077930Chris Lattnervoid Lexer::Stringify(SmallVectorImpl<char> &Str) { 236d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner for (unsigned i = 0, e = Str.size(); i != e; ++i) { 237d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner if (Str[i] == '\\' || Str[i] == '"') { 238d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner Str.insert(Str.begin()+i, '\\'); 239d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner ++i; ++e; 240d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner } 241d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner } 242d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner} 243d8e3083840fef752d11ca183f42786470ed061e3Chris Lattner 244b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner//===----------------------------------------------------------------------===// 245b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner// Token Spelling 246b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner//===----------------------------------------------------------------------===// 247b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner 24830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith/// \brief Slow case of getSpelling. Extract the characters comprising the 24930cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith/// spelling of this token from the provided input buffer. 
25030cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smithstatic size_t getSpellingSlow(const Token &Tok, const char *BufPtr, 25130cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith const LangOptions &LangOpts, char *Spelling) { 25230cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith assert(Tok.needsCleaning() && "getSpellingSlow called on simple token"); 25330cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 25430cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith size_t Length = 0; 25530cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith const char *BufEnd = BufPtr + Tok.getLength(); 25630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 25730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith if (Tok.is(tok::string_literal)) { 25830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // Munch the encoding-prefix and opening double-quote. 25930cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith while (BufPtr < BufEnd) { 26030cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith unsigned Size; 26130cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); 26230cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith BufPtr += Size; 26330cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 26430cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith if (Spelling[Length - 1] == '"') 26530cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith break; 26630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith } 26730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 26830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // Raw string literals need special handling; trigraph expansion and line 26930cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // splicing do not occur within their d-char-sequence nor within their 27030cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // r-char-sequence. 
27130cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith if (Length >= 2 && 27230cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') { 27330cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // Search backwards from the end of the token to find the matching closing 27430cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // quote. 27530cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith const char *RawEnd = BufEnd; 27630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith do --RawEnd; while (*RawEnd != '"'); 27730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith size_t RawLength = RawEnd - BufPtr + 1; 27830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 27930cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // Everything between the quotes is included verbatim in the spelling. 28030cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith memcpy(Spelling + Length, BufPtr, RawLength); 28130cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith Length += RawLength; 28230cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith BufPtr += RawLength; 28330cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 28430cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // The rest of the token is lexed normally. 
28530cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith } 28630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith } 28730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 28830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith while (BufPtr < BufEnd) { 28930cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith unsigned Size; 29030cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); 29130cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith BufPtr += Size; 29230cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith } 29330cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 29430cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith assert(Length < Tok.getLength() && 29530cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith "NeedsCleaning flag set on token that didn't need cleaning!"); 29630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith return Length; 29730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith} 29830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 299b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// getSpelling() - Return the 'spelling' of this token. The spelling of a 300b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// token are the characters used to represent the token in the source file 301b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// after trigraph expansion and escaped-newline folding. In particular, this 302b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// wants to get the true, uncanonicalized, spelling of things like digraphs 303b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// UCNs, etc. 
3045f9e272e632e951b1efe824cd16acb4d96077930Chris LattnerStringRef Lexer::getSpelling(SourceLocation loc, 30530cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith SmallVectorImpl<char> &buffer, 30630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith const SourceManager &SM, 30730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith const LangOptions &options, 30830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith bool *invalid) { 309834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall // Break down the source location. 310834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc); 311834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 312834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall // Try to the load the file buffer. 313834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall bool invalidTemp = false; 3145f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner StringRef file = SM.getBufferData(locInfo.first, &invalidTemp); 315834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall if (invalidTemp) { 316834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall if (invalid) *invalid = true; 3175f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner return StringRef(); 318834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall } 319834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 320834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall const char *tokenBegin = file.data() + locInfo.second; 321834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 322834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall // Lex from the start of the given location. 
323834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options, 324834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall file.begin(), tokenBegin, file.end()); 325834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall Token token; 326834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall lexer.LexFromRawLexer(token); 327834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 328834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall unsigned length = token.getLength(); 329834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 330834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall // Common case: no need for cleaning. 331834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall if (!token.needsCleaning()) 3325f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner return StringRef(tokenBegin, length); 333834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 33430cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // Hard case, we need to relex the characters into the string. 33530cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith buffer.resize(length); 33630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data())); 3375f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner return StringRef(buffer.data(), buffer.size()); 338834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall} 339834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall 340834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// getSpelling() - Return the 'spelling' of this token. The spelling of a 341834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// token are the characters used to represent the token in the source file 342834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// after trigraph expansion and escaped-newline folding. 
In particular, this 343834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// wants to get the true, uncanonicalized, spelling of things like digraphs 344834e3f6c77d9ac03997a3f0c56934edcf406a355John McCall/// UCNs, etc. 345b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattnerstd::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr, 3464e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts, bool *Invalid) { 347b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); 34830cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 349b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner bool CharDataInvalid = false; 35030cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 351b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner &CharDataInvalid); 352b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (Invalid) 353b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner *Invalid = CharDataInvalid; 354b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (CharDataInvalid) 355b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner return std::string(); 35630cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 35730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith // If this token contains nothing interesting, return it directly. 
358b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (!Tok.needsCleaning()) 35930cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith return std::string(TokStart, TokStart + Tok.getLength()); 36030cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith 361b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner std::string Result; 36230cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith Result.resize(Tok.getLength()); 36330cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin())); 364b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner return Result; 365b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner} 366b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner 367b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// getSpelling - This method is used to get the spelling of a token into a 368b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// preallocated buffer, instead of as an std::string. The caller is required 369b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// to allocate enough space for the token, which is guaranteed to be at least 370b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// Tok.getLength() bytes long. The actual length of the token is returned. 371b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// 372b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// Note that this method may do two possible things: it may either fill in 373b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// the buffer specified with characters, or it may *change the input pointer* 374b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// to point to a constant buffer with the data already in it (avoiding a 375b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// copy). The caller is not allowed to modify the returned buffer pointer 376b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner/// if an internal buffer is returned. 
377b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattnerunsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, 378b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner const SourceManager &SourceMgr, 3794e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts, bool *Invalid) { 380b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); 381c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 382c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara const char *TokStart = 0; 383c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // NOTE: this has to be checked *before* testing for an IdentifierInfo. 384c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara if (Tok.is(tok::raw_identifier)) 385c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara TokStart = Tok.getRawIdentifierData(); 386c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else if (!Tok.hasUCN()) { 387c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { 388c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Just return the string from the identifier table, which is very quick. 389c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Buffer = II->getNameStart(); 390c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return II->getLength(); 391c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 392b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner } 393c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 394c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // NOTE: this can be checked even after testing for an IdentifierInfo. 
395b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (Tok.isLiteral()) 396b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner TokStart = Tok.getLiteralData(); 397c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 398b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (TokStart == 0) { 399c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // Compute the start of the token in the input lexer buffer. 400b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner bool CharDataInvalid = false; 401b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid); 402b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (Invalid) 403b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner *Invalid = CharDataInvalid; 404b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (CharDataInvalid) { 405b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner Buffer = ""; 406b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner return 0; 407b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner } 408b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner } 409c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 410b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner // If this token contains nothing interesting, return it directly. 411b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner if (!Tok.needsCleaning()) { 412b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner Buffer = TokStart; 413b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner return Tok.getLength(); 414b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner } 415c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara 416b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner // Otherwise, hard case, relex the characters into the string. 
41730cddaec99fa6c3207613efdaedbb51dd8d70c77Richard Smith return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer)); 418b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner} 419b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner 420b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner 4219a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner/// MeasureTokenLength - Relex the token at the specified location and return 4229a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner/// its length in bytes in the input file. If the token needs cleaning (e.g. 4239a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner/// includes a trigraph or an escaped newline) then this count includes bytes 4249a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner/// that are part of that. 4259a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattnerunsigned Lexer::MeasureTokenLength(SourceLocation Loc, 4262c78b873f4f3823ae859c15674cb3d76c8554113Chris Lattner const SourceManager &SM, 4272c78b873f4f3823ae859c15674cb3d76c8554113Chris Lattner const LangOptions &LangOpts) { 428d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis Token TheTok; 429d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis if (getRawToken(Loc, TheTok, SM, LangOpts)) 430d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return 0; 431d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return TheTok.getLength(); 432d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis} 433d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis 434d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis/// \brief Relex the token at the specified location. 435d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis/// \returns true if there was a failure, false on success. 
436d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidisbool Lexer::getRawToken(SourceLocation Loc, Token &Result, 437d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis const SourceManager &SM, 438f0dd853bb1e8f3e59b169e6d34a8556c6003c47cFariborz Jahanian const LangOptions &LangOpts, 439f0dd853bb1e8f3e59b169e6d34a8556c6003c47cFariborz Jahanian bool IgnoreWhiteSpace) { 4409a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // TODO: this could be special cased for common tokens like identifiers, ')', 4419a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // etc to make this faster, if it mattered. Just look at StrData[0] to handle 4421eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // all obviously single-char tokens. This could use 4439a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // Lexer::isObviouslySimpleCharacter for example to handle identifiers or 4449a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // something. 445de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner 446de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner // If this comes from a macro expansion, we really do want the macro name, not 447de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner // the token this macro expanded to. 
448402785357ab053dd53f4fdd858b9630a5e0f8badChandler Carruth Loc = SM.getExpansionLoc(Loc); 449363fdc29656cc03c1817268888f95e6343470aa8Chris Lattner std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); 450f715ca12bfc9fddfde75f98a197424434428b821Douglas Gregor bool Invalid = false; 4515f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); 452f715ca12bfc9fddfde75f98a197424434428b821Douglas Gregor if (Invalid) 453d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return true; 454f6ac97b101c8840efa92bf29166077ce4049e293Benjamin Kramer 455f6ac97b101c8840efa92bf29166077ce4049e293Benjamin Kramer const char *StrData = Buffer.data()+LocInfo.second; 4568350394c65b81bba3986dfe44ae17423873741deChris Lattner 457f0dd853bb1e8f3e59b169e6d34a8556c6003c47cFariborz Jahanian if (!IgnoreWhiteSpace && isWhitespace(StrData[0])) 458d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return true; 45933e9abd21083a0191a7676a04b497006d2da184dDouglas Gregor 4609a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner // Create a lexer starting at the beginning of this token. 
461c3526d89ef9c31639ec8b25180cfb22354344241Sebastian Redl Lexer TheLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, 462c3526d89ef9c31639ec8b25180cfb22354344241Sebastian Redl Buffer.begin(), StrData, Buffer.end()); 46339de7409bffb6b725a8aa64f0ba77ab51e8c9eb3Chris Lattner TheLexer.SetCommentRetentionState(true); 464d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis TheLexer.LexFromRawLexer(Result); 465d93335c43fd462145fee3ea8f4d84d430577c821Argyrios Kyrtzidis return false; 4669a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner} 4679a6119437672f42be5f50c3fe89fe843b1bfa5b5Chris Lattner 4680e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidisstatic SourceLocation getBeginningOfFileToken(SourceLocation Loc, 4690e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis const SourceManager &SM, 4700e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis const LangOptions &LangOpts) { 4710e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis assert(Loc.isFileID()); 472a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); 4733de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor if (LocInfo.first.isInvalid()) 4743de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor return Loc; 4753de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor 476a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor bool Invalid = false; 4775f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); 478a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (Invalid) 479a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor return Loc; 480a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 481a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // Back up from the current location until we hit the beginning of a line 482a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // (or the buffer). We'll relex from that point. 
483a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor const char *BufStart = Buffer.data(); 4843de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor if (LocInfo.second >= Buffer.size()) 4853de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor return Loc; 4863de84241d90f3dd280126fdf2c4651667151c967Douglas Gregor 487a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor const char *StrData = BufStart+LocInfo.second; 488a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (StrData[0] == '\n' || StrData[0] == '\r') 489a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor return Loc; 490a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 491a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor const char *LexStart = StrData; 492a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor while (LexStart != BufStart) { 493a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (LexStart[0] == '\n' || LexStart[0] == '\r') { 494a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor ++LexStart; 495a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor break; 496a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor } 497a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 498a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor --LexStart; 499a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor } 500a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 501a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // Create a lexer starting at the beginning of this token. 
502a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); 503a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end()); 504a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor TheLexer.SetCommentRetentionState(true); 505a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 506a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // Lex tokens until we find the token that contains the source location. 507a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor Token TheTok; 508a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor do { 509a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor TheLexer.LexFromRawLexer(TheTok); 510a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 511a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (TheLexer.getBufferLocation() > StrData) { 512a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // Lexing this token has taken the lexer past the source location we're 513a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // looking for. If the current token encompasses our source location, 514a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // return the beginning of that token. 515a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData) 516a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor return TheTok.getLocation(); 517a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 518a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // We ended up skipping over the source location entirely, which means 519a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // that it points into whitespace. We're done here. 
520a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor break; 521a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor } 522a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor } while (TheTok.getKind() != tok::eof); 523a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 524a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor // We've passed our source location; just return the original source location. 525a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor return Loc; 526a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor} 527a8e5c5bdbe387b2552c1c23b828f54abcf085a40Douglas Gregor 5280e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios KyrtzidisSourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, 5290e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis const SourceManager &SM, 5300e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis const LangOptions &LangOpts) { 5310e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis if (Loc.isFileID()) 5320e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis return getBeginningOfFileToken(Loc, SM, LangOpts); 5330e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis 5340e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis if (!SM.isMacroArgExpansion(Loc)) 5350e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis return Loc; 5360e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis 5370e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis SourceLocation FileLoc = SM.getSpellingLoc(Loc); 5380e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); 5390e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); 540ae9f85b2c062ac20512a484cfa6e66239dd3d0d9Chandler Carruth std::pair<FileID, unsigned> BeginFileLocInfo 541ae9f85b2c062ac20512a484cfa6e66239dd3d0d9Chandler Carruth = SM.getDecomposedLoc(BeginFileLoc); 
5420e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis assert(FileLocInfo.first == BeginFileLocInfo.first && 5430e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis FileLocInfo.second >= BeginFileLocInfo.second); 544ae9f85b2c062ac20512a484cfa6e66239dd3d0d9Chandler Carruth return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); 5450e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis} 5460e870622e4d4b2ecb7bc6ffd2c97f74fd14220b6Argyrios Kyrtzidis 547f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregornamespace { 548f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor enum PreambleDirectiveKind { 549f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PDK_Skipped, 550f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PDK_StartIf, 551f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PDK_EndIf, 552f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PDK_Unknown 553f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor }; 554f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor} 555f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 556f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregorstd::pair<unsigned, bool> 55703c107a42fae79e89d0016999a1a04c07d65591aArgyrios KyrtzidisLexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, 5584e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts, unsigned MaxLines) { 559f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // Create a lexer starting at the beginning of the file. Note that we use a 560f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // "fake" file source location at offset 1 so that the lexer will track our 561f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // position within the file. 
562f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor const unsigned StartOffset = 1; 5631cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset); 5641cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis Lexer TheLexer(FileLoc, LangOpts, Buffer->getBufferStart(), 565f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor Buffer->getBufferStart(), Buffer->getBufferEnd()); 566355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis TheLexer.SetCommentRetentionState(true); 5671cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis 5681cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis // StartLoc will differ from FileLoc if there is a BOM that was skipped. 5691cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis SourceLocation StartLoc = TheLexer.getSourceLocation(); 5701cb7142b66ac1844b91511314cce2b309a9a180dArgyrios Kyrtzidis 571f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor bool InPreprocessorDirective = false; 572f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor Token TheTok; 573f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor Token IfStartTok; 574f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor unsigned IfCount = 0; 575355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis SourceLocation ActiveCommentLoc; 576c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis 577c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis unsigned MaxLineOffset = 0; 578c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis if (MaxLines) { 579c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis const char *CurPtr = Buffer->getBufferStart(); 580c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis unsigned CurLine = 0; 581c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis while (CurPtr != Buffer->getBufferEnd()) { 582c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis char ch = *CurPtr++; 
583c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis if (ch == '\n') { 584c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis ++CurLine; 585c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis if (CurLine == MaxLines) 586c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis break; 587c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis } 588c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis } 589c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis if (CurPtr != Buffer->getBufferEnd()) 590c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis MaxLineOffset = CurPtr - Buffer->getBufferStart(); 591c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis } 592df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor 593f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor do { 594f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor TheLexer.LexFromRawLexer(TheTok); 595f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 596f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (InPreprocessorDirective) { 597f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // If we've hit the end of the file, we're done. 598f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (TheTok.getKind() == tok::eof) { 599f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor break; 600f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 601f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 602f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // If we haven't hit the end of the preprocessor directive, skip this 603f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // token. 
604f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (!TheTok.isAtStartOfLine()) 605f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 606f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 607f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // We've passed the end of the preprocessor directive, and will look 608f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // at this token again below. 609f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor InPreprocessorDirective = false; 610f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 611f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 612df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // Keep track of the # of lines in the preamble. 613df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor if (TheTok.isAtStartOfLine()) { 614c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset; 615df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor 616df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // If we were asked to limit the number of lines in the preamble, 617df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // and we're about to exceed that limit, we're done. 618c8c97a03eb0fdeb4f5fc9c4dea308ebbf46c2c93Argyrios Kyrtzidis if (MaxLineOffset && TokOffset >= MaxLineOffset) 619df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor break; 620df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor } 621df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor 622f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // Comments are okay; skip over them. 
623355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis if (TheTok.getKind() == tok::comment) { 624355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis if (ActiveCommentLoc.isInvalid()) 625355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis ActiveCommentLoc = TheTok.getLocation(); 626f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 627355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis } 628f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 629f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) { 630f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // This is the start of a preprocessor directive. 631f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor Token HashTok = TheTok; 632f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor InPreprocessorDirective = true; 633355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis ActiveCommentLoc = SourceLocation(); 634f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 63519207f1e5f51261a33492602501fb7ada50ea546Joerg Sonnenberger // Figure out which directive this is. Since we're lexing raw tokens, 636f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // we don't have an identifier table available. Instead, just look at 637f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // the raw identifier to recognize and categorize preprocessor directives. 
638f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor TheLexer.LexFromRawLexer(TheTok); 639c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) { 6405f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner StringRef Keyword(TheTok.getRawIdentifierData(), 641c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara TheTok.getLength()); 642f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor PreambleDirectiveKind PDK 643f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor = llvm::StringSwitch<PreambleDirectiveKind>(Keyword) 644f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("include", PDK_Skipped) 645f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("__include_macros", PDK_Skipped) 646f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("define", PDK_Skipped) 647f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("undef", PDK_Skipped) 648f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("line", PDK_Skipped) 649f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("error", PDK_Skipped) 650f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("pragma", PDK_Skipped) 651f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("import", PDK_Skipped) 652f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("include_next", PDK_Skipped) 653f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("warning", PDK_Skipped) 654f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("ident", PDK_Skipped) 655f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("sccs", PDK_Skipped) 656f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("assert", PDK_Skipped) 657f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("unassert", PDK_Skipped) 658f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("if", PDK_StartIf) 659f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("ifdef", 
PDK_StartIf) 660f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("ifndef", PDK_StartIf) 661f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("elif", PDK_Skipped) 662f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("else", PDK_Skipped) 663f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Case("endif", PDK_EndIf) 664f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor .Default(PDK_Unknown); 665f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 666f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor switch (PDK) { 667f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor case PDK_Skipped: 668f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 669f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 670f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor case PDK_StartIf: 671f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (IfCount == 0) 672f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor IfStartTok = HashTok; 673f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 674f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor ++IfCount; 675f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 676f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 677f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor case PDK_EndIf: 678f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // Mismatched #endif. The preamble ends here. 
679f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor if (IfCount == 0) 680f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor break; 681f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 682f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor --IfCount; 683f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor continue; 684f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 685f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor case PDK_Unknown: 686f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // We don't know what this directive is; stop at the '#'. 687f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor break; 688f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 689f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 690f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 691f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // We only end up here if we didn't recognize the preprocessor 692f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // directive or it was one that can't occur in the preamble at this 693f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor // point. Roll back the current token to the location of the '#'. 694f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor InPreprocessorDirective = false; 695f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor TheTok = HashTok; 696f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } 697f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 698df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // We hit a token that we don't recognize as being in the 699df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // "preprocessing only" part of the file, so we're no longer in 700df95a13ec73d2cdaea79555cb412d767f4963120Douglas Gregor // the preamble. 
701f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor break; 702f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor } while (true); 703f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 704355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis SourceLocation End; 705355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis if (IfCount) 706355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis End = IfStartTok.getLocation(); 707355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis else if (ActiveCommentLoc.isValid()) 708355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis End = ActiveCommentLoc; // don't truncate a decl comment. 709355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis else 710355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis End = TheTok.getLocation(); 711355dae6f1b819c42e2b416ea154b03b0aeaf58a3Argyrios Kyrtzidis 712f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(), 713f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor IfCount? IfStartTok.isAtStartOfLine() 714f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor : TheTok.isAtStartOfLine()); 715f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor} 716f033f1da4a34f8df6e95e9929dc04ff54bb8fb01Douglas Gregor 7177ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7187ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// AdvanceToTokenCharacter - Given a location that specifies the start of a 7197ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// token, return a new location that specifies a character within the token. 
7207ef5c27eb6e8ebe58b52013246c06753c3613263Chris LattnerSourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, 7217ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner unsigned CharNo, 7227ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner const SourceManager &SM, 7234e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts) { 724433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth // Figure out how many physical characters away the specified expansion 7257ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // character is. This needs to take into consideration newlines and 7267ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // trigraphs. 7277ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner bool Invalid = false; 7287ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner const char *TokPtr = SM.getCharacterData(TokStart, &Invalid); 7297ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7307ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // If they request the first char of the token, we're trivially done. 7317ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr))) 7327ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner return TokStart; 7337ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7347ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner unsigned PhysOffset = 0; 7357ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7367ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // The usual case is that tokens don't contain anything interesting. Skip 7377ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // over the uninteresting characters. If a token only consists of simple 7387ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // chars, this method is extremely fast. 
7397ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner while (Lexer::isObviouslySimpleCharacter(*TokPtr)) { 7407ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner if (CharNo == 0) 741a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return TokStart.getLocWithOffset(PhysOffset); 7427ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner ++TokPtr, --CharNo, ++PhysOffset; 7437ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner } 7447ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7457ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // If we have a character that may be a trigraph or escaped newline, use a 7467ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // lexer to parse it correctly. 7477ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner for (; CharNo; --CharNo) { 7487ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner unsigned Size; 7494e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts); 7507ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner TokPtr += Size; 7517ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner PhysOffset += Size; 7527ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner } 7537ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7547ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // Final detail: if we end up on an escaped newline, we want to return the 7557ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // location of the actual byte of the token. For example foo\<newline>bar 7567ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // advanced by 3 should return the location of b, not of \\. One compounding 7577ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner // detail of this is that the escape may be made by a trigraph. 
7587ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) 7597ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; 7607ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 761a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return TokStart.getLocWithOffset(PhysOffset); 7627ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner} 7637ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7647ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// \brief Computes the source location just past the end of the 7657ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// token at this source location. 7667ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// 7677ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// This routine can be used to produce a source location that 7687ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// points just past the end of the token referenced by \p Loc, and 7697ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// is generally used when a diagnostic needs to point just after a 7707ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// token where it expected something different that it received. If 7717ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// the returned source location would not be meaningful (e.g., if 7727ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// it points into a macro), this routine returns an invalid 7737ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// source location. 7747ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// 7757ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// \param Offset an offset from the end of the token, where the source 7767ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// location should refer to. 
The default offset (0) produces a source 7777ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// location pointing just past the end of the token; an offset of 1 produces 7787ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner/// a source location pointing to the last character in the token, etc. 7797ef5c27eb6e8ebe58b52013246c06753c3613263Chris LattnerSourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, 7807ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner const SourceManager &SM, 7814e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts) { 7827ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis if (Loc.isInvalid()) 7837ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner return SourceLocation(); 7847ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis 7857ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis if (Loc.isMacroID()) { 7864e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) 787433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth return SourceLocation(); // Points inside the macro expansion. 
7887ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis } 7897ddf6b2d77ac4cb27f78d817d7884e6ce17afd0cArgyrios Kyrtzidis 7904e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts); 7917ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner if (Len > Offset) 7927ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner Len = Len - Offset; 7937ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner else 7947ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner return Loc; 7957ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 796a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return Loc.getLocWithOffset(Len); 7977ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner} 7987ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner 7997a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis/// \brief Returns true if the given MacroID location points at the first 800433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth/// token of the macro expansion. 
801433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruthbool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, 802f62d43d2afe1960755a1b5813cae1e5983bcac1bDouglas Gregor const SourceManager &SM, 80369bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis const LangOptions &LangOpts, 80469bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis SourceLocation *MacroBegin) { 8057a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); 8067a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 807c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis SourceLocation expansionLoc; 808c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (!SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc)) 809c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis return false; 810c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis 81169bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (expansionLoc.isFileID()) { 81269bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis // No other macro expansions, this is the first. 81369bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (MacroBegin) 81469bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis *MacroBegin = expansionLoc; 81569bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis return true; 81669bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis } 8177a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 81869bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts, MacroBegin); 8197a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis} 8207a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 8217a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis/// \brief Returns true if the given MacroID location points at the last 822433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth/// token of the macro expansion. 
823433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruthbool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, 82469bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis const SourceManager &SM, 82569bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis const LangOptions &LangOpts, 82669bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis SourceLocation *MacroEnd) { 8277a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); 8287a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 8297a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis SourceLocation spellLoc = SM.getSpellingLoc(loc); 8307a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis unsigned tokLen = MeasureTokenLength(spellLoc, SM, LangOpts); 8317a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis if (tokLen == 0) 8327a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis return false; 8337a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 834c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis SourceLocation afterLoc = loc.getLocWithOffset(tokLen); 835c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis SourceLocation expansionLoc; 836c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (!SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc)) 837c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis return false; 838f8c50652f7b224e66b0b6098d1fba07e036019b4Argyrios Kyrtzidis 83969bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (expansionLoc.isFileID()) { 84069bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis // No other macro expansions. 
84169bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (MacroEnd) 84269bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis *MacroEnd = expansionLoc; 84369bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis return true; 84469bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis } 8457a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 84669bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts, MacroEnd); 8477a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis} 8487a759606d93975866051f67104ae58446e55f404Argyrios Kyrtzidis 849a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidisstatic CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, 850d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis const SourceManager &SM, 851d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis const LangOptions &LangOpts) { 852a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis SourceLocation Begin = Range.getBegin(); 853a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis SourceLocation End = Range.getEnd(); 854d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis assert(Begin.isFileID() && End.isFileID()); 855a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis if (Range.isTokenRange()) { 856a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts); 857a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis if (End.isInvalid()) 858a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return CharSourceRange(); 859a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis } 860d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 861d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis // Break down the source locations. 
862d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis FileID FID; 863d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis unsigned BeginOffs; 864d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin); 865d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (FID.isInvalid()) 866d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange(); 867d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 868d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis unsigned EndOffs; 869d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (!SM.isInFileID(End, FID, &EndOffs) || 870d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis BeginOffs > EndOffs) 871d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange(); 872d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 873d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange::getCharRange(Begin, End); 874d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis} 875d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 876a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios KyrtzidisCharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, 87711b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis const SourceManager &SM, 87811b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis const LangOptions &LangOpts) { 879a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis SourceLocation Begin = Range.getBegin(); 880a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis SourceLocation End = Range.getEnd(); 881d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (Begin.isInvalid() || End.isInvalid()) 88211b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis return CharSourceRange(); 88311b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 884d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if 
(Begin.isFileID() && End.isFileID()) 885a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return makeRangeFromFileLocs(Range, SM, LangOpts); 886d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 887d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (Begin.isMacroID() && End.isFileID()) { 88811b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis if (!isAtStartOfMacroExpansion(Begin, SM, LangOpts, &Begin)) 88911b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis return CharSourceRange(); 890a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range.setBegin(Begin); 891a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return makeRangeFromFileLocs(Range, SM, LangOpts); 892d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis } 89311b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 894d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (Begin.isFileID() && End.isMacroID()) { 895a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts, 896a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis &End)) || 897a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts, 898a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis &End))) 899d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange(); 900a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range.setEnd(End); 901a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return makeRangeFromFileLocs(Range, SM, LangOpts); 902d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis } 90311b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 904d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis assert(Begin.isMacroID() && End.isMacroID()); 905d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis SourceLocation MacroBegin, MacroEnd; 
906d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) && 907a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts, 908a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis &MacroEnd)) || 909a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts, 910a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis &MacroEnd)))) { 911a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range.setBegin(MacroBegin); 912a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range.setEnd(MacroEnd); 913a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis return makeRangeFromFileLocs(Range, SM, LangOpts); 914a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis } 915d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 916c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis bool Invalid = false; 917c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin), 918c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis &Invalid); 919c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (Invalid) 920e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return CharSourceRange(); 921e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 922c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (BeginEntry.getExpansion().isMacroArgExpansion()) { 923c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End), 924c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis &Invalid); 925c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (Invalid) 926c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis return CharSourceRange(); 
92711b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 928c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis if (EndEntry.getExpansion().isMacroArgExpansion() && 929c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis BeginEntry.getExpansion().getExpansionLocStart() == 930c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis EndEntry.getExpansion().getExpansionLocStart()) { 931c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis Range.setBegin(SM.getImmediateSpellingLoc(Begin)); 932c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis Range.setEnd(SM.getImmediateSpellingLoc(End)); 933c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis return makeFileCharRange(Range, SM, LangOpts); 934c50c6ff49aa3648ae031349de6f09439f52425f0Argyrios Kyrtzidis } 935d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis } 936d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis 937d9806c912ae3e870a733acfd83c26e8a1f6a5ffcArgyrios Kyrtzidis return CharSourceRange(); 93811b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis} 93911b652d41d0d97380ab321a1dba48ecb044f9de8Argyrios Kyrtzidis 940e64d9037658a1b95c79ea275af6167a110b3c563Argyrios KyrtzidisStringRef Lexer::getSourceText(CharSourceRange Range, 941e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis const SourceManager &SM, 942e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis const LangOptions &LangOpts, 943e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis bool *Invalid) { 944a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis Range = makeFileCharRange(Range, SM, LangOpts); 945a83f4d2315dbeb3914868f1ccb8e74fb2ccdbb0cArgyrios Kyrtzidis if (Range.isInvalid()) { 946e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = true; 947e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return StringRef(); 948e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis } 949e64d9037658a1b95c79ea275af6167a110b3c563Argyrios 
Kyrtzidis 950e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis // Break down the source location. 951e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis std::pair<FileID, unsigned> beginInfo = SM.getDecomposedLoc(Range.getBegin()); 952e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (beginInfo.first.isInvalid()) { 953e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = true; 954e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return StringRef(); 955e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis } 956e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 957e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis unsigned EndOffs; 958e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) || 959e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis beginInfo.second > EndOffs) { 960e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = true; 961e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return StringRef(); 962e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis } 963e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 964e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis // Try to the load the file buffer. 
965e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis bool invalidTemp = false; 966e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp); 967e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (invalidTemp) { 968e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = true; 969e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return StringRef(); 970e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis } 971e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 972e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis if (Invalid) *Invalid = false; 973e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis return file.substr(beginInfo.second, EndOffs - beginInfo.second); 974e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis} 975e64d9037658a1b95c79ea275af6167a110b3c563Argyrios Kyrtzidis 976c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna ZaksStringRef Lexer::getImmediateMacroName(SourceLocation Loc, 977c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks const SourceManager &SM, 978c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks const LangOptions &LangOpts) { 979c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks assert(Loc.isMacroID() && "Only reasonble to call this on macros"); 9807f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 9817f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // Find the location of the immediate macro expansion. 
9827f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis while (1) { 9837f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis FileID FID = SM.getFileID(Loc); 9847f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID); 9857f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); 9867f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis Loc = Expansion.getExpansionLocStart(); 9877f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis if (!Expansion.isMacroArgExpansion()) 9887f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis break; 9897f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 9907f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // For macro arguments we need to check that the argument did not come 9917f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // from an inner macro, e.g: "MAC1( MAC2(foo) )" 9927f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 9937f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // Loc points to the argument id of the macro definition, move to the 9947f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // macro expansion. 995c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks Loc = SM.getImmediateExpansionRange(Loc).first; 9967f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis SourceLocation SpellLoc = Expansion.getSpellingLoc(); 9977f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis if (SpellLoc.isFileID()) 9987f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis break; // No inner macro. 9997f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 10007f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // If spelling location resides in the same FileID as macro expansion 10017f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // location, it means there is no inner macro. 
10027f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis FileID MacroFID = SM.getFileID(Loc); 10037f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis if (SM.isInFileID(SpellLoc, MacroFID)) 10047f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis break; 10057f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis 10067f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis // Argument came from inner macro. 10077f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis Loc = SpellLoc; 10087f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis } 1009c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks 1010c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // Find the spelling location of the start of the non-argument expansion 1011c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // range. This is where the macro name was spelled in order to begin 1012c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // expanding this macro. 10137f6cf9764b33381e03fcf7c44f7985a333212b06Argyrios Kyrtzidis Loc = SM.getSpellingLoc(Loc); 1014c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks 1015c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // Dig out the buffer where the macro name was spelled and the extents of the 1016c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks // name so that we can render it into the expansion note. 
1017c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc); 1018c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts); 1019c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first); 1020c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength); 1021c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks} 1022c2a8d6cee01fc4845f5409bf5c021a64616ac8c3Anna Zaks 1023d880b3aa6d594d1a7f2d307c29378c6f59b216ffJordan Rosebool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) { 10249893902eceba7f01dd1521349d33866f77254d78Jordan Rose return isIdentifierBody(c, LangOpts.DollarIdents); 1025d880b3aa6d594d1a7f2d307c29378c6f59b216ffJordan Rose} 1026d880b3aa6d594d1a7f2d307c29378c6f59b216ffJordan Rose 10275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 10295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Diagnostics forwarding code. 10305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 10315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1032409a03671224d4e5bdab1594c43baf070148f830Chris Lattner/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the 1033433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth/// lexer buffer was all expanded at a single point, perform the mapping. 1034409a03671224d4e5bdab1594c43baf070148f830Chris Lattner/// This is currently only used for _Pragma implementation, so it is the slow 1035409a03671224d4e5bdab1594c43baf070148f830Chris Lattner/// path of the hot getSourceLocation method. Do not allow it to be inlined. 
103614bd96571ef6f0e97dc79ec4d01b547d60e8fa68Chandler Carruthstatic LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc( 103714bd96571ef6f0e97dc79ec4d01b547d60e8fa68Chandler Carruth Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen); 1038409a03671224d4e5bdab1594c43baf070148f830Chris Lattnerstatic SourceLocation GetMappedTokenLoc(Preprocessor &PP, 1039409a03671224d4e5bdab1594c43baf070148f830Chris Lattner SourceLocation FileLoc, 1040de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner unsigned CharNo, unsigned TokLen) { 1041433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth assert(FileLoc.isMacroID() && "Must be a macro expansion"); 10421eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1043409a03671224d4e5bdab1594c43baf070148f830Chris Lattner // Otherwise, we're lexing "mapped tokens". This is used for things like 1044433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth // _Pragma handling. Combine the expansion location of FileLoc with the 1045df7c17a8d02fe09a3466786bae3e40fc3252687aChris Lattner // spelling location. 1046e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner SourceManager &SM = PP.getSourceManager(); 10471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1048433db06b614f26dc6829e86d6ff469e2cca7d4f9Chandler Carruth // Create a new SLoc which is expanded from Expansion(FileLoc) but whose 1049df7c17a8d02fe09a3466786bae3e40fc3252687aChris Lattner // characters come from spelling(FileLoc)+Offset. 1050e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc); 1051a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis SpellingLoc = SpellingLoc.getLocWithOffset(CharNo); 10521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1053e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner // Figure out the expansion loc range, which is the range covered by the 1054e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner // original _Pragma(...) sequence. 
1055e7fb48466afcbf2c4ccdfa658824282fdc3c512cChris Lattner std::pair<SourceLocation,SourceLocation> II = 1056999f739404edf2078cf9f9c28b4dc45c19765842Chandler Carruth SM.getImmediateExpansionRange(FileLoc); 10571eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1058bf340e452339e374ea6eef78c1f0a2abdd16c5a3Chandler Carruth return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen); 1059409a03671224d4e5bdab1594c43baf070148f830Chris Lattner} 1060409a03671224d4e5bdab1594c43baf070148f830Chris Lattner 10615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// getSourceLocation - Return a source location identifier for the specified 10625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// offset in the current file. 1063de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris LattnerSourceLocation Lexer::getSourceLocation(const char *Loc, 1064de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner unsigned TokLen) const { 1065448cec4c1c3705f6f49ffdefb58a7329942a2dd8Chris Lattner assert(Loc >= BufferStart && Loc <= BufferEnd && 10665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer "Location out of range for this buffer!"); 10679dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner 10689dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner // In the normal case, we're just lexing from a simple file buffer, return 10699dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner // the file id from FileLoc with the offset specified. 
1070448cec4c1c3705f6f49ffdefb58a7329942a2dd8Chris Lattner unsigned CharNo = Loc-BufferStart; 10719dc1f530c086d2c16f8cba758b0f59a5bf41323aChris Lattner if (FileLoc.isFileID()) 1072a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return FileLoc.getLocWithOffset(CharNo); 10731eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 10742b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner // Otherwise, this is the _Pragma lexer case, which pretends that all of the 10752b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner // tokens are lexed from where the _Pragma was defined. 1076168ae2d44a443da75ea85db5f3b5081eb0bce113Chris Lattner assert(PP && "This doesn't work on raw lexers"); 1077de7aeefc5573d669ed476d7bda7a8940d3bcadb7Chris Lattner return GetMappedTokenLoc(*PP, FileLoc, CharNo, TokLen); 10785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 10795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// Diag - Forwarding function for diagnostics. This translate a source 10815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// position in the current buffer into a SourceLocation object for rendering. 10823cbfe2c4159e0a219ae660d50625c013aa4afbd0Chris LattnerDiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const { 10833692b09faa9fe346f39bc922db6dce48cdcc3f63Chris Lattner return PP->Diag(getSourceLocation(Loc), DiagID); 10845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 10855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 10875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Trigraph and Escaped Newline Handling Code. 
//===----------------------------------------------------------------------===//

/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
static char GetTrigraphCharForLetter(char Letter) {
  switch (Letter) {
  default:   return 0;      // Not one of the nine trigraph letters.
  case '=':  return '#';
  case ')':  return ']';
  case '(':  return '[';
  case '!':  return '|';
  case '\'': return '^';
  case '>':  return '}';
  case '/':  return '\\';
  case '<':  return '{';
  case '-':  return '~';
  }
}

/// DecodeTrigraphChar - If the specified character is a legal trigraph when
/// prefixed with ??, emit a trigraph warning.  If trigraphs are enabled,
/// return the result character.
Finally, emit a warning about trigraph use 11105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// whether trigraphs are enabled or not. 11115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic char DecodeTrigraphChar(const char *CP, Lexer *L) { 11125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char Res = GetTrigraphCharForLetter(*CP); 11133692b09faa9fe346f39bc922db6dce48cdcc3f63Chris Lattner if (!Res || !L) return Res; 11141eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 11154e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!L->getLangOpts().Trigraphs) { 111674d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 111774d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CP-2, diag::trigraph_ignored); 11183692b09faa9fe346f39bc922db6dce48cdcc3f63Chris Lattner return 0; 11195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 11201eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 112174d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 11225f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1); 11235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return Res; 11245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 11255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 112624f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner/// getEscapedNewLineSize - Return the size of the specified escaped newline, 112724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" or a 11281eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump/// trigraph equivalent on entry to this function. 
112924f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattnerunsigned Lexer::getEscapedNewLineSize(const char *Ptr) { 113024f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner unsigned Size = 0; 113124f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner while (isWhitespace(Ptr[Size])) { 113224f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner ++Size; 11331eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 113424f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r') 113524f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner continue; 113624f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner 113724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // If this is a \r\n or \n\r, skip the other half. 113824f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') && 113924f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Ptr[Size-1] != Ptr[Size]) 114024f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner ++Size; 11411eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 114224f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner return Size; 11431eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 11441eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 114524f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Not an escaped newline, must be a \t or something else. 114624f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner return 0; 114724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner} 114824f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner 1149033749571f8d4c804eeb357c70b06424aa24503bChris Lattner/// SkipEscapedNewLines - If P points to an escaped newline (or a series of 1150033749571f8d4c804eeb357c70b06424aa24503bChris Lattner/// them), skip over them and return the first non-escaped-newline found, 1151033749571f8d4c804eeb357c70b06424aa24503bChris Lattner/// otherwise return P. 
1152033749571f8d4c804eeb357c70b06424aa24503bChris Lattnerconst char *Lexer::SkipEscapedNewLines(const char *P) { 1153033749571f8d4c804eeb357c70b06424aa24503bChris Lattner while (1) { 1154033749571f8d4c804eeb357c70b06424aa24503bChris Lattner const char *AfterEscape; 1155033749571f8d4c804eeb357c70b06424aa24503bChris Lattner if (*P == '\\') { 1156033749571f8d4c804eeb357c70b06424aa24503bChris Lattner AfterEscape = P+1; 1157033749571f8d4c804eeb357c70b06424aa24503bChris Lattner } else if (*P == '?') { 1158033749571f8d4c804eeb357c70b06424aa24503bChris Lattner // If not a trigraph for escape, bail out. 1159033749571f8d4c804eeb357c70b06424aa24503bChris Lattner if (P[1] != '?' || P[2] != '/') 1160033749571f8d4c804eeb357c70b06424aa24503bChris Lattner return P; 1161033749571f8d4c804eeb357c70b06424aa24503bChris Lattner AfterEscape = P+3; 1162033749571f8d4c804eeb357c70b06424aa24503bChris Lattner } else { 1163033749571f8d4c804eeb357c70b06424aa24503bChris Lattner return P; 1164033749571f8d4c804eeb357c70b06424aa24503bChris Lattner } 11651eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1166033749571f8d4c804eeb357c70b06424aa24503bChris Lattner unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape); 1167033749571f8d4c804eeb357c70b06424aa24503bChris Lattner if (NewLineSize == 0) return P; 1168033749571f8d4c804eeb357c70b06424aa24503bChris Lattner P = AfterEscape+NewLineSize; 1169033749571f8d4c804eeb357c70b06424aa24503bChris Lattner } 1170033749571f8d4c804eeb357c70b06424aa24503bChris Lattner} 1171033749571f8d4c804eeb357c70b06424aa24503bChris Lattner 1172aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks/// \brief Checks that the given token is the first token that occurs after the 1173aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks/// given location (this excludes comments and whitespace). Returns the location 1174aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks/// immediately after the specified token. 
If the token is not found or the 1175aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks/// location is inside a macro, the returned source location will be invalid. 1176aca25bccefe56121b686706afc84c8cb5d46e65bAnna ZaksSourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, 1177aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks tok::TokenKind TKind, 1178aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks const SourceManager &SM, 1179aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks const LangOptions &LangOpts, 1180aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks bool SkipTrailingWhitespaceAndNewLine) { 1181aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks if (Loc.isMacroID()) { 118269bda4c027671df7163619f215209529eb236620Argyrios Kyrtzidis if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) 1183aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks return SourceLocation(); 1184aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks } 1185aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); 1186aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1187aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Break down the source location. 1188aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); 1189aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1190aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Try to load the file buffer. 
1191aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks bool InvalidTemp = false; 1192cfa88f893915ceb8ae4ce2f17c46c24a4d67502fDmitri Gribenko StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); 1193aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks if (InvalidTemp) 1194aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks return SourceLocation(); 1195aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1196aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks const char *TokenBegin = File.data() + LocInfo.second; 1197aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1198aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Lex from the start of the given location. 1199aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(), 1200aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks TokenBegin, File.end()); 1201aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Find the token. 1202aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks Token Tok; 1203aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks lexer.LexFromRawLexer(Tok); 1204aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks if (Tok.isNot(TKind)) 1205aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks return SourceLocation(); 1206aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks SourceLocation TokenLoc = Tok.getLocation(); 1207aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1208aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks // Calculate how much whitespace needs to be skipped if any. 
1209aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks unsigned NumWhitespaceChars = 0; 1210aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks if (SkipTrailingWhitespaceAndNewLine) { 1211aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks const char *TokenEnd = SM.getCharacterData(TokenLoc) + 1212aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks Tok.getLength(); 1213aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks unsigned char C = *TokenEnd; 1214aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks while (isHorizontalWhitespace(C)) { 1215aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks C = *(++TokenEnd); 1216aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks NumWhitespaceChars++; 1217aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks } 121835a2b798efd61fec425553f387d76be9c522f184Eli Friedman 121935a2b798efd61fec425553f387d76be9c522f184Eli Friedman // Skip \r, \n, \r\n, or \n\r 122035a2b798efd61fec425553f387d76be9c522f184Eli Friedman if (C == '\n' || C == '\r') { 122135a2b798efd61fec425553f387d76be9c522f184Eli Friedman char PrevC = C; 122235a2b798efd61fec425553f387d76be9c522f184Eli Friedman C = *(++TokenEnd); 1223aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks NumWhitespaceChars++; 122435a2b798efd61fec425553f387d76be9c522f184Eli Friedman if ((C == '\n' || C == '\r') && C != PrevC) 122535a2b798efd61fec425553f387d76be9c522f184Eli Friedman NumWhitespaceChars++; 122635a2b798efd61fec425553f387d76be9c522f184Eli Friedman } 1227aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks } 1228aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks 1229a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars); 1230aca25bccefe56121b686706afc84c8cb5d46e65bAnna Zaks} 123124f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner 12325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer, 12335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// get its size, 
and return it. This is tricky in several cases: 12345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 1. If currently at the start of a trigraph, we warn about the trigraph, 12355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// then either return the trigraph (skipping 3 chars) or the '?', 12365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// depending on whether trigraphs are enabled or not. 12375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 2. If this is an escaped newline (potentially with whitespace between 12385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// the backslash and newline), implicitly skip the newline and return 12395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// the char after it. 12405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 12415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// This handles the slow/uncommon case of the getCharAndSize method. Here we 12425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// know that we can accumulate into Size, and that we have already incremented 12435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// Ptr by Size bytes. 12445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 12455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should 12465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// be updated to match. 12475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 12485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerchar Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, 1249d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Token *Tok) { 12505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have a slash, look for an escaped newline. 
12515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Ptr[0] == '\\') { 12525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Size; 12535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Ptr; 12545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerSlash: 12555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Common case, backslash-char where the char is not whitespace. 12565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (!isWhitespace(Ptr[0])) return '\\'; 12571eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 12585636a3b6ece2c1f413464b72545e08eb0b7f06e4Chris Lattner // See if we have optional whitespace characters between the slash and 12595636a3b6ece2c1f413464b72545e08eb0b7f06e4Chris Lattner // newline. 126024f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { 126124f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Remember that this token needs to be cleaned. 126224f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if (Tok) Tok->setFlag(Token::NeedsCleaning); 126324f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner 126424f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Warn if there was whitespace between the backslash and newline. 12655636a3b6ece2c1f413464b72545e08eb0b7f06e4Chris Lattner if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode()) 126624f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Diag(Ptr, diag::backslash_newline_space); 12671eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 126824f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Found backslash<whitespace><newline>. Parse the char after it. 
126924f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Size += EscapedNewLineSize; 127024f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Ptr += EscapedNewLineSize; 1271f132dcaae82ebfc44c4fe0e84bf0b1f95e9d1251Argyrios Kyrtzidis 127204a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // If the char that we finally got was a \n, then we must have had 127304a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // something like \<newline><newline>. We don't want to consume the 127404a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // second newline. 127504a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0') 127604a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis return ' '; 1277f132dcaae82ebfc44c4fe0e84bf0b1f95e9d1251Argyrios Kyrtzidis 127824f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Use slow version to accumulate a correct size field. 127924f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner return getCharAndSizeSlow(Ptr, Size, Tok); 128024f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner } 12811eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 12825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, this is not an escaped newline, just return the slash. 12835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return '\\'; 12845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 12851eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 12865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is a trigraph, process it. 12875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Ptr[0] == '?' && Ptr[1] == '?') { 12885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is actually a legal trigraph (not something like "??x"), emit 12895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // a trigraph warning. If so, and if trigraphs are enabled, return it. 
12905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) { 12915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Remember that this token needs to be cleaned. 1292d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner if (Tok) Tok->setFlag(Token::NeedsCleaning); 12935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 12945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Ptr += 3; 12955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Size += 3; 12965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '\\') goto Slash; 12975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return C; 12985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 12995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 13001eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is neither, return a single character. 13025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Size; 13035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return *Ptr; 13045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 13055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 13065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 13075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the 13085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size, 13095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// and that we have already incremented Ptr by Size bytes. 13105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// 13115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// NOTE: When this method is updated, getCharAndSizeSlow (above) should 13125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// be updated to match. 
13135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerchar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, 13144e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie const LangOptions &LangOpts) { 13155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have a slash, look for an escaped newline. 13165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Ptr[0] == '\\') { 13175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Size; 13185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Ptr; 13195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerSlash: 13205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Common case, backslash-char where the char is not whitespace. 13215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (!isWhitespace(Ptr[0])) return '\\'; 13221eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // See if we have optional whitespace characters followed by a newline. 132424f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { 132524f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Found backslash<whitespace><newline>. Parse the char after it. 132624f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Size += EscapedNewLineSize; 132724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner Ptr += EscapedNewLineSize; 13281eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 132904a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // If the char that we finally got was a \n, then we must have had 133004a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // something like \<newline><newline>. We don't want to consume the 133104a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis // second newline. 
133204a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0') 133304a94bcc56438b17e88db592708324041f75d48cArgyrios Kyrtzidis return ' '; 1334f132dcaae82ebfc44c4fe0e84bf0b1f95e9d1251Argyrios Kyrtzidis 133524f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner // Use slow version to accumulate a correct size field. 13364e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts); 133724f0e48c0aa62f2268e061aad70f9b19a59e7b52Chris Lattner } 13381eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, this is not an escaped newline, just return the slash. 13405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return '\\'; 13415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 13421eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is a trigraph, process it. 13444e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (LangOpts.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') { 13455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is actually a legal trigraph (not something like "??x"), return 13465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // it. 13475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (char C = GetTrigraphCharForLetter(Ptr[2])) { 13485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Ptr += 3; 13495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Size += 3; 13505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '\\') goto Slash; 13515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return C; 13525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 13535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 13541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 13555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is neither, return a single character. 
13565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++Size; 13575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return *Ptr; 13585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 13595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 13605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 13615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Helper methods for lexing. 13625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 13635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1364f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor/// \brief Routine that indiscriminately skips bytes in the source file. 1365f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregorvoid Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) { 1366f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor BufferPtr += Bytes; 1367f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor if (BufferPtr > BufferEnd) 1368f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor BufferPtr = BufferEnd; 1369d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // FIXME: What exactly does the StartOfLine bit mean? There are two 1370d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // possible meanings for the "start" of the line: the first token on the 1371d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // unexpanded line, or the first token on the expanded line. 
1372f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor IsAtStartOfLine = StartOfLine; 1373d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = StartOfLine; 1374f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor} 1375f4f6c9db68465b886ec2e596feaa6ecc782395a4Douglas Gregor 1376ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rosestatic bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { 1377263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko if (LangOpts.CPlusPlus11 || LangOpts.C11) { 1378263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C11AllowedIDChars( 1379263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C11AllowedIDCharRanges); 1380263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return C11AllowedIDChars.contains(C); 1381263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else if (LangOpts.CPlusPlus) { 1382263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( 1383263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko CXX03AllowedIDCharRanges); 1384263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return CXX03AllowedIDChars.contains(C); 1385263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else { 1386263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C99AllowedIDChars( 1387263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C99AllowedIDCharRanges); 1388263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return C99AllowedIDChars.contains(C); 1389263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } 1390c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose} 1391c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1392ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rosestatic bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { 
1393ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose assert(isAllowedIDChar(C, LangOpts)); 1394263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko if (LangOpts.CPlusPlus11 || LangOpts.C11) { 1395263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars( 1396263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C11DisallowedInitialIDCharRanges); 1397263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return !C11DisallowedInitialIDChars.contains(C); 1398263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else if (LangOpts.CPlusPlus) { 1399ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose return true; 1400263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else { 1401263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( 1402263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C99DisallowedInitialIDCharRanges); 1403263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko return !C99DisallowedInitialIDChars.contains(C); 1404263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } 1405ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose} 1406ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 1407ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rosestatic inline CharSourceRange makeCharRange(Lexer &L, const char *Begin, 1408ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose const char *End) { 1409ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose return CharSourceRange::getCharRange(L.getSourceLocation(Begin), 1410ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose L.getSourceLocation(End)); 1411ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose} 1412ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 1413ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rosestatic void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, 
1414ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose CharSourceRange Range, bool IsFirst) { 1415ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose // Check C99 compatibility. 1416ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (Diags.getDiagnosticLevel(diag::warn_c99_compat_unicode_id, 1417ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Range.getBegin()) > DiagnosticsEngine::Ignored) { 1418ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose enum { 1419ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose CannotAppearInIdentifier = 0, 1420ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose CannotStartIdentifier 1421ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose }; 1422ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 1423263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C99AllowedIDChars( 1424263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C99AllowedIDCharRanges); 1425263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( 1426263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko C99DisallowedInitialIDCharRanges); 1427263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko if (!C99AllowedIDChars.contains(C)) { 1428ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) 1429ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << Range 1430ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << CannotAppearInIdentifier; 1431263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko } else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) { 1432ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) 1433ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << Range 1434ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << CannotStartIdentifier; 
1435ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 1436c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 1437c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1438ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose // Check C++98 compatibility. 1439ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (Diags.getDiagnosticLevel(diag::warn_cxx98_compat_unicode_id, 1440ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Range.getBegin()) > DiagnosticsEngine::Ignored) { 1441263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( 1442263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko CXX03AllowedIDCharRanges); 1443263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko if (!CXX03AllowedIDChars.contains(C)) { 1444ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id) 1445ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << Range; 1446ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 1447ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 1448ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 1449c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1450d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexIdentifier(Token &Result, const char *CurPtr) { 14515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$] 14525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer unsigned Size; 14535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer unsigned char C = *CurPtr++; 1454cd991dbb12f24368753cef086c7ad3ec203c9ea6Chris Lattner while (isIdentifierBody(C)) 14555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 1456cd991dbb12f24368753cef086c7ad3ec203c9ea6Chris Lattner 14575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer --CurPtr; // Back up over the skipped character. 
14585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 14595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Fast path, no $,\,? in identifier found. '\' might be an escaped newline 14605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN. 1461cd991dbb12f24368753cef086c7ad3ec203c9ea6Chris Lattner // 14629893902eceba7f01dd1521349d33866f77254d78Jordan Rose // TODO: Could merge these checks into an InfoTable flag to make the 14639893902eceba7f01dd1521349d33866f77254d78Jordan Rose // comparison cheaper 1464c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (isASCII(C) && C != '\\' && C != '?' && 1465c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose (C != '$' || !LangOpts.DollarIdents)) { 14665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerFinishIdentifier: 14675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *IdStart = BufferPtr; 1468c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara FormTokenWithChars(Result, CurPtr, tok::raw_identifier); 1469c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara Result.setRawIdentifierData(IdStart); 14701eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 14715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are in raw mode, return this identifier raw. There is no need to 14725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // look up identifier information or attempt to macro expand it. 1473c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara if (LexingRawMode) 1474d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 14751eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1476c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // Fill in Result.IdentifierInfo and update the token kind, 1477c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara // looking up the identifier in the identifier table. 
1478c4bf2b9afb7d47445a9dc6bc848657098a4e3851Abramo Bagnara IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); 14791eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 14805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Finally, now that we know we have an identifier, pass this off to the 14815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // preprocessor, which may macro expand it or something. 1482d1186fa38166a581b51975f0382a45fc3a0733d0Chris Lattner if (II->isHandleIdentifierCase()) 1483d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return PP->HandleIdentifier(Result); 14846aa52ec6b969faabf3764baf79d89810b8249a7eDouglas Gregor 1485d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 14865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 14871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 14885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, $,\,? in identifier found. Enter slower path. 14891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 14905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 14915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (1) { 14925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '$') { 14935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we hit a $ and they are not supported in identifiers, we are done. 14944e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!LangOpts.DollarIdents) goto FinishIdentifier; 14951eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 14965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, emit a diagnostic and continue. 
149774d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 149874d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(CurPtr, diag::ext_dollar_in_identifier); 14995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, Size, Result); 15005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 15015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer continue; 1502c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1503c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } else if (C == '\\') { 1504c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose const char *UCNPtr = CurPtr + Size; 1505c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0); 1506ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts)) 1507c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose goto FinishIdentifier; 1508c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1509ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (!isLexingRawMode()) { 1510ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, 1511ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose makeCharRange(*this, CurPtr, UCNPtr), 1512ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose /*IsFirst=*/false); 1513ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 1514ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 1515c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Result.setFlag(Token::HasUCN); 1516c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') || 1517c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U')) 1518c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose CurPtr = UCNPtr; 1519c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else 
1520c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose while (CurPtr != UCNPtr) 1521c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose (void)getAndAdvanceChar(CurPtr, Result); 1522c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1523c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose C = getCharAndSize(CurPtr, Size); 1524c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose continue; 1525c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } else if (!isASCII(C)) { 1526c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose const char *UnicodePtr = CurPtr; 1527c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose UTF32 CodePoint; 1528cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko ConversionResult Result = 1529cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr, 1530cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko (const UTF8 *)BufferEnd, 1531cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko &CodePoint, 1532cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko strictConversion); 1533c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Result != conversionOK || 1534ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) 1535c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose goto FinishIdentifier; 1536c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 1537ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (!isLexingRawMode()) { 1538ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, 1539ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose makeCharRange(*this, CurPtr, UnicodePtr), 1540ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose /*IsFirst=*/false); 1541ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 1542ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 1543c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose CurPtr = UnicodePtr; 
1544c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose C = getCharAndSize(CurPtr, Size); 1545c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose continue; 1546c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } else if (!isIdentifierBody(C)) { 15475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer goto FinishIdentifier; 15485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 15495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 15505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, this character is good, consume it. 15515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, Size, Result); 15525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 15535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 1554c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose while (isIdentifierBody(C)) { 15555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, Size, Result); 15565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 15575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 15585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 15595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 15605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1561a75ec43d625753b4439b0d6f70bd988444c74617Douglas Gregor/// isHexaLiteral - Return true if Start points to a hex constant. 15624a551000bee716ac8b1bbe16134a53f0ad221a5aChris Lattner/// in microsoft mode (where this is supposed to be several different tokens). 
1563e506f8a41063410c75945ebb804758bd0202947fEli Friedmanbool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) { 15646ab55ebab20086f725c4017b48ef8d7691ef870fChris Lattner unsigned Size; 15654e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts); 15666ab55ebab20086f725c4017b48ef8d7691ef870fChris Lattner if (C1 != '0') 15676ab55ebab20086f725c4017b48ef8d7691ef870fChris Lattner return false; 15684e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts); 15696ab55ebab20086f725c4017b48ef8d7691ef870fChris Lattner return (C2 == 'x' || C2 == 'X'); 1570a75ec43d625753b4439b0d6f70bd988444c74617Douglas Gregor} 15715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 15725253c7ff266ae79308050c9f43d60dd1a67c5fb9Nate Begeman/// LexNumericConstant - Lex the remainder of a integer or floating point 15735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// constant. From[-1] is the first character lexed. Return the end of the 15745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// constant. 1575d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { 15765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer unsigned Size; 15775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char C = getCharAndSize(CurPtr, Size); 15785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char PrevCh = 0; 15799893902eceba7f01dd1521349d33866f77254d78Jordan Rose while (isPreprocessingNumberBody(C)) { // FIXME: UCNs in ud-suffix. 
15805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, Size, Result); 15815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer PrevCh = C; 15825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getCharAndSize(CurPtr, Size); 15835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 15841eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 15855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we fell out, check for a sign, due to 1e+12. If we have one, continue. 1586b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) { 1587b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner // If we are in Microsoft mode, don't continue if the constant is hex. 1588b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner // For example, MSVC will accept the following as 3 tokens: 0x1234567e+1 15894e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts)) 1590b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); 1591b2f4a20ddc281b194caa00e850ad74759e0d50baChris Lattner } 15925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 15935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have a hex FP constant, continue. 1594d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) { 1595d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith // Outside C99, we accept hexadecimal floating point numbers as a 1596d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith // not-quite-conforming extension. Only do so if this looks like it's 1597d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith // actually meant to be a hexfloat, and not if it has a ud-suffix. 
1598d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith bool IsHexFloat = true; 1599d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith if (!LangOpts.C99) { 1600d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith if (!isHexaLiteral(BufferPtr, LangOpts)) 1601d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith IsHexFloat = false; 1602d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith else if (std::find(BufferPtr, CurPtr, '_') != CurPtr) 1603d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith IsHexFloat = false; 1604d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith } 1605d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith if (IsHexFloat) 1606d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); 1607d2e95d1538ff91fe902464f02f83429f96117af5Richard Smith } 16081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1609859b6227694033dd6eaf3991a2b80877a406c382Richard Smith // If we have a digit separator, continue. 
1610859b6227694033dd6eaf3991a2b80877a406c382Richard Smith if (C == '\'' && getLangOpts().CPlusPlus1y) { 1611859b6227694033dd6eaf3991a2b80877a406c382Richard Smith unsigned NextSize; 1612859b6227694033dd6eaf3991a2b80877a406c382Richard Smith char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts()); 1613859b6227694033dd6eaf3991a2b80877a406c382Richard Smith if (isAlphanumeric(Next)) { 1614859b6227694033dd6eaf3991a2b80877a406c382Richard Smith if (!isLexingRawMode()) 1615859b6227694033dd6eaf3991a2b80877a406c382Richard Smith Diag(CurPtr, diag::warn_cxx11_compat_digit_separator); 1616859b6227694033dd6eaf3991a2b80877a406c382Richard Smith CurPtr = ConsumeChar(CurPtr, Size, Result); 1617859b6227694033dd6eaf3991a2b80877a406c382Richard Smith return LexNumericConstant(Result, CurPtr); 1618859b6227694033dd6eaf3991a2b80877a406c382Richard Smith } 1619859b6227694033dd6eaf3991a2b80877a406c382Richard Smith } 1620859b6227694033dd6eaf3991a2b80877a406c382Richard Smith 16215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 162247246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner const char *TokStart = BufferPtr; 16239e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::numeric_constant); 162447246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner Result.setLiteralData(TokStart); 1625d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 16265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 16275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 16285cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes 1629e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith/// in C++11, or warn on a ud-suffix in C++98. 
16304ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smithconst char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, 16314ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith bool IsStringLiteral) { 16324e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie assert(getLangOpts().CPlusPlus); 16335cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 16345cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith // Maximally munch an identifier. FIXME: UCNs. 16355cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith unsigned Size; 16365cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith char C = getCharAndSize(CurPtr, Size); 16375cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith if (isIdentifierHead(C)) { 163880ad52f327b532bded5c5b0ee38779d841c6cd35Richard Smith if (!getLangOpts().CPlusPlus11) { 1639e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith if (!isLexingRawMode()) 16402fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith Diag(CurPtr, 16412fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith C == '_' ? diag::warn_cxx11_compat_user_defined_literal 16422fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith : diag::warn_cxx11_compat_reserved_user_defined_literal) 16432fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); 16442fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith return CurPtr; 16452fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith } 16462fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith 16472fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix 16482fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith // that does not start with an underscore is ill-formed. As a conforming 16492fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith // extension, we treat all such suffixes as if they had whitespace before 16502fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith // them. 
16514ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith bool IsUDSuffix = false; 16524ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith if (C == '_') 16534ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith IsUDSuffix = true; 165406dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith else if (IsStringLiteral && getLangOpts().CPlusPlus1y) { 165506dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // In C++1y, we need to look ahead a few characters to see if this is a 165606dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // valid suffix for a string literal or a numeric literal (this could be 165706dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // the 'operator""if' defining a numeric literal operator). 16586fde25e14e8d89080fb9f32b7c0d65f869bb06eaRichard Smith const unsigned MaxStandardSuffixLength = 3; 165906dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith char Buffer[MaxStandardSuffixLength] = { C }; 166006dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith unsigned Consumed = Size; 166106dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith unsigned Chars = 1; 166206dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith while (true) { 166306dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith unsigned NextSize; 166406dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize, 166506dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith getLangOpts()); 166606dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith if (!isIdentifierBody(Next)) { 166706dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // End of suffix. Check whether this is on the whitelist. 
166806dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith IsUDSuffix = (Chars == 1 && Buffer[0] == 's') || 166906dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith NumericLiteralParser::isValidUDSuffix( 167006dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith getLangOpts(), StringRef(Buffer, Chars)); 167106dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith break; 167206dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith } 167306dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith 167406dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith if (Chars == MaxStandardSuffixLength) 167506dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith // Too long: can't be a standard suffix. 167606dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith break; 167706dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith 167806dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith Buffer[Chars++] = Next; 167906dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith Consumed += NextSize; 168006dd2b317a4bf282f3ac526b5cc4d74c39ed7cedRichard Smith } 16814ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith } 16824ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith 16834ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith if (!IsUDSuffix) { 16842fb4ae36825ca3a0cbe7e845c5747062870066beRichard Smith if (!isLexingRawMode()) 16854ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith Diag(CurPtr, getLangOpts().MicrosoftMode ? 
1686b0afd5df3c427c329f6c5e00fe264c5bada3bf36Francois Pichet diag::ext_ms_reserved_user_defined_literal : 1687b0afd5df3c427c329f6c5e00fe264c5bada3bf36Francois Pichet diag::ext_reserved_user_defined_literal) 1688e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); 1689e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith return CurPtr; 1690e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith } 1691e816c717d4de1de6c67f1fd5ef4a927fe2bf2ea7Richard Smith 169299831e4677a7e2e051af636221694d60ba31fcdbRichard Smith Result.setFlag(Token::HasUDSuffix); 16935cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith do { 16945cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith CurPtr = ConsumeChar(CurPtr, Size, Result); 16955cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith C = getCharAndSize(CurPtr, Size); 16965cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith } while (isIdentifierBody(C)); 16975cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith } 16985cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith return CurPtr; 16995cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith} 17005cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 17015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexStringLiteral - Lex the remainder of a string literal, after having lexed 17025cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor/// either " or L" or u8" or u" or U". 1703d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexStringLiteral(Token &Result, const char *CurPtr, 17045cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::TokenKind Kind) { 17055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *NulCharacter = 0; // Does this string contain the \0 character? 
17061eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1707661a99690bc133bbaa029da925481d4a860dec90Richard Smith if (!isLexingRawMode() && 1708661a99690bc133bbaa029da925481d4a860dec90Richard Smith (Kind == tok::utf8_string_literal || 1709661a99690bc133bbaa029da925481d4a860dec90Richard Smith Kind == tok::utf16_string_literal || 1710d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith Kind == tok::utf32_string_literal)) 1711d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith Diag(BufferPtr, getLangOpts().CPlusPlus 1712d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith ? diag::warn_cxx98_compat_unicode_literal 1713d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith : diag::warn_c99_compat_unicode_literal); 1714661a99690bc133bbaa029da925481d4a860dec90Richard Smith 17155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char C = getAndAdvanceChar(CurPtr, Result); 17165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (C != '"') { 1717571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner // Skip escaped characters. Escaped newlines will already be processed by 1718571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner // getAndAdvanceChar. 1719571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner if (C == '\\') 17205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getAndAdvanceChar(CurPtr, Result); 172133611e0d5ab1372608a7649b1877cd4300621c71Douglas Gregor 1722571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner if (C == '\n' || C == '\r' || // Newline. 172333611e0d5ab1372608a7649b1877cd4300621c71Douglas Gregor (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. 
17244e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) 1725b6ebd4490235c9ea6016530d623c46d0b9ce565bRichard Smith Diag(BufferPtr, diag::ext_unterminated_string); 17269e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1727d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 17285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 1729571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner 17307d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (C == 0) { 17317d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (isCodeCompletionPoint(CurPtr-1)) { 17327d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 17337d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1734d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman cutOffLexing(); 1735d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 17367d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 17377d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 1738571339c215dc3fa44acccb89c7c6b1d5b962b7f2Chris Lattner NulCharacter = CurPtr-1; 17397d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 17405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getAndAdvanceChar(CurPtr, Result); 17415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 17421eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 17435cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith // If we are in C++11, lex the optional ud-suffix. 
17444e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (getLangOpts().CPlusPlus) 17454ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith CurPtr = LexUDSuffix(Result, CurPtr, true); 17465cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 17475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If a nul character existed in the string, warn about it. 174874d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (NulCharacter && !isLexingRawMode()) 174974d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(NulCharacter, diag::null_in_string); 17505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 17515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of the token as well as the BufferPtr instance var. 175247246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner const char *TokStart = BufferPtr; 17535cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor FormTokenWithChars(Result, CurPtr, Kind); 175447246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner Result.setLiteralData(TokStart); 1755d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 17565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 17575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 17582fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// LexRawStringLiteral - Lex the remainder of a raw string literal, after 17592fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// having lexed R", LR", u8R", uR", or UR". 
1760d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, 17612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::TokenKind Kind) { 17622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3: 17632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Between the initial and final double quote characters of the raw string, 17642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // any transformations performed in phases 1 and 2 (trigraphs, 17652fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // universal-character-names, and line splicing) are reverted. 17662fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 1767661a99690bc133bbaa029da925481d4a860dec90Richard Smith if (!isLexingRawMode()) 1768661a99690bc133bbaa029da925481d4a860dec90Richard Smith Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal); 1769661a99690bc133bbaa029da925481d4a860dec90Richard Smith 17702fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper unsigned PrefixLen = 0; 17712fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 17722fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) 17732fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ++PrefixLen; 17742fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 17752fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // If the last character was not a '(', then we didn't lex a valid delimiter. 
17762fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (CurPtr[PrefixLen] != '(') { 17772fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (!isLexingRawMode()) { 17782fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper const char *PrefixEnd = &CurPtr[PrefixLen]; 17792fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (PrefixLen == 16) { 17802fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Diag(PrefixEnd, diag::err_raw_delim_too_long); 17812fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } else { 17822fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Diag(PrefixEnd, diag::err_invalid_char_raw_delim) 17832fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper << StringRef(PrefixEnd, 1); 17842fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 17852fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 17862fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 17872fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Search for the next '"' in hopes of salvaging the lexer. Unfortunately, 17882fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // it's possible the '"' was intended to be part of the raw string, but 17892fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // there's not much we can do about that. 
17902fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper while (1) { 17912fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper char C = *CurPtr++; 17922fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 17932fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (C == '"') 17942fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper break; 17952fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (C == 0 && CurPtr-1 == BufferEnd) { 17962fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper --CurPtr; 17972fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper break; 17982fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 17992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18002fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18012fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper FormTokenWithChars(Result, CurPtr, tok::unknown); 1802d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18032fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18042fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18052fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Save prefix and move CurPtr past it 18062fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper const char *Prefix = CurPtr; 18072fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper CurPtr += PrefixLen + 1; // skip over prefix and '(' 18082fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18092fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper while (1) { 18102fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper char C = *CurPtr++; 18112fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18122fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (C == ')') { 18132fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Check for prefix match and closing quote. 
18142fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') { 18152fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper CurPtr += PrefixLen + 1; // skip over prefix and '"' 18162fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper break; 18172fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file. 18192fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (!isLexingRawMode()) 18202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Diag(BufferPtr, diag::err_unterminated_raw_string) 18212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper << StringRef(Prefix, PrefixLen); 18222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1823d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 18262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18275cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith // If we are in C++11, lex the optional ud-suffix. 18284e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (getLangOpts().CPlusPlus) 18294ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith CurPtr = LexUDSuffix(Result, CurPtr, true); 18305cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 18312fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Update the location of token as well as BufferPtr. 
18322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper const char *TokStart = BufferPtr; 18332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper FormTokenWithChars(Result, CurPtr, Kind); 18342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Result.setLiteralData(TokStart); 1835d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper} 18372fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 18385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexAngledStringLiteral - Lex the remainder of an angled string literal, 18395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// after having lexed the '<' character. This is used for #include filenames. 1840d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { 18415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *NulCharacter = 0; // Does this string contain the \0 character? 18429cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner const char *AfterLessPos = CurPtr; 18435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char C = getAndAdvanceChar(CurPtr, Result); 18445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (C != '>') { 18455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Skip escaped characters. 18465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '\\') { 18475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Skip the escaped character. 184860b202c5eb6fb2d608bfef05523f40fdf5091d48Dmitri Gribenko getAndAdvanceChar(CurPtr, Result); 18495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (C == '\n' || C == '\r' || // Newline. 18507d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis (C == 0 && (CurPtr-1 == BufferEnd || // End of file. 
18517d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis isCodeCompletionPoint(CurPtr-1)))) { 18529cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner // If the filename is unterminated, then it must just be a lone < 18539cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner // character. Return this as such. 18549cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner FormTokenWithChars(Result, AfterLessPos, tok::less); 1855d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (C == 0) { 18575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer NulCharacter = CurPtr-1; 18585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 18595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = getAndAdvanceChar(CurPtr, Result); 18605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 18611eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 18625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If a nul character existed in the string, warn about it. 186374d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (NulCharacter && !isLexingRawMode()) 186474d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(NulCharacter, diag::null_in_string); 18651eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 18665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 
186747246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner const char *TokStart = BufferPtr; 18689e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::angle_string_literal); 186947246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner Result.setLiteralData(TokStart); 1870d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 18725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 18735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 18745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexCharConstant - Lex the remainder of a character constant, after having 18755cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor/// lexed either ' or L' or u' or U'. 1876d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexCharConstant(Token &Result, const char *CurPtr, 18775cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::TokenKind Kind) { 18785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *NulCharacter = 0; // Does this character contain the \0 character? 18795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1880661a99690bc133bbaa029da925481d4a860dec90Richard Smith if (!isLexingRawMode() && 1881d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)) 1882d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith Diag(BufferPtr, getLangOpts().CPlusPlus 1883d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith ? 
diag::warn_cxx98_compat_unicode_literal 1884d4bf760d057f1e8724ab7be340a2d42adae0900aRichard Smith : diag::warn_c99_compat_unicode_literal); 1885661a99690bc133bbaa029da925481d4a860dec90Richard Smith 18865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char C = getAndAdvanceChar(CurPtr, Result); 18875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '\'') { 18884e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) 1889b6ebd4490235c9ea6016530d623c46d0b9ce565bRichard Smith Diag(BufferPtr, diag::ext_empty_character); 18909e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 1891d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 18925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 18931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1894d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner while (C != '\'') { 1895d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner // Skip escaped characters. 18966d926ae667cca926e77ddce1734514911706ed0aNico Weber if (C == '\\') 18976d926ae667cca926e77ddce1734514911706ed0aNico Weber C = getAndAdvanceChar(CurPtr, Result); 18986d926ae667cca926e77ddce1734514911706ed0aNico Weber 18996d926ae667cca926e77ddce1734514911706ed0aNico Weber if (C == '\n' || C == '\r' || // Newline. 19006d926ae667cca926e77ddce1734514911706ed0aNico Weber (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. 
19014e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) 1902b6ebd4490235c9ea6016530d623c46d0b9ce565bRichard Smith Diag(BufferPtr, diag::ext_unterminated_char); 1903d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1904d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 19056d926ae667cca926e77ddce1734514911706ed0aNico Weber } 19066d926ae667cca926e77ddce1734514911706ed0aNico Weber 19076d926ae667cca926e77ddce1734514911706ed0aNico Weber if (C == 0) { 19087d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (isCodeCompletionPoint(CurPtr-1)) { 19097d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 19107d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis FormTokenWithChars(Result, CurPtr-1, tok::unknown); 1911d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman cutOffLexing(); 1912d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 19137d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 19147d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 1915d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner NulCharacter = CurPtr-1; 1916d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner } 1917d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner C = getAndAdvanceChar(CurPtr, Result); 19185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 19191eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19205cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith // If we are in C++11, lex the optional ud-suffix. 19214e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (getLangOpts().CPlusPlus) 19224ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith CurPtr = LexUDSuffix(Result, CurPtr, false); 19235cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith 1924d80f786689d608e5c22d6e1045884de7aff76c40Chris Lattner // If a nul character existed in the character, warn about it. 
192574d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (NulCharacter && !isLexingRawMode()) 192674d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(NulCharacter, diag::null_in_char); 19275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 19285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 192947246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner const char *TokStart = BufferPtr; 19305cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor FormTokenWithChars(Result, CurPtr, Kind); 193147246be8ac5b0ddde6c402b8fc6946b6135487b5Chris Lattner Result.setLiteralData(TokStart); 1932d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 19335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 19345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 19355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// SkipWhitespace - Efficiently skip over a series of whitespace characters. 19365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// Update BufferPtr to point to the next non-whitespace character and return. 1937d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner/// 1938d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner/// This method forms a token and returns true if KeepWhitespaceMode is enabled. 1939d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner/// 1940d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, 1941d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool &TokAtPhysicalStartOfLine) { 19425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Whitespace - Skip it, then return the token after the whitespace. 
19436aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose bool SawNewline = isVerticalWhitespace(CurPtr[-1]); 19446aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 19458f1900376906c51aabf5fd18a39524e2318276baRichard Smith unsigned char Char = *CurPtr; 19468f1900376906c51aabf5fd18a39524e2318276baRichard Smith 19478f1900376906c51aabf5fd18a39524e2318276baRichard Smith // Skip consecutive spaces efficiently. 19485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (1) { 19495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Skip horizontal whitespace very aggressively. 19505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (isHorizontalWhitespace(Char)) 19515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = *++CurPtr; 19521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1953ddd3e8b90a92c39f620bda7a0945320c0b9e60dbDaniel Dunbar // Otherwise if we have something other than whitespace, we're done. 19546aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose if (!isVerticalWhitespace(Char)) 19555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 19561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ParsingPreprocessorDirective) { 19585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // End of preprocessor directive line, let LexTokenInternal handle this. 19595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 1960d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner return false; 19615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 19621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19638f1900376906c51aabf5fd18a39524e2318276baRichard Smith // OK, but handle newline. 
19646aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose SawNewline = true; 19655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = *++CurPtr; 19665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 19675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1968d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // If the client wants us to return whitespace, return it now. 1969d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner if (isKeepWhitespaceMode()) { 19709e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 1971d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SawNewline) { 19726aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose IsAtStartOfLine = true; 1973d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = true; 1974d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 19756aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose // FIXME: The next token will not have LeadingSpace set. 1976d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner return true; 1977d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner } 19781eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 19796aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose // If this isn't immediately after a newline, there is leading space. 
19806aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose char PrevChar = CurPtr[-1]; 19816aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose bool HasLeadingSpace = !isVerticalWhitespace(PrevChar); 19826aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 19836aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace); 1984d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SawNewline) { 19856aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose Result.setFlag(Token::StartOfLine); 1986d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman TokAtPhysicalStartOfLine = true; 1987d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 19886aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 19895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 1990d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner return false; 19915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 19925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1993bb23628148f555a4cf71f98c27096a7a804c085cNico Weber/// We have just read the // characters from input. Skip until we find the 1994bb23628148f555a4cf71f98c27096a7a804c085cNico Weber/// newline character thats terminate the comment. Then update BufferPtr and 1995bb23628148f555a4cf71f98c27096a7a804c085cNico Weber/// return. 1996046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// 1997046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// If we're in KeepCommentMode or any CommentHandler has inserted 1998046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// some tokens, this will store the first token and return true. 
bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
                            bool &TokAtPhysicalStartOfLine) {
  // If Line comments aren't explicitly enabled for this language, emit an
  // extension warning.
  if (!LangOpts.LineComment && !isLexingRawMode()) {
    Diag(BufferPtr, diag::ext_line_comment);

    // Mark them enabled so we only emit one warning for this translation
    // unit.
    LangOpts.LineComment = true;
  }

  // Scan over the body of the comment.  The common case, when scanning, is that
  // the comment contains normal ascii characters with nothing interesting in
  // them.  As such, optimize for this case with the inner loop.
  char C;
  do {
    C = *CurPtr;
    // Skip over characters in the fast loop.
    while (C != 0 &&                // Potentially EOF.
           C != '\n' && C != '\r')  // Newline or DOS-style newline.
      C = *++CurPtr;

    // Remember where the fast loop stopped; the simple case below jumps
    // straight back here.
    const char *NextLine = CurPtr;
    if (C != 0) {
      // We found a newline, see if it's escaped.
      const char *EscapePtr = CurPtr-1;
      while (isHorizontalWhitespace(*EscapePtr)) // Skip whitespace.
        --EscapePtr;

      // Rewind CurPtr to the start of the escape so that getAndAdvanceChar
      // below decodes the whole escaped-newline sequence.
      if (*EscapePtr == '\\') // Escaped newline.
        CurPtr = EscapePtr;
      else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' &&
               EscapePtr[-2] == '?') // Trigraph-escaped newline.
        CurPtr = EscapePtr-2;
      else
        break; // This is a newline, we're done.
    }

    // Otherwise, this is a hard case.  Fall back on getAndAdvanceChar to
    // properly decode the character.  Read it in raw mode to avoid emitting
    // diagnostics about things like trigraphs.  If we see an escaped newline,
    // we'll handle it below.
    const char *OldPtr = CurPtr;
    bool OldRawMode = isLexingRawMode();
    LexingRawMode = true;
    C = getAndAdvanceChar(CurPtr, Result);
    LexingRawMode = OldRawMode;

    // If we read only one character, then no special handling is needed.
    // We're done and can skip forward to the newline.
    if (C != 0 && CurPtr == OldPtr+1) {
      CurPtr = NextLine;
      break;
    }

    // If we read multiple characters, and one of those characters was a \r or
    // \n, then we had an escaped newline within the comment.  Emit diagnostic
    // unless the next line is also a // comment.
    if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') {
      for (; OldPtr != CurPtr; ++OldPtr)
        if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
          // Okay, we found a // comment that ends in a newline, if the next
          // line is also a // comment, but has spaces, don't emit a diagnostic.
          if (isWhitespace(C)) {
            const char *ForwardPtr = CurPtr;
            while (isWhitespace(*ForwardPtr))  // Skip whitespace.
              ++ForwardPtr;
            if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
              break;
          }

          // The diagnostic is reported at the character just before the
          // newline that was found.
          if (!isLexingRawMode())
            Diag(OldPtr-1, diag::ext_multi_line_line_comment);
          break;
        }
    }

    // getAndAdvanceChar consumed the nul at BufferEnd: step back onto it and
    // stop, the comment runs to the end of the buffer.
    if (CurPtr == BufferEnd+1) {
      --CurPtr;
      break;
    }

    // NOTE(review): PP is dereferenced unguarded here although it is
    // null-checked further down; presumably isCodeCompletionPoint() can only
    // be true when a preprocessor is attached -- confirm.
    if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
      PP->CodeCompleteNaturalLanguage();
      cutOffLexing();
      return false;
    }

  } while (C != '\n' && C != '\r');

  // Found but did not consume the newline.  Notify comment handlers about the
  // comment unless we're in a #if 0 block.
  if (PP && !isLexingRawMode() &&
      PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr),
                                            getSourceLocation(CurPtr)))) {
    BufferPtr = CurPtr;
    return true; // A token has to be returned.
  }

  // If we are returning comments as tokens, return this comment as a token.
  if (inKeepCommentMode())
    return SaveLineComment(Result, CurPtr);

  // If we are inside a preprocessor directive and we see the end of line,
  // return immediately, so that the lexer can return this as an EOD token.
  if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
    BufferPtr = CurPtr;
    return false;
  }

  // Otherwise, eat the \n character.  We don't care if this is a \n\r or
  // \r\n sequence.  This is an efficiency hack (because we know the \n can't
  // contribute to another token), it isn't needed for correctness.  Note that
  // this is ok even in KeepWhitespaceMode, because we would have returned the
  // comment above in that mode.
  ++CurPtr;

  // The next returned token is at the start of the line.
  Result.setFlag(Token::StartOfLine);
  TokAtPhysicalStartOfLine = true;
  // No leading whitespace seen so far.
  Result.clearFlag(Token::LeadingSpace);
  BufferPtr = CurPtr;
  return false;
}

/// If in save-comment mode, package up this Line comment in an appropriate
/// way and return it.
///
/// The comment is always formed as a tok::comment token ending at CurPtr, and
/// this function always returns true.  Inside a preprocessor directive (and
/// outside raw mode) the token's spelling is additionally rewritten from
/// "//..." to "/*...*/".
bool Lexer::SaveLineComment(Token &Result, const char *CurPtr) {
  // If we're not in a preprocessor directive, just return the // comment
  // directly.
  FormTokenWithChars(Result, CurPtr, tok::comment);

  if (!ParsingPreprocessorDirective || LexingRawMode)
    return true;

  // If this Line-style comment is in a macro definition, transmogrify it into
  // a C-style block comment.
  bool Invalid = false;
  std::string Spelling = PP->getSpelling(Result, &Invalid);
  // If the spelling could not be retrieved, keep the token as formed above.
  if (Invalid)
    return true;

  assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");
  Spelling[1] = '*';   // Change prefix to "/*".
  Spelling += "*/";    // add suffix.

  // Replace the token's contents with the rewritten spelling, keeping the
  // original location for both location arguments.
  Result.setKind(tok::comment);
  PP->CreateString(Spelling, Result,
                   Result.getLocation(), Result.getLocation());
  return true;
}

/// isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline
/// character (either \\n or \\r) is part of an escaped newline sequence that
/// directly follows a '*', i.e. an escaped newline sitting between the '*' and
/// '/' that end a block comment.  Issue a diagnostic if so.  We know that the
/// newline is inside of a block comment.
21561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stumpstatic bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, 21575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Lexer *L) { 21585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); 21591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Back up off the newline. 21615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer --CurPtr; 21621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is a two-character newline sequence, skip the other character. 21645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[0] == '\n' || CurPtr[0] == '\r') { 21655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // \n\n or \r\r -> not escaped newline. 21665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[0] == CurPtr[1]) 21675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return false; 21685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // \n\r or \r\n -> skip the newline. 21695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer --CurPtr; 21705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 21711eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have horizontal whitespace, skip over it. We allow whitespace 21735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // between the slash and newline. 
21745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer bool HasSpace = false; 21755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) { 21765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer --CurPtr; 21775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer HasSpace = true; 21785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 21791eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we have a slash, we know this is an escaped newline. 21815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (*CurPtr == '\\') { 21825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[-1] != '*') return false; 21835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 21845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // It isn't a slash, is it the ?? / trigraph? 21855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' || 21865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr[-3] != '*') 21875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return false; 21881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 21895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // This is the trigraph ending the comment. Emit a stern warning! 21905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr -= 2; 21915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 21925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If no trigraphs are enabled, warn that we ignored this trigraph and 21935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // ignore this * character. 
21944e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (!L->getLangOpts().Trigraphs) { 219574d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 219674d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CurPtr, diag::trigraph_ignored_block_comment); 21975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return false; 21985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 219974d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 220074d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CurPtr, diag::trigraph_ends_block_comment); 22015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 22021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Warn about having an escaped newline between the */ characters. 220474d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!L->isLexingRawMode()) 220574d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CurPtr, diag::escaped_newline_block_comment_end); 22061eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If there was space between the backslash and newline, warn about it. 
220874d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (HasSpace && !L->isLexingRawMode()) 220974d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner L->Diag(CurPtr, diag::backslash_newline_space); 22101eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return true; 22125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 22135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 22145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#ifdef __SSE2__ 22155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include <emmintrin.h> 22165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#elif __ALTIVEC__ 22175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include <altivec.h> 22185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#undef bool 22195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#endif 22205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2221ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// We have just read from input the / and * characters that started a comment. 2222ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// Read until we find the * and / characters that terminate the comment. 2223ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// Note that we don't bother decoding trigraphs or escaped newlines in block 2224ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// comments, because they cannot cause the comment to end. The only thing 2225ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// that can happen is the comment could end with an escaped newline between 2226ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// the terminating * and /. 22272d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner/// 2228046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// If we're in KeepCommentMode or any CommentHandler has inserted 2229046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner/// some tokens, this will store the first token and return true. 
2230d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, 2231d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool &TokAtPhysicalStartOfLine) { 22325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Scan one character past where we should, looking for a '/' character. Once 2233fc8f0e14ad142ed811e90fbd9a30e419e301c717Chris Lattner // we find it, check to see if it was preceded by a *. This common 22345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // optimization helps people who like to put a lot of * characters in their 22355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // comments. 22368146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner 22378146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // The first character we get with newlines and trigraphs skipped to handle 22388146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // the degenerate /*/ case below correctly if the * has an escaped newline 22398146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // after it. 22408146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner unsigned CharSize; 22418146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner unsigned char C = getCharAndSize(CurPtr, CharSize); 22428146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner CurPtr += CharSize; 22435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == 0 && CurPtr == BufferEnd+1) { 22447d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (!isLexingRawMode()) 22450af574270d3be2b0e73a3379dfaa633746f8fc6fChris Lattner Diag(BufferPtr, diag::err_unterminated_block_comment); 224631f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner --CurPtr; 22471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 224831f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner // KeepWhitespaceMode should return this broken comment as a token. 
Since 224931f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner // it isn't a well formed comment, just return it as an 'unknown' token. 225031f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner if (isKeepWhitespaceMode()) { 22519e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 225231f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner return true; 225331f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner } 22541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 225531f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner BufferPtr = CurPtr; 22562d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 22575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 22581eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22598146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // Check to see if the first character after the '/*' is another /. If so, 22608146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner // then this slash does not end the block comment, it is part of it. 22618146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner if (C == '/') 22628146b6851e873eab71341f1b1d3198894bc4c0acChris Lattner C = *CurPtr++; 22631eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (1) { 22655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Skip over all non-interesting characters until we find end of buffer or a 22665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // (probably ending) '/' character. 22677d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (CurPtr + 24 < BufferEnd && 22687d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis // If there is a code-completion point avoid the fast scan because it 22697d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis // doesn't check for '\0'. 
22707d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) { 22715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // While not aligned to a 16-byte boundary. 22725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0) 22735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 22741eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 22755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '/') goto FoundSlash; 22765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 22775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#ifdef __SSE2__ 22783f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer __m128i Slashes = _mm_set1_epi8('/'); 22793f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer while (CurPtr+16 <= BufferEnd) { 228031ba6135375433b617a8587ea6cc836a014ebd86Roman Divacky int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr, 228131ba6135375433b617a8587ea6cc836a014ebd86Roman Divacky Slashes)); 22823f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer if (cmp != 0) { 22836300f5b4382d5128c9346a4d4f05e7bac2e3d771Benjamin Kramer // Adjust the pointer to point directly after the first slash. It's 22846300f5b4382d5128c9346a4d4f05e7bac2e3d771Benjamin Kramer // not necessary to set C here, it will be overwritten at the end of 22856300f5b4382d5128c9346a4d4f05e7bac2e3d771Benjamin Kramer // the outer loop. 22869779fdd271bb6a938bdee93f901e4ef7b1a88610Michael J. 
Spencer CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1; 22873f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer goto FoundSlash; 22883f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer } 22895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr += 16; 22903f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer } 22915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#elif __ALTIVEC__ 22925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer __vector unsigned char Slashes = { 22931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump '/', '/', '/', '/', '/', '/', '/', '/', 22945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer '/', '/', '/', '/', '/', '/', '/', '/' 22955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer }; 22965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (CurPtr+16 <= BufferEnd && 22975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes)) 22985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr += 16; 22991eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump#else 23005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Scan for '/' quickly. Many block comments are very large. 23015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (CurPtr[0] != '/' && 23025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr[1] != '/' && 23035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr[2] != '/' && 23045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr[3] != '/' && 23055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr+4 < BufferEnd) { 23065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr += 4; 23075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#endif 23091eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // It has to be one of the bytes scanned, increment to it and read one. 
23115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 23125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Loop to scan the remainder. 23155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (C != '/' && C != '\0') 23165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 23171eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (C == '/') { 23193f6f4e65e7806fd49137cf3cf59a1432ed776b2aBenjamin Kramer FoundSlash: 23205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[-2] == '*') // We found the final */. We're done! 23215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 23221eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) { 23245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) { 23255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // We found the final */, though it had an escaped newline between the 23265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // * and /. We're done! 23275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 23285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr[0] == '*' && CurPtr[1] != '/') { 23315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If this is a /* inside of the comment, emit a warning. Don't do this 23325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // if this is a /*/, which will end the comment. This misses cases with 23335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // embedded escaped newlines, but oh well. 
233474d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 233574d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(CurPtr-1, diag::warn_nested_block_comment); 23365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (C == 0 && CurPtr == BufferEnd+1) { 23387d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (!isLexingRawMode()) 233974d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(BufferPtr, diag::err_unterminated_block_comment); 23405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Note: the user probably forgot a */. We could continue immediately 23415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // after the /*, but this would involve lexing a lot of what really is the 23425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // comment, which surely would confuse the parser. 234331f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner --CurPtr; 23441eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 234531f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner // KeepWhitespaceMode should return this broken comment as a token. Since 234631f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner // it isn't a well formed comment, just return it as an 'unknown' token. 
234731f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner if (isKeepWhitespaceMode()) { 23489e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 234931f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner return true; 235031f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner } 23511eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 235231f0ecae4b2d4d268d04c562a43d9fbfee96f189Chris Lattner BufferPtr = CurPtr; 23532d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 23547d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { 23557d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 23567d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis cutOffLexing(); 23577d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis return false; 23585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23597d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 23605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer C = *CurPtr++; 23615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 23633d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner // Notify comment handlers about the comment unless we're in a #if 0 block. 23643d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner if (PP && !isLexingRawMode() && 23653d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr), 23663d0ad58b28e0d50fca7f21c6a078b05370510288Chris Lattner getSourceLocation(CurPtr)))) { 2367046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner BufferPtr = CurPtr; 2368046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner return true; // A token has to be returned. 
2369046c2277dcbcc8eb89dbb5b1b8c5226b7cb81635Chris Lattner } 23702e22253e03e175144aeb9d13350a12fd83f858beDouglas Gregor 23715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are returning comments as tokens, return this comment as a token. 2372fa95a019da00b926d64ff83358ba73bbc6ae1e37Chris Lattner if (inKeepCommentMode()) { 23739e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::comment); 23742d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return true; 23755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 23775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // It is common for the tokens immediately after a /**/ comment to be 23785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // whitespace. Instead of going through the big switch, handle it 2379d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // efficiently now. This is safe even in KeepWhitespaceMode because we would 2380d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // have already returned above with the comment as a token. 23815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (isHorizontalWhitespace(*CurPtr)) { 2382d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine); 23832d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 23845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 23855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 23865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Otherwise, just return so that the next character will be lexed as a token. 
23875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 2388d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.setFlag(Token::LeadingSpace); 23892d3818959df7b71bfcf8c2f2b573856bfc4c3a21Chris Lattner return false; 23905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 23915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 23925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 23935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// Primary Lexing Entry Points 23945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===// 23955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 23965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// ReadToEndOfLine - Read the rest of the current preprocessor line as an 23975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// uninterpreted string. This switches the lexer out of directive mode. 23983093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramervoid Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) { 23995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer assert(ParsingPreprocessorDirective && ParsingFilename == false && 24005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer "Must be in a preprocessing directive!"); 2401d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Token Tmp; 24025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 24035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // CurPtr - Cache BufferPtr in an automatic variable. 
24045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *CurPtr = BufferPtr; 24055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (1) { 24065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char Char = getAndAdvanceChar(CurPtr, Tmp); 24075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer switch (Char) { 24085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer default: 24093093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer if (Result) 24103093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer Result->push_back(Char); 24115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 24125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 0: // Null. 24135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Found end of file? 24145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (CurPtr-1 != BufferEnd) { 24157d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (isCodeCompletionPoint(CurPtr-1)) { 24167d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 24177d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis cutOffLexing(); 24183093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer return; 24197d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 24207d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 24215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Nope, normal character, continue. 24223093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer if (Result) 24233093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer Result->push_back(Char); 24245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 24255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // FALL THROUGH. 
24275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\r': 24285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\n': 24295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Okay, we found the end of the line. First, back up past the \0, \r, \n. 24305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer assert(CurPtr[-1] == Char && "Trigraphs for newline?"); 24315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr-1; 24321eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 243384021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne // Next, lex the character, which should handle the EOD transition. 24345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Lex(Tmp); 243555817afdf9d453a443262a733f6caf6692dca118Douglas Gregor if (Tmp.is(tok::code_completion)) { 24367d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (PP) 24377d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis PP->CodeCompleteNaturalLanguage(); 243855817afdf9d453a443262a733f6caf6692dca118Douglas Gregor Lex(Tmp); 243955817afdf9d453a443262a733f6caf6692dca118Douglas Gregor } 244084021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne assert(Tmp.is(tok::eod) && "Unexpected token!"); 24411eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 24423093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer // Finally, we're done; 24433093b20d824a953d8bc7a786dd952414898f8d6dBenjamin Kramer return; 24445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 24475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 24485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexEndOfFile - CurPtr points to the end of this file. Handle this 24495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// condition, reporting diagnostics and handling other edge cases as required. 
24505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// This returns true if Result contains a token, false if PP.Lex should be 24515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// called again. 2452d217773f106856a11879ec79dc468efefaf2ee75Chris Lattnerbool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { 24535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we hit the end of the file while parsing a preprocessor directive, 24545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // end the preprocessor directive first. The next token returned will 24555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // then be the end of file. 24565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ParsingPreprocessorDirective) { 24575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Done parsing the "line". 24585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ParsingPreprocessorDirective = false; 24595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 246084021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne FormTokenWithChars(Result, CurPtr, tok::eod); 24611eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 24625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Restore comment saving mode, in case it was disabled for directive. 24636aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose resetExtendedTokenMode(); 24645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return true; // Have a token. 24651eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 246686d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor 246786d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor // If we are in raw mode, return this event as an EOF token. Let the caller 246886d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor // that put us in raw mode handle the event. 
246986d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor if (isLexingRawMode()) { 247086d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor Result.startToken(); 247186d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor BufferPtr = BufferEnd; 247286d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor FormTokenWithChars(Result, BufferEnd, tok::eof); 247386d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor return true; 247486d9a52c24d390631a888d4ff812e1b15445e0a0Douglas Gregor } 247581b747b7fcc91c2fba9a3183d8fac80adbfc1d3eDouglas Gregor 2476f44e854ed1e3aa86d2ed6d615ccd109d50ddcff9Douglas Gregor // Issue diagnostics for unterminated #if and missing newline. 2477f44e854ed1e3aa86d2ed6d615ccd109d50ddcff9Douglas Gregor 24785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are in a #if directive, emit an error. 24795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (!ConditionalStack.empty()) { 24807d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (PP->getCodeCompletionFileLoc() != FileLoc) 24812d474ba9e8ae43a1a5a9f72718c0d79092b9453fDouglas Gregor PP->Diag(ConditionalStack.back().IfLoc, 24822d474ba9e8ae43a1a5a9f72718c0d79092b9453fDouglas Gregor diag::err_pp_unterminated_conditional); 24835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ConditionalStack.pop_back(); 24845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 24851eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2486b25e5d79d9d1967df058a242e96a62d0d0ace074Chris Lattner // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue 2487b25e5d79d9d1967df058a242e96a62d0d0ace074Chris Lattner // a pedwarn. 
24887865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) { 24897865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose DiagnosticsEngine &Diags = PP->getDiagnostics(); 24907865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose SourceLocation EndLoc = getSourceLocation(BufferEnd); 24917865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose unsigned DiagID; 24927865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose 24937865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose if (LangOpts.CPlusPlus11) { 24947865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose // C++11 [lex.phases] 2.2 p2 24957865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose // Prefer the C++98 pedantic compatibility warning over the generic, 24967865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose // non-extension, user-requested "missing newline at EOF" warning. 24977865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose if (Diags.getDiagnosticLevel(diag::warn_cxx98_compat_no_newline_eof, 24987865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose EndLoc) != DiagnosticsEngine::Ignored) { 24997865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose DiagID = diag::warn_cxx98_compat_no_newline_eof; 25007865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } else { 25017865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose DiagID = diag::warn_no_newline_eof; 25027865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } 25037865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } else { 25047865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose DiagID = diag::ext_no_newline_eof; 25057865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } 25067865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose 25077865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose Diag(BufferEnd, DiagID) 25087865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose << FixItHint::CreateInsertion(EndLoc, "\n"); 25097865b8e4324378e06f59adb4d60bec26a7d3d584Jordan Rose } 25101eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 
25115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 25125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 25135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Finally, let the preprocessor handle this. 25140cdd1fe3ec29b5cbff9a728966ace5c5b5d614f7Jordan Rose return PP->HandleEndOfFile(Result, isPragmaLexer()); 25155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 25165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 25175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from 25185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// the specified lexer will return a tok::l_paren token, 0 if it is something 25195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// else and 2 if there are no more tokens in the buffer controlled by the 25205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// lexer. 25215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerunsigned Lexer::isNextPPTokenLParen() { 25225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); 25231eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 25245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Switch to 'skipping' mode. This will ensure that we can lex a token 25255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // without emitting diagnostics, disables macro expansion, and will cause EOF 25265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // to return an EOF token instead of popping the include stack. 25275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer LexingRawMode = true; 25281eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 25295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Save state that can be changed while lexing so that we can restore it. 
25305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *TmpBufferPtr = BufferPtr; 2531a864cf7c1d774a0f790bfc46befc87d9dbf1f65cChris Lattner bool inPPDirectiveMode = ParsingPreprocessorDirective; 2532d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool atStartOfLine = IsAtStartOfLine; 2533d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; 2534d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool leadingSpace = HasLeadingSpace; 25351eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2536d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Token Tok; 2537d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Lex(Tok); 25381eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 25395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Restore state that may have changed. 25405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = TmpBufferPtr; 2541a864cf7c1d774a0f790bfc46befc87d9dbf1f65cChris Lattner ParsingPreprocessorDirective = inPPDirectiveMode; 2542d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingSpace = leadingSpace; 2543d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtStartOfLine = atStartOfLine; 2544d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = atPhysicalStartOfLine; 25451eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 25465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Restore the lexer back to non-skipping mode. 
25475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer LexingRawMode = false; 25481eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 254922f6bbcafa8871f4f20c4402d9cbc5c024fee99aChris Lattner if (Tok.is(tok::eof)) 25505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return 2; 255122f6bbcafa8871f4f20c4402d9cbc5c024fee99aChris Lattner return Tok.is(tok::l_paren); 25525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 25535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2554ec7699398bad018e8ea8134ef760a259598fefc1James Dennett/// \brief Find the end of a version control conflict marker. 2555d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smithstatic const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd, 2556d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith ConflictMarkerKind CMK) { 2557d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>"; 2558d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith size_t TermLen = CMK == CMK_Perforce ? 5 : 7; 2559d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith StringRef RestOfBuffer(CurPtr+TermLen, BufferEnd-CurPtr-TermLen); 2560d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith size_t Pos = RestOfBuffer.find(Terminator); 25615f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner while (Pos != StringRef::npos) { 256234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Must occur at start of line. 
256334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (RestOfBuffer[Pos-1] != '\r' && 256434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner RestOfBuffer[Pos-1] != '\n') { 2565d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith RestOfBuffer = RestOfBuffer.substr(Pos+TermLen); 2566d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith Pos = RestOfBuffer.find(Terminator); 256734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner continue; 256834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 256934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return RestOfBuffer.data()+Pos; 257034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 257134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return 0; 257234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner} 257334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 257434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner/// IsStartOfConflictMarker - If the specified pointer is the start of a version 257534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner/// control conflict marker like '<<<<<<<', recognize it as such, emit an error 257634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner/// and recover nicely. This returns true if it is a conflict marker and false 257734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner/// if not. 257834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattnerbool Lexer::IsStartOfConflictMarker(const char *CurPtr) { 257934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Only a conflict marker if it starts at the beginning of a line. 258034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr != BufferStart && 258134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr[-1] != '\n' && CurPtr[-1] != '\r') 258234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 258334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 2584d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // Check to see if we have <<<<<<< or >>>>. 
2585d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith if ((BufferEnd-CurPtr < 8 || StringRef(CurPtr, 7) != "<<<<<<<") && 2586d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith (BufferEnd-CurPtr < 6 || StringRef(CurPtr, 5) != ">>>> ")) 258734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 258834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 258934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If we have a situation where we don't care about conflict markers, ignore 259034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // it. 2591d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith if (CurrentConflictMarkerState || isLexingRawMode()) 259234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 259334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 2594d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce; 2595d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith 2596d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // Check to see if there is an ending marker somewhere in the buffer at the 2597d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // start of a line to terminate this conflict marker. 2598d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith if (FindConflictEnd(CurPtr, BufferEnd, Kind)) { 259934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // We found a match. We are really in a conflict marker. 260034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Diagnose this, and ignore to the end of line. 260134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Diag(CurPtr, diag::err_conflict_marker); 2602d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith CurrentConflictMarkerState = Kind; 260334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 260434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Skip ahead to the end of line. 
We know this exists because the 260534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // end-of-conflict marker starts with \r or \n. 260634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner while (*CurPtr != '\r' && *CurPtr != '\n') { 260734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner assert(CurPtr != BufferEnd && "Didn't find end of line"); 260834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner ++CurPtr; 260934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 261034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner BufferPtr = CurPtr; 261134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return true; 261234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 261334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 261434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // No end of conflict marker found. 261534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 261634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner} 261734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 261834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 2619d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if 2620d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it 2621d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith/// is the end of a conflict marker. Handle it by ignoring up until the end of 2622d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith/// the line. This returns true if it is a conflict marker and false if not. 262334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattnerbool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { 262434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Only a conflict marker if it starts at the beginning of a line. 
262534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr != BufferStart && 262634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr[-1] != '\n' && CurPtr[-1] != '\r') 262734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 262834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 262934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If we have a situation where we don't care about conflict markers, ignore 263034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // it. 2631d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith if (!CurrentConflictMarkerState || isLexingRawMode()) 263234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 263334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 2634d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // Check to see if we have the marker (4 characters in a row). 2635d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith for (unsigned i = 1; i != 4; ++i) 263634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr[i] != CurPtr[0]) 263734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 263834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 263934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If we do have it, search for the end of the conflict marker. This could 264034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // fail if it got skipped with a '#if 0' or something. Note that CurPtr might 264134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // be the end of conflict marker. 2642d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith if (const char *End = FindConflictEnd(CurPtr, BufferEnd, 2643d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith CurrentConflictMarkerState)) { 264434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = End; 264534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 264634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // Skip ahead to the end of line. 
264734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n') 264834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner ++CurPtr; 264934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 265034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner BufferPtr = CurPtr; 265134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 265234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // No longer in the conflict marker. 2653d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith CurrentConflictMarkerState = CMK_None; 265434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return true; 265534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 265634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 265734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner return false; 265834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner} 265934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 26607d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidisbool Lexer::isCodeCompletionPoint(const char *CurPtr) const { 26617d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (PP && PP->isCodeCompletionEnabled()) { 2662a64ccefdf0ea4e03ec88805d71b0af74950c7472Argyrios Kyrtzidis SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart); 26637d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis return Loc == PP->getCodeCompletionLoc(); 26647d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 26657d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 26667d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis return false; 26677d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis} 26687d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 2669c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Roseuint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, 2670c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Token *Result) { 
2671c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose unsigned CharSize; 2672c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose char Kind = getCharAndSize(StartPtr, CharSize); 2673c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2674c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose unsigned NumHexDigits; 2675c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Kind == 'u') 2676c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose NumHexDigits = 4; 2677c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else if (Kind == 'U') 2678c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose NumHexDigits = 8; 2679c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else 2680c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return 0; 2681c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2682bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose if (!LangOpts.CPlusPlus && !LangOpts.C99) { 26838094bac4e987caf90e8fd719c24545add8dafcb6Jordan Rose if (Result && !isLexingRawMode()) 26848094bac4e987caf90e8fd719c24545add8dafcb6Jordan Rose Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89); 2685bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose return 0; 2686bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose } 2687bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose 2688c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose const char *CurPtr = StartPtr + CharSize; 2689c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose const char *KindLoc = &CurPtr[-1]; 2690c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2691c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose uint32_t CodePoint = 0; 2692c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose for (unsigned i = 0; i < NumHexDigits; ++i) { 2693c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose char C = getCharAndSize(CurPtr, CharSize); 2694c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2695c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose unsigned Value = llvm::hexDigitValue(C); 
2696c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Value == -1U) { 2697c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Result && !isLexingRawMode()) { 2698c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (i == 0) { 2699c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Diag(BufferPtr, diag::warn_ucn_escape_no_digits) 2700c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose << StringRef(KindLoc, 1); 2701c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } else { 2702c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Diag(BufferPtr, diag::warn_ucn_escape_incomplete); 2703b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose 2704b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose // If the user wrote \U1234, suggest a fixit to \u. 2705b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose if (i == 4 && NumHexDigits == 8) { 2706ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1); 2707b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose Diag(KindLoc, diag::note_ucn_four_not_eight) 2708b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose << FixItHint::CreateReplacement(URange, "u"); 2709b87672b124ab4fbe6f2cabc2ad71655fc71230eaJordan Rose } 2710c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2711c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2712bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose 2713c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return 0; 2714c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2715c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2716c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose CodePoint <<= 4; 2717c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose CodePoint += Value; 2718c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2719c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose CurPtr += CharSize; 2720c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2721c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 
2722c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Result) { 2723c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Result->setFlag(Token::HasUCN); 2724b6c08a64145485a7c233761220b8d82b74aa7546NAKAMURA Takumi if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2) 2725c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose StartPtr = CurPtr; 2726c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else 2727c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose while (StartPtr != CurPtr) 2728c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose (void)getAndAdvanceChar(StartPtr, *Result); 2729c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } else { 2730c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose StartPtr = CurPtr; 2731c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2732c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2733c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // C99 6.4.3p2: A universal character name shall not specify a character whose 2734c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // short identifier is less than 00A0 other than 0024 ($), 0040 (@), or 2735c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // 0060 (`), nor one in the range D800 through DFFF inclusive.) 2736c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // C++11 [lex.charset]p2: If the hexadecimal value for a 2737c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // universal-character-name corresponds to a surrogate code point (in the 2738c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // range 0xD800-0xDFFF, inclusive), the program is ill-formed. 
Additionally, 2739c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // if the hexadecimal value for a universal-character-name outside the 2740c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // c-char-sequence, s-char-sequence, or r-char-sequence of a character or 2741c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // string literal corresponds to a control character (in either of the 2742c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a character in the 2743c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // basic source character set, the program is ill-formed. 2744c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (CodePoint < 0xA0) { 2745c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60) 2746c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return CodePoint; 2747c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2748c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // We don't use isLexingRawMode() here because we need to warn about bad 2749c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // UCNs even when skipping preprocessing tokens in a #if block. 
2750c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (Result && PP) { 2751c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (CodePoint < 0x20 || CodePoint >= 0x7F) 2752c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Diag(BufferPtr, diag::err_ucn_control_character); 2753c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose else { 2754c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose char C = static_cast<char>(CodePoint); 2755c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1); 2756c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2757c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2758c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2759c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return 0; 2760ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 2761ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) { 2762c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't. 2763ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose // We don't use isLexingRawMode() here because we need to diagnose bad 2764c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // UCNs even when skipping preprocessing tokens in a #if block. 
2765ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (Result && PP) { 2766ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11) 2767ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diag(BufferPtr, diag::warn_ucn_escape_surrogate); 2768ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose else 2769ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose Diag(BufferPtr, diag::err_ucn_escape_invalid); 2770ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 2771c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return 0; 2772c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2773c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2774c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return CodePoint; 2775c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose} 2776c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2777d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C, 2778d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman const char *CurPtr) { 2779263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars( 2780263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko UnicodeWhitespaceCharRanges); 278174c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose if (!isLexingRawMode() && !PP->isPreprocessedOutput() && 2782263cea4485040bb590800ef3290448a81f0dbc4bAlexander Kornienko UnicodeWhitespaceChars.contains(C)) { 278374c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose Diag(BufferPtr, diag::ext_unicode_whitespace) 2784ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << makeCharRange(*this, BufferPtr, CurPtr); 2785fc12060ed595fd23d731b8a86adb21ddbb8c7bfbJordan Rose 2786fc12060ed595fd23d731b8a86adb21ddbb8c7bfbJordan Rose Result.setFlag(Token::LeadingSpace); 2787d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 2788fc12060ed595fd23d731b8a86adb21ddbb8c7bfbJordan Rose } 
2789d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return false; 2790d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman} 2791fc12060ed595fd23d731b8a86adb21ddbb8c7bfbJordan Rose 2792d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { 2793ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) { 2794ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose if (!isLexingRawMode() && !ParsingPreprocessorDirective && 2795ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose !PP->isPreprocessedOutput()) { 2796ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C, 2797ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose makeCharRange(*this, BufferPtr, CurPtr), 2798ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose /*IsFirst=*/true); 2799ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose } 2800ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose 2801c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose MIOpt.ReadToken(); 2802c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return LexIdentifier(Result, CurPtr); 2803c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2804c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 28050ed439487491e09faffdbabfacb1d050292c7723Jordan Rose if (!isLexingRawMode() && !ParsingPreprocessorDirective && 28060ed439487491e09faffdbabfacb1d050292c7723Jordan Rose !PP->isPreprocessedOutput() && 2807ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose !isASCII(*BufferPtr) && !isAllowedIDChar(C, LangOpts)) { 2808c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Non-ASCII characters tend to creep into source code unintentionally. 2809c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Instead of letting the parser complain about the unknown token, 2810c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // just drop the character. 
2811c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Note that we can /only/ do this when the non-ASCII character is actually 2812c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // spelled as Unicode, not written as a UCN. The standard requires that 2813c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // we not throw away any possible preprocessor tokens, but there's a 2814c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // loophole in the mapping of Unicode characters to basic character set 2815c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // characters that allows us to map these particular characters to, say, 2816c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // whitespace. 281774c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose Diag(BufferPtr, diag::err_non_ascii) 2818ed9c59f2ae338f6f70c1fed2ce7b0d8a5eb3ba1cJordan Rose << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr)); 2819c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2820c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose BufferPtr = CurPtr; 2821d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return false; 2822c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 2823c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2824c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Otherwise, we have an explicit UCN or a character that's unlikely to show 2825c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // up by accident. 
2826c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose MIOpt.ReadToken(); 2827c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose FormTokenWithChars(Result, CurPtr, tok::unknown); 2828d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 2829d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman} 2830d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2831d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanvoid Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) { 2832d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtStartOfLine = Result.isAtStartOfLine(); 2833d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingSpace = Result.hasLeadingSpace(); 2834d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingEmptyMacro = Result.hasLeadingEmptyMacro(); 2835d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // Note that this doesn't affect IsAtPhysicalStartOfLine. 2836c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose} 2837c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 2838d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::Lex(Token &Result) { 2839d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // Start a new token. 2840d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Result.startToken(); 2841d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2842d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // Set up misc whitespace flags for LexTokenInternal. 
2843d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (IsAtStartOfLine) { 2844d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Result.setFlag(Token::StartOfLine); 2845d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtStartOfLine = false; 2846d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 2847d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2848d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (HasLeadingSpace) { 2849d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Result.setFlag(Token::LeadingSpace); 2850d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingSpace = false; 2851d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 2852d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2853d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (HasLeadingEmptyMacro) { 2854d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman Result.setFlag(Token::LeadingEmptyMacro); 2855d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman HasLeadingEmptyMacro = false; 2856d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 2857d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2858d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; 2859d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = false; 28600f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman bool isRawLex = isLexingRawMode(); 28610f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman (void) isRawLex; 28620f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); 28630f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman // (After the LexTokenInternal call, the lexer might be destroyed.) 
28640f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman assert((returnedToken || !isRawLex) && "Raw lex must succeed"); 28650f238a61be7ba8a52383fc3419889d52431316d4Eli Friedman return returnedToken; 2866d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman} 28675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 28685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// LexTokenInternal - This implements a simple C family lexer. It is an 28695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// extremely performance critical piece of code. This assumes that the buffer 2870efb173ddd95325c7fd3c15070392b27c07a49a85Chris Lattner/// has a null character at the end of the file. This returns a preprocessing 2871efb173ddd95325c7fd3c15070392b27c07a49a85Chris Lattner/// token, not a normal token, as such, it is an internal interface. It assumes 2872efb173ddd95325c7fd3c15070392b27c07a49a85Chris Lattner/// that the Flags of result have been cleared before calling this. 2873d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedmanbool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { 28745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerLexNextToken: 28755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // New token, can't need cleaning yet. 2876d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.clearFlag(Token::NeedsCleaning); 28775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Result.setIdentifierInfo(0); 28781eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 28795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // CurPtr - Cache BufferPtr in an automatic variable. 28805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const char *CurPtr = BufferPtr; 28815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 28825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Small amounts of horizontal whitespace is very common between tokens. 
28835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if ((*CurPtr == ' ') || (*CurPtr == '\t')) { 28845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++CurPtr; 28855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while ((*CurPtr == ' ') || (*CurPtr == '\t')) 28865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ++CurPtr; 28871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2888d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // If we are keeping whitespace and other tokens, just return what we just 2889d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // skipped. The next lexer invocation will return the token after the 2890d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner // whitespace. 2891d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner if (isKeepWhitespaceMode()) { 28929e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, tok::unknown); 28936aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose // FIXME: The next token will not have LeadingSpace set. 2894d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 2895d88dc48e33d71732708960170e57a3d1bdc8f847Chris Lattner } 28961eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 28975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer BufferPtr = CurPtr; 2898d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.setFlag(Token::LeadingSpace); 28995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 29001eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below. 29021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Read a character, advancing over it. 
29045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer char Char = getAndAdvanceChar(CurPtr, Result); 29059e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner tok::TokenKind Kind; 29061eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer switch (Char) { 29085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 0: // Null. 29095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Found end of file? 2910d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (CurPtr-1 == BufferEnd) 2911d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return LexEndOfFile(Result, CurPtr-1); 29121eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29137d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis // Check if we are performing code completion. 29147d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis if (isCodeCompletionPoint(CurPtr-1)) { 29157d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis // Return the code-completion token. 
29167d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis Result.startToken(); 29177d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis FormTokenWithChars(Result, CurPtr, tok::code_completion); 2918d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 29197d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis } 29207d100872341f233c81e1d7b72b40457e62c36862Argyrios Kyrtzidis 292174d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 292274d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(CurPtr-1, diag::null_in_file); 2923d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.setFlag(Token::LeadingSpace); 2924d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 2925d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 29261eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2927d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We know the lexer hasn't changed, so just try again with this lexer. 2928d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 2929d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 2930a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner 2931a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner case 26: // DOS & CP/M EOF: "^Z". 2932a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner // If we're in Microsoft extensions mode, treat this as end of file. 2933d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (LangOpts.MicrosoftExt) 2934d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return LexEndOfFile(Result, CurPtr-1); 2935d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2936a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner // If Microsoft extensions are disabled, this is just random garbage. 
2937a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner Kind = tok::unknown; 2938a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner break; 2939a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner 29405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\n': 29415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\r': 29425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // If we are inside a preprocessor directive and we see the end of line, 294384021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne // we know we are done with the directive, so return an EOD token. 29445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ParsingPreprocessorDirective) { 29455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Done parsing the "line". 29465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ParsingPreprocessorDirective = false; 29471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Restore comment saving mode, in case it was disabled for directive. 29491a8354659a6007bbae3b5d9161a56ecc8f61a219David Blaikie if (PP) 29506aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose resetExtendedTokenMode(); 29511eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Since we consumed a newline, we are back at the start of a line. 29535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer IsAtStartOfLine = true; 2954d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman IsAtPhysicalStartOfLine = true; 29551eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 295684021556baceb76eedf7d44be8ba71d9b8cfaccePeter Collingbourne Kind = tok::eod; 29575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 29585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 29596aad4a31b35df07fe818f193fcfd3c0197aea467Jordan Rose 29605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // No leading whitespace seen so far. 
2961d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.clearFlag(Token::LeadingSpace); 29621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 2963d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 2964d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 2965d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 2966d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 2967d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 2968d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 29695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ' ': 29705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\t': 29715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\f': 29725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\v': 29738133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner SkipHorizontalWhitespace: 2974d217773f106856a11879ec79dc468efefaf2ee75Chris Lattner Result.setFlag(Token::LeadingSpace); 2975d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 2976d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 29778133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner 29788133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner SkipIgnoredUnits: 29798133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner CurPtr = BufferPtr; 29801eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 29818133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner // If the next token is obviously a // or /* */ comment, skip it efficiently 29828133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner // too (without going through the big switch stmt). 
29838402c73dd880e8af46c826d873681820aebe32ebChris Lattner if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && 2984a601579fa0c87b742797c1249e39cb54ec83083fEli Friedman LangOpts.LineComment && 2985a601579fa0c87b742797c1249e39cb54ec83083fEli Friedman (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { 2986d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) 2987d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // There is a token to return. 29888133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner goto SkipIgnoredUnits; 2989fa95a019da00b926d64ff83358ba73bbc6ae1e37Chris Lattner } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { 2990d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) 2991d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // There is a token to return. 29928133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner goto SkipIgnoredUnits; 29938133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner } else if (isHorizontalWhitespace(*CurPtr)) { 29948133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner goto SkipHorizontalWhitespace; 29958133cfcc0b672cc86d1ef73ce0c61eccecdb3690Chris Lattner } 2996d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 2997d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 2998d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 2999a2bf105739cef7aea8e4d219629c627fcd11742eChris Lattner 30003a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // C99 6.4.4.1: Integer Constants. 30013a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // C99 6.4.4.2: Floating Constants. 
30023a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case '0': case '1': case '2': case '3': case '4': 30033a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case '5': case '6': case '7': case '8': case '9': 30043a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // Notify MIOpt that we read a non-whitespace/non-comment token. 30053a5707766850f9ee9daa35299794328b5caf96dcChris Lattner MIOpt.ReadToken(); 30063a5707766850f9ee9daa35299794328b5caf96dcChris Lattner return LexNumericConstant(Result, CurPtr); 30071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 30080093e12513c5c896434915d5e9126f51b780aa61Richard Smith case 'u': // Identifier (uber) or C11/C++11 UTF-8 or UTF-16 string literal 30095cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // Notify MIOpt that we read a non-whitespace/non-comment token. 30105cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor MIOpt.ReadToken(); 30115cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30120093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (LangOpts.CPlusPlus11 || LangOpts.C11) { 30135cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor Char = getCharAndSize(CurPtr, SizeTmp); 30145cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30155cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // UTF-16 string literal 30165cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor if (Char == '"') 30175cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), 30185cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::utf16_string_literal); 30195cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30205cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // UTF-16 character constant 30215cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor if (Char == '\'') 30225cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), 
30235cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::utf16_char_constant); 30245cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // UTF-16 raw string literal 30260093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (Char == 'R' && LangOpts.CPlusPlus11 && 30270093e12513c5c896434915d5e9126f51b780aa61Richard Smith getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') 30282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 30292fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 30302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 30312fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::utf16_string_literal); 30322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 30332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (Char == '8') { 30342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2); 30352fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 30362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // UTF-8 string literal 30372fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (Char2 == '"') 30382fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexStringLiteral(Result, 30392fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 30402fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 30412fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::utf8_string_literal); 30422fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 30430093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (Char2 == 'R' && LangOpts.CPlusPlus11) { 30442fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper unsigned SizeTmp3; 30452fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); 
30462fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // UTF-8 raw string literal 30472fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (Char3 == '"') { 30482fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 30492fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 30502fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 30512fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp3, Result), 30522fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::utf8_string_literal); 30532fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 30542fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 30552fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 30565cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor } 30575cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30585cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // treat u like the start of an identifier. 30595cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexIdentifier(Result, CurPtr); 30605cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30610093e12513c5c896434915d5e9126f51b780aa61Richard Smith case 'U': // Identifier (Uber) or C11/C++11 UTF-32 string literal 30625cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // Notify MIOpt that we read a non-whitespace/non-comment token. 
30635cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor MIOpt.ReadToken(); 30645cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30650093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (LangOpts.CPlusPlus11 || LangOpts.C11) { 30665cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor Char = getCharAndSize(CurPtr, SizeTmp); 30675cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30685cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // UTF-32 string literal 30695cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor if (Char == '"') 30705cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), 30715cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::utf32_string_literal); 30725cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30735cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // UTF-32 character constant 30745cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor if (Char == '\'') 30755cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), 30765cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::utf32_char_constant); 30772fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 30782fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // UTF-32 raw string literal 30790093e12513c5c896434915d5e9126f51b780aa61Richard Smith if (Char == 'R' && LangOpts.CPlusPlus11 && 30800093e12513c5c896434915d5e9126f51b780aa61Richard Smith getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') 30812fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 30822fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 30832fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 30842fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::utf32_string_literal); 30855cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor } 
30865cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30875cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor // treat U like the start of an identifier. 30885cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexIdentifier(Result, CurPtr); 30895cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor 30902fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper case 'R': // Identifier or C++0x raw string literal 30912fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Notify MIOpt that we read a non-whitespace/non-comment token. 30922fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper MIOpt.ReadToken(); 30932fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 309480ad52f327b532bded5c5b0ee38779d841c6cd35Richard Smith if (LangOpts.CPlusPlus11) { 30952fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper Char = getCharAndSize(CurPtr, SizeTmp); 30962fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 30972fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper if (Char == '"') 30982fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 30992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(CurPtr, SizeTmp, Result), 31002fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::string_literal); 31012fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper } 31022fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 31032fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // treat R like the start of an identifier. 31042fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexIdentifier(Result, CurPtr); 31052fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 31063a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz"). 31075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 
31085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 31095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 31105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 31115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Wide string literal. 31125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '"') 31135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), 31145cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::wide_string_literal); 31155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 31162fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper // Wide raw string literal. 311780ad52f327b532bded5c5b0ee38779d841c6cd35Richard Smith if (LangOpts.CPlusPlus11 && Char == 'R' && 31182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') 31192fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper return LexRawStringLiteral(Result, 31202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 31212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper SizeTmp2, Result), 31222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper tok::wide_string_literal); 31232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper 31245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Wide character constant. 31255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '\'') 31265cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), 31275cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor tok::wide_char_constant); 31285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // FALL THROUGH, treating L like the start of an identifier. 31291eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 31305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // C99 6.4.2: Identifiers. 
31315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': 31325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N': 31332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/ 31345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'V': case 'W': case 'X': case 'Y': case 'Z': 31355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': 31365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 31375cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ 31385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case 'v': case 'w': case 'x': case 'y': case 'z': 31395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '_': 31405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 31415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 31425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return LexIdentifier(Result, CurPtr); 31433a5707766850f9ee9daa35299794328b5caf96dcChris Lattner 31443a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case '$': // $ in identifiers. 31454e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (LangOpts.DollarIdents) { 314674d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 314774d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner Diag(CurPtr-1, diag::ext_dollar_in_identifier); 31483a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // Notify MIOpt that we read a non-whitespace/non-comment token. 
31493a5707766850f9ee9daa35299794328b5caf96dcChris Lattner MIOpt.ReadToken(); 31503a5707766850f9ee9daa35299794328b5caf96dcChris Lattner return LexIdentifier(Result, CurPtr); 31513a5707766850f9ee9daa35299794328b5caf96dcChris Lattner } 31521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 31539e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::unknown; 31543a5707766850f9ee9daa35299794328b5caf96dcChris Lattner break; 31551eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 31565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // C99 6.4.4: Character Constants. 31575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\'': 31585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 31595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 31605cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexCharConstant(Result, CurPtr, tok::char_constant); 31615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 31625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // C99 6.4.5: String Literals. 31635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '"': 31645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 31655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 31665cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor return LexStringLiteral(Result, CurPtr, tok::string_literal); 31675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 31685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // C99 6.4.6: Punctuators. 
31695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '?': 31709e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::question; 31715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 31725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '[': 31739e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_square; 31745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 31755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ']': 31769e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_square; 31775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 31785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '(': 31799e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_paren; 31805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 31815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ')': 31829e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_paren; 31835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 31845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '{': 31859e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_brace; 31865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 31875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '}': 31889e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_brace; 31895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 31905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '.': 31915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 31925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char >= '0' && Char <= '9') { 31935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 
31945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 31955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 31965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); 31974e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.CPlusPlus && Char == '*') { 31989e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::periodstar; 31995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr += SizeTmp; 32005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '.' && 32015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') { 32029e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::ellipsis; 32035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 32045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer SizeTmp2, Result); 32055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 32069e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::period; 32075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 32085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '&': 32105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 32115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '&') { 32129e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::ampamp; 32135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '=') { 32159e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::ampequal; 32165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid 
Spencer } else { 32189e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::amp; 32195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 32205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32211eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump case '*': 32225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (getCharAndSize(CurPtr, SizeTmp) == '=') { 32239e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::starequal; 32245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 32269e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::star; 32275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 32285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '+': 32305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 32315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '+') { 32325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32339e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::plusplus; 32345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '=') { 32355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32369e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::plusequal; 32375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 32389e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::plus; 32395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 32405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '-': 32425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 
32439e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner if (Char == '-') { // -- 32445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32459e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::minusminus; 32464e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (Char == '>' && LangOpts.CPlusPlus && 32479e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { // C++ ->* 32485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 32495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer SizeTmp2, Result); 32509e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::arrowstar; 32519e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner } else if (Char == '>') { // -> 32525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32539e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::arrow; 32549e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner } else if (Char == '=') { // -= 32555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32569e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::minusequal; 32575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 32589e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::minus; 32595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 32605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '~': 32629e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::tilde; 32635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '!': 32655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (getCharAndSize(CurPtr, SizeTmp) == '=') { 
32669e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::exclaimequal; 32675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 32685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 32699e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::exclaim; 32705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 32715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 32725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '/': 32735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // 6.4.9: Comments 32745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 3275bb23628148f555a4cf71f98c27096a7a804c085cNico Weber if (Char == '/') { // Line comment. 3276bb23628148f555a4cf71f98c27096a7a804c085cNico Weber // Even if Line comments are disabled (e.g. in C89 mode), we generally 32778402c73dd880e8af46c826d873681820aebe32ebChris Lattner // want to lex this as a comment. There is one problem with this though, 32788402c73dd880e8af46c826d873681820aebe32ebChris Lattner // that in one particular corner case, this can change the behavior of the 32798402c73dd880e8af46c826d873681820aebe32ebChris Lattner // resultant program. For example, In "foo //**/ bar", C89 would lex 3280bb23628148f555a4cf71f98c27096a7a804c085cNico Weber // this as "foo / bar" and languages with Line comments would lex it as 32818402c73dd880e8af46c826d873681820aebe32ebChris Lattner // "foo". Check to see if the character after the second slash is a '*'. 32828402c73dd880e8af46c826d873681820aebe32ebChris Lattner // If so, we will lex that as a "/" instead of the start of a comment. 3283693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose // However, we never do this if we are just preprocessing. 
3284a601579fa0c87b742797c1249e39cb54ec83083fEli Friedman bool TreatAsComment = LangOpts.LineComment && 3285a601579fa0c87b742797c1249e39cb54ec83083fEli Friedman (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP); 3286693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose if (!TreatAsComment) 3287693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose if (!(PP && PP->isPreprocessedOutput())) 3288693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*'; 3289693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose 3290693fdfa069b74ffe1f79caa3c9ea8754555d29bdJordan Rose if (TreatAsComment) { 3291d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), 3292d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman TokAtPhysicalStartOfLine)) 3293d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // There is a token to return. 32941eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 32958402c73dd880e8af46c826d873681820aebe32ebChris Lattner // It is common for the tokens immediately after a // comment to be 32968402c73dd880e8af46c826d873681820aebe32ebChris Lattner // whitespace (indentation for the next line). Instead of going through 32978402c73dd880e8af46c826d873681820aebe32ebChris Lattner // the big switch, handle it efficiently now. 32988402c73dd880e8af46c826d873681820aebe32ebChris Lattner goto SkipIgnoredUnits; 32998402c73dd880e8af46c826d873681820aebe32ebChris Lattner } 33008402c73dd880e8af46c826d873681820aebe32ebChris Lattner } 33011eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 33028402c73dd880e8af46c826d873681820aebe32ebChris Lattner if (Char == '*') { // /**/ comment. 
3303d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), 3304d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman TokAtPhysicalStartOfLine)) 3305d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // There is a token to return. 3306d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 3307d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 3308d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 3309d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 33108402c73dd880e8af46c826d873681820aebe32ebChris Lattner } 33111eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 33128402c73dd880e8af46c826d873681820aebe32ebChris Lattner if (Char == '=') { 33135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33149e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::slashequal; 33155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 33169e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::slash; 33175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 33185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 33195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '%': 33205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 33215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 33229e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::percentequal; 33235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33244e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.Digraphs && Char == '>') { 33259e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_brace; // '%>' -> '}' 33265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid 
Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33274e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.Digraphs && Char == ':') { 33285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 33305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') { 33319e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::hashhash; // '%:%:' -> '##' 33325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 33335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer SizeTmp2, Result); 33344e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize 33355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 333674d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 333766d5ce11b9426f6a59f61a03cbd8dbf047cc9350Ted Kremenek Diag(BufferPtr, diag::ext_charize_microsoft); 33389e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::hashat; 3339e91e93225db2e66906878513c6ef4dd6a7ee2b6aChris Lattner } else { // '%:' -> '#' 33405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // We parsed a # character. If this occurs at the start of the line, 33415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // it's actually the start of a preprocessing directive. Callback to 33425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // the preprocessor to handle it. 33435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // FIXME: -fpreprocessed mode?? 
3344d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) 33453185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis goto HandleDirective; 33461eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 3347e91e93225db2e66906878513c6ef4dd6a7ee2b6aChris Lattner Kind = tok::hash; 33485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 33495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 33509e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::percent; 33515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 33525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 33535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '<': 33545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 33555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (ParsingFilename) { 33569cb51ce03caf0755759f7e67aea7d4be684ff738Chris Lattner return LexAngledStringLiteral(Result, CurPtr); 33575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '<') { 335834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); 335934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (After == '=') { 336034f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Kind = tok::lesslessequal; 336134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 336234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner SizeTmp2, Result); 336334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) { 336434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If this is actually a '<<<<<<<' version control conflict marker, 336534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // recognize it as such and recover nicely. 
336634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner goto LexNextToken; 3367d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) { 3368d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // If this is '<<<<' and we're in a Perforce-style conflict marker, 3369d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // ignore it. 3370d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith goto LexNextToken; 33714e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.CUDA && After == '<') { 33721b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne Kind = tok::lesslessless; 33731b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 33741b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne SizeTmp2, Result); 337534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } else { 337634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 337734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Kind = tok::lessless; 337834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 33795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '=') { 33805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 33819e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::lessequal; 33824e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '[' 338380ad52f327b532bded5c5b0ee38779d841c6cd35Richard Smith if (LangOpts.CPlusPlus11 && 338487a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') { 338587a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // C++0x [lex.pptoken]p3: 338687a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // Otherwise, if the next three characters are <:: and the subsequent 
338787a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // character is neither : nor >, the < is treated as a preprocessor 338887a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // token by itself and not as the first character of the alternative 338987a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith // token <:. 339087a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith unsigned SizeTmp3; 339187a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); 339287a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith if (After != ':' && After != '>') { 339387a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith Kind = tok::less; 3394661a99690bc133bbaa029da925481d4a860dec90Richard Smith if (!isLexingRawMode()) 3395661a99690bc133bbaa029da925481d4a860dec90Richard Smith Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon); 339687a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith break; 339787a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith } 339887a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith } 339987a1e19735a27d84edc2ac1331c040e2fb4c3b1aRichard Smith 34005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34019e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_square; 34024e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{' 34035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34049e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::l_brace; 34055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 34069e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::less; 34075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 34085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '>': 
34105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 34115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 34125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34139e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::greaterequal; 34145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '>') { 341534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); 341634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (After == '=') { 341734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 341834f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner SizeTmp2, Result); 341934f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Kind = tok::greatergreaterequal; 3420d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) { 3421d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // If this is actually a '>>>>' conflict marker, recognize it as such 3422d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // and recover nicely. 3423d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith goto LexNextToken; 342434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) { 342534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If this is '>>>>>>>' and we're in a conflict marker, ignore it. 
342634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner goto LexNextToken; 34274e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.CUDA && After == '>') { 34281b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne Kind = tok::greatergreatergreater; 34291b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 34301b791d6465d42a9763927be1dd8af229efcbbf5ePeter Collingbourne SizeTmp2, Result); 343134f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } else { 343234f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 343334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner Kind = tok::greatergreater; 343434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner } 343534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 34365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 34379e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::greater; 34385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 34395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '^': 34415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 34425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 34435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34449e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::caretequal; 34455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 34469e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::caret; 34475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 34485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '|': 34505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 
34515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 34529e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::pipeequal; 34535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else if (Char == '|') { 345534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner // If this is '|||||||' and we're in a conflict marker, ignore it. 345634f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1)) 345734f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner goto LexNextToken; 34589e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::pipepipe; 34595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 34619e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::pipe; 34625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 34635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ':': 34655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 34664e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (LangOpts.Digraphs && Char == '>') { 34679e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::r_square; // ':>' -> ']' 34685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34694e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (LangOpts.CPlusPlus && Char == ':') { 34709e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::coloncolon; 34715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34721eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } else { 34739e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = 
tok::colon; 34745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 34755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ';': 34779e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::semi; 34785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '=': 34805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 34815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '=') { 3482d5e1d606f8c22ebda17c6fbf952f8c1696428758Richard Smith // If this is '====' and we're in a conflict marker, ignore it. 348334f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1)) 348434f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner goto LexNextToken; 348534f349da38a7bd99103e12d8ea6c73bc8d025193Chris Lattner 34869e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::equalequal; 34875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 34881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } else { 34899e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::equal; 34905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 34915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case ',': 34939e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::comma; 34945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 34955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '#': 34965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer Char = getCharAndSize(CurPtr, SizeTmp); 34975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (Char == '#') { 34989e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::hashhash; 34995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr 
= ConsumeChar(CurPtr, SizeTmp, Result); 35004e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize 35019e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::hashat; 350274d15dfd183b2082e8a5d4dfbf66bd861b220901Chris Lattner if (!isLexingRawMode()) 350366d5ce11b9426f6a59f61a03cbd8dbf047cc9350Ted Kremenek Diag(BufferPtr, diag::ext_charize_microsoft); 35045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); 35055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } else { 35065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // We parsed a # character. If this occurs at the start of the line, 35075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // it's actually the start of a preprocessing directive. Callback to 35085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // the preprocessor to handle it. 35095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // FIXME: -fpreprocessed mode?? 3510d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) 35113185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis goto HandleDirective; 35121eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 3513e91e93225db2e66906878513c6ef4dd6a7ee2b6aChris Lattner Kind = tok::hash; 35145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 35155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 35165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 35173a5707766850f9ee9daa35299794328b5caf96dcChris Lattner case '@': 35183a5707766850f9ee9daa35299794328b5caf96dcChris Lattner // Objective C support. 
35194e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie if (CurPtr[-1] == '@' && LangOpts.ObjC1) 35209e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::at; 35213a5707766850f9ee9daa35299794328b5caf96dcChris Lattner else 35229e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::unknown; 35233a5707766850f9ee9daa35299794328b5caf96dcChris Lattner break; 35241eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 3525c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // UCNs (C99 6.4.3, C++11 [lex.charset]p2) 35265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case '\\': 3527d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { 3528d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { 3529d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 3530d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 3531d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 3532d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 3533d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 
3534d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 3535d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 3536d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 3537c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return LexUnicode(Result, CodePoint, CurPtr); 3538d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 3539c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 35409e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner Kind = tok::unknown; 35415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 3542c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 3543c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose default: { 3544c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose if (isASCII(Char)) { 3545c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose Kind = tok::unknown; 3546c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose break; 3547c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 3548c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 3549c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose UTF32 CodePoint; 3550c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 3551c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // We can't just reset CurPtr to BufferPtr because BufferPtr may point to 3552c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // an escaped newline. 
3553c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose --CurPtr; 3554cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko ConversionResult Status = 3555cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko llvm::convertUTF8Sequence((const UTF8 **)&CurPtr, 3556cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko (const UTF8 *)BufferEnd, 3557cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko &CodePoint, 3558cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko strictConversion); 3559d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (Status == conversionOK) { 3560d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { 3561d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) 3562d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; // KeepWhitespaceMode 3563d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman 3564d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We only saw whitespace, so just try again with this lexer. 3565d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 
3566d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman goto LexNextToken; 3567d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 3568c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose return LexUnicode(Result, CodePoint, CurPtr); 3569d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman } 3570c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 35710ed439487491e09faffdbabfacb1d050292c7723Jordan Rose if (isLexingRawMode() || ParsingPreprocessorDirective || 35720ed439487491e09faffdbabfacb1d050292c7723Jordan Rose PP->isPreprocessedOutput()) { 357320afc2977cd0a6bacbe6218a633cd59a24463e2fJordan Rose ++CurPtr; 357474c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose Kind = tok::unknown; 357574c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose break; 357674c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose } 357774c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose 3578c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Non-ASCII characters tend to creep into source code unintentionally. 3579c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose // Instead of letting the parser complain about the unknown token, 3580ae82c2b7b62d742f56638fa3cfb0f550ddcaf315Jordan Rose // just diagnose the invalid UTF-8, then drop the character. 358174c2498bb9e339345ee32bdd095e76157cec3b86Jordan Rose Diag(CurPtr, diag::err_invalid_utf8); 3582c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose 3583c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose BufferPtr = CurPtr+1; 3584d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We're pretending the character didn't exist, so just try again with 3585d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // this lexer. 3586d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // (We manually eliminate the tail call to avoid recursion.) 
3587c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose goto LexNextToken; 3588c7629d941557f7179eb8fa8a2e2a74d749cbaf7cJordan Rose } 35895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 35901eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 35915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Notify MIOpt that we read a non-whitespace/non-comment token. 35925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer MIOpt.ReadToken(); 35935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 35945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer // Update the location of token as well as BufferPtr. 35959e6293d4dfd688429f77ee3b6edba9dfd7ada3a2Chris Lattner FormTokenWithChars(Result, CurPtr, Kind); 3596d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 35973185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis 35983185d4ac30378995ef70421e2848f77524c2b5d5Argyrios KyrtzidisHandleDirective: 35993185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis // We parsed a # character and it's the start of a preprocessing directive. 36003185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis 36013185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis FormTokenWithChars(Result, CurPtr, tok::hash); 36023185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis PP->HandleDirective(Result); 36033185d4ac30378995ef70421e2848f77524c2b5d5Argyrios Kyrtzidis 36043b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis if (PP->hadModuleLoaderFatalFailure()) { 36053b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis // With a fatal failure in the module loader, we abort parsing. 
36063b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis assert(Result.is(tok::eof) && "Preprocessor did not set tok:eof"); 3607d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return true; 36083b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis } 36093b7deda7137e62810a810ce25b062927a9fc7c71Argyrios Kyrtzidis 3610d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman // We parsed the directive; lex a token with the new state. 3611d2f9308220af22bfc1bcd3bc2cad118dbd8be013Eli Friedman return false; 36125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 3613