12d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "clang/AST/CommentLexer.h" 2aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko#include "clang/AST/CommentCommandTraits.h" 3efa78d163214fd9e909ab2bf6911edfbc7a2b9dfFariborz Jahanian#include "clang/AST/CommentDiagnostic.h" 4bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko#include "clang/Basic/CharInfo.h" 5c934dfe950a14fe447aa14a7dae25d00ee87c8bbDmitri Gribenko#include "llvm/ADT/StringExtras.h" 62d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/StringSwitch.h" 7cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko#include "llvm/Support/ConvertUTF.h" 82d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/Support/ErrorHandling.h" 92d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace clang { 112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace comments { 122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Token::dump(const Lexer &L, const SourceManager &SM) const { 142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko llvm::errs() << "comments::Token Kind=" << Kind << " "; 152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko Loc.dump(SM); 162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n"; 172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 190ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline bool isHTMLNamedCharacterReferenceCharacter(char C) { 20bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko return isLetter(C); 21477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 22477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 230ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline bool isHTMLDecimalCharacterReferenceCharacter(char C) { 24bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko return isDigit(C); 25477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 26477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 270ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline bool isHTMLHexCharacterReferenceCharacter(char C) { 28bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko return isHexDigit(C); 29477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 30834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko 310ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline StringRef convertCodePointToUTF8( 320ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenko llvm::BumpPtrAllocator &Allocator, 330ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenko unsigned CodePoint) { 34658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT); 35658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian char *ResolvedPtr = Resolved; 36cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr)) 37658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian return StringRef(Resolved, ResolvedPtr - Resolved); 38658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian else 39658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian return StringRef(); 40658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian} 415bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko 420ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkonamespace { 430ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenko 445bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko#include "clang/AST/CommentHTMLTags.inc" 455bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko#include "clang/AST/CommentHTMLNamedCharacterReferences.inc" 465bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko 475bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko} // unnamed namespace 48658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian 49477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const { 505bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko // Fast path, first check a few most widely used named character references. 51477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return llvm::StringSwitch<StringRef>(Name) 52477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("amp", "&") 53477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("lt", "<") 54477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("gt", ">") 55477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("quot", "\"") 56477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("apos", "\'") 575bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko // Slow path. 585bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko .Default(translateHTMLNamedCharacterReferenceToUTF8(Name)); 59658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian} 60477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 61477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const { 62477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko unsigned CodePoint = 0; 63477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko for (unsigned i = 0, e = Name.size(); i != e; ++i) { 64477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko assert(isHTMLDecimalCharacterReferenceCharacter(Name[i])); 65477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko CodePoint *= 10; 66477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko CodePoint += Name[i] - '0'; 67477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 685bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko return convertCodePointToUTF8(Allocator, CodePoint); 695bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko} 70477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 715bd1e5ba000023910ad986a16dd16d7ca914750aDmitri GribenkoStringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const { 725bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko unsigned CodePoint = 0; 735bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko for (unsigned i = 0, e = Name.size(); i != e; ++i) { 745bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko CodePoint *= 16; 755bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko const char C = Name[i]; 765bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko assert(isHTMLHexCharacterReferenceCharacter(C)); 775bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko CodePoint += llvm::hexDigitValue(C); 785bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko } 795bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko return convertCodePointToUTF8(Allocator, CodePoint); 80477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 81477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::skipLineStartingDecorations() { 832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // This function should be called only for C comments 842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(CommentState == LCS_InsideCComment); 852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr == CommentEnd) 872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (*BufferPtr) { 902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case ' ': 912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\t': 922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\f': 932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\v': { 942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *NewBufferPtr = BufferPtr; 952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko NewBufferPtr++; 962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (NewBufferPtr == CommentEnd) 972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko char C = *NewBufferPtr; 100bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko while (isHorizontalWhitespace(C)) { 1012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko NewBufferPtr++; 1022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (NewBufferPtr == CommentEnd) 1032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 1042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C = *NewBufferPtr; 1052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == '*') 1072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr = NewBufferPtr + 1; 1082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 1092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '*': 1112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 1122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 1132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 1152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace { 1178d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko/// Returns pointer to the first newline character in the string. 1182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findNewline(const char *BufferPtr, const char *BufferEnd) { 1192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 120bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko if (isVerticalWhitespace(*BufferPtr)) 1212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 1222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 1242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 1252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipNewline(const char *BufferPtr, const char *BufferEnd) { 1272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr == BufferEnd) 1282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 1292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (*BufferPtr == '\n') 1312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 1322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko else { 1332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(*BufferPtr == '\r'); 1342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 1352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != BufferEnd && *BufferPtr == '\n') 1362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 1372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 1392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 1402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 141477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipNamedCharacterReference(const char *BufferPtr, 142477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char *BufferEnd) { 143477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 144477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr)) 145477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferPtr; 146477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 147477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferEnd; 148477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 149477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 150477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipDecimalCharacterReference(const char *BufferPtr, 151477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char *BufferEnd) { 152477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 153477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr)) 154477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferPtr; 155477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 156477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferEnd; 157477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 158477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 159477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipHexCharacterReference(const char *BufferPtr, 16031c71ca0eb83967085f21713b1dedff8c7f7a90fDmitri Gribenko const char *BufferEnd) { 161477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 162477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr)) 163477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferPtr; 164477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 165477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferEnd; 166477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 167477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 168a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenkobool isHTMLIdentifierStartingCharacter(char C) { 169bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko return isLetter(C); 170a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko} 171a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko 1722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isHTMLIdentifierCharacter(char C) { 173bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko return isAlphanumeric(C); 1742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 1752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) { 1772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 1782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (!isHTMLIdentifierCharacter(*BufferPtr)) 1792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 1802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 1822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 1832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Skip HTML string quoted in single or double quotes. Escaping quotes inside 1852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// string allowed. 1862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// 1872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Returns pointer to closing quote. 1882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd) 1892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko{ 1902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char Quote = *BufferPtr; 1912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(Quote == '\"' || Quote == '\''); 1922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 1942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 1952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char C = *BufferPtr; 1962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == Quote && BufferPtr[-1] != '\\') 1972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 1982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) { 2032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 2042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (!isWhitespace(*BufferPtr)) 2052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 2062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 21064da4e55c111f4733135e1780216609569767351Dmitri Gribenkobool isWhitespace(const char *BufferPtr, const char *BufferEnd) { 21164da4e55c111f4733135e1780216609569767351Dmitri Gribenko return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd; 21264da4e55c111f4733135e1780216609569767351Dmitri Gribenko} 21364da4e55c111f4733135e1780216609569767351Dmitri Gribenko 2148c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenkobool isCommandNameStartCharacter(char C) { 215bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko return isLetter(C); 2168c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko} 2178c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko 2182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isCommandNameCharacter(char C) { 219bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko return isAlphanumeric(C); 2202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipCommandName(const char *BufferPtr, const char *BufferEnd) { 2232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 2242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (!isCommandNameCharacter(*BufferPtr)) 2252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 2262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for BCPL comments. 2312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Handles newlines escaped with backslash or trigraph for backslahs. 2322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) { 2332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *CurPtr = BufferPtr; 2342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while (CurPtr != BufferEnd) { 235bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko while (!isVerticalWhitespace(*CurPtr)) { 2362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CurPtr++; 2372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (CurPtr == BufferEnd) 2382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // We found a newline, check if it is escaped. 2412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *EscapePtr = CurPtr - 1; 2422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while(isHorizontalWhitespace(*EscapePtr)) 2432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko EscapePtr--; 2442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (*EscapePtr == '\\' || 2462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' && 2472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) { 2482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // We found an escaped newline. 2492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CurPtr = skipNewline(CurPtr, BufferEnd); 2502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else 2512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return CurPtr; // Not an escaped newline. 2522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for C comments. 2572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Very dumb, does not handle escaped newlines or trigraphs. 2582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) { 2592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 2602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (*BufferPtr == '*') { 2612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr + 1 != BufferEnd); 2622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (*(BufferPtr + 1) == '/') 2632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 2642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko llvm_unreachable("buffer end hit before '*/' was seen"); 2672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2680089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian 2692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // unnamed namespace 2702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 271651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesvoid Lexer::formTokenWithChars(Token &Result, const char *TokEnd, 272651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines tok::TokenKind Kind) { 273651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines const unsigned TokLen = TokEnd - BufferPtr; 274651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Result.setLocation(getSourceLocation(BufferPtr)); 275651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Result.setKind(Kind); 276651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Result.setLength(TokLen); 277651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#ifndef NDEBUG 278651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Result.TextPtr = "<UNSET>"; 279651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines Result.IntVal = 7; 280651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#endif 281651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines BufferPtr = TokEnd; 282651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 283651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 2842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexCommentText(Token &T) { 2852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(CommentState == LCS_InsideBCPLComment || 2862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState == LCS_InsideCComment); 2872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (State) { 2892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LS_Normal: 2902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 2912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LS_VerbatimBlockFirstLine: 2922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko lexVerbatimBlockFirstLine(T); 2932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 2942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LS_VerbatimBlockBody: 2952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko lexVerbatimBlockBody(T); 2962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 297962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko case LS_VerbatimLineText: 298962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko lexVerbatimLineText(T); 299962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko return; 3003f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko case LS_HTMLStartTag: 3013f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko lexHTMLStartTag(T); 3022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3033f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko case LS_HTMLEndTag: 3043f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko lexHTMLEndTag(T); 3058d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko return; 3062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(State == LS_Normal); 3092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TokenPtr = BufferPtr; 3112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(TokenPtr < CommentEnd); 3122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while (TokenPtr != CommentEnd) { 3132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch(*TokenPtr) { 3142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\\': 3152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '@': { 316808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko // Commands that start with a backslash and commands that start with 317808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko // 'at' have equivalent semantics. But we keep information about the 318808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko // exact syntax in AST for comments. 319808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko tok::TokenKind CommandKind = 320808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko (*TokenPtr == '@') ? tok::at_command : tok::backslash_command; 3212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 3222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (TokenPtr == CommentEnd) { 323477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 3242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko char C = *TokenPtr; 3272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (C) { 3282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko default: 3292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 3302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\\': case '@': case '&': case '$': 3322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '#': case '<': case '>': case '%': 3332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\"': case '.': case ':': 3342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // This is one of \\ \@ \& \$ etc escape sequences. 3352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 3362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') { 3372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // This is the \:: escape sequence. 3382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 3392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 340f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1)); 3412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::text); 342f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko T.setText(UnescapedText); 3432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Don't make zero-length commands. 3478c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko if (!isCommandNameStartCharacter(*TokenPtr)) { 348477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 3492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr = skipCommandName(TokenPtr, CommentEnd); 3532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko unsigned Length = TokenPtr - (BufferPtr + 1); 3542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Hardcoded support for lexing LaTeX formula commands 3562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // \f$ \f[ \f] \f{ \f} as a single command. 3572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) { 3582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C = *TokenPtr; 3592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') { 3602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 3612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko Length++; 3622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 365176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines StringRef CommandName(BufferPtr + 1, Length); 3662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 367e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName); 368e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko if (!Info) { 369abbfa671539c74b5bec66a64964de984c908cdfaFariborz Jahanian if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) { 3700089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian StringRef CorrectedName = Info->Name; 371edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling SourceLocation Loc = getSourceLocation(BufferPtr); 372edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling SourceRange CommandRange(Loc.getLocWithOffset(1), 373edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling getSourceLocation(TokenPtr)); 374edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling Diag(Loc, diag::warn_correct_comment_command_name) 3750089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian << CommandName << CorrectedName 3760089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian << FixItHint::CreateReplacement(CommandRange, CorrectedName); 3770089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian } else { 378edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling formTokenWithChars(T, TokenPtr, tok::unknown_command); 379edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling T.setUnknownCommandName(CommandName); 3800089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian Diag(T.getLocation(), diag::warn_unknown_comment_command_name); 3810089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian return; 3820089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian } 3832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 384e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko if (Info->IsVerbatimBlockCommand) { 385e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info); 386e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return; 387e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko } 388e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko if (Info->IsVerbatimLineCommand) { 389e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko setupAndLexVerbatimLine(T, TokenPtr, Info); 3902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 392808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko formTokenWithChars(T, TokenPtr, CommandKind); 393e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko T.setCommandID(Info->getID()); 3942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 397477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko case '&': 398477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko lexHTMLCharacterReference(T); 399477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 400477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 4012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '<': { 4022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 4032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (TokenPtr == CommentEnd) { 404477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 4052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 4062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char C = *TokenPtr; 408a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko if (isHTMLIdentifierStartingCharacter(C)) 4093f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko setupAndLexHTMLStartTag(T); 4102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko else if (C == '/') 4113f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko setupAndLexHTMLEndTag(T); 412477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else 413477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 414477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 4152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 4162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\n': 4192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\r': 4202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr = skipNewline(TokenPtr, CommentEnd); 4212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::newline); 4222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (CommentState == LCS_InsideCComment) 4242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko skipLineStartingDecorations(); 4252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 4262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko default: { 428aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr). 429aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko find_first_of("\n\r\\@&<"); 430aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko if (End != StringRef::npos) 431aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko TokenPtr += End; 432aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko else 433aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko TokenPtr = CommentEnd; 434477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 4352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 4362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 4402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::setupAndLexVerbatimBlock(Token &T, 4422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TextBegin, 443e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko char Marker, const CommandInfo *Info) { 444e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko assert(Info->IsVerbatimBlockCommand); 445e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko 4462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko VerbatimBlockEndCommandName.clear(); 4472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@"); 448e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko VerbatimBlockEndCommandName.append(Info->EndCommandName); 4492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TextBegin, tok::verbatim_block_begin); 451e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko T.setVerbatimBlockID(Info->getID()); 4522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4538d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko // If there is a newline following the verbatim opening command, skip the 4548d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko // newline so that we don't create an tok::verbatim_block_line with empty 4558d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko // text content. 456bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko if (BufferPtr != CommentEnd && 457bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko isVerticalWhitespace(*BufferPtr)) { 458bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko BufferPtr = skipNewline(BufferPtr, CommentEnd); 459bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko State = LS_VerbatimBlockBody; 460bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko return; 4618d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko } 4628d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 4632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_VerbatimBlockFirstLine; 4642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 4652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockFirstLine(Token &T) { 46764da4e55c111f4733135e1780216609569767351Dmitri Gribenkoagain: 4682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr < CommentEnd); 4692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // FIXME: It would be better to scan the text once, finding either the block 4712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // end command or newline. 4722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // 4732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Extract current line. 4742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *Newline = findNewline(BufferPtr, CommentEnd); 4752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef Line(BufferPtr, Newline - BufferPtr); 4762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Look for end command in current line. 4782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko size_t Pos = Line.find(VerbatimBlockEndCommandName); 4798d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko const char *TextEnd; 4802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *NextLine; 4812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (Pos == StringRef::npos) { 4822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Current line is completely verbatim. 4838d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko TextEnd = Newline; 4842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko NextLine = skipNewline(Newline, CommentEnd); 4852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else if (Pos == 0) { 4862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Current line contains just an end command. 4872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *End = BufferPtr + VerbatimBlockEndCommandName.size(); 488f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko StringRef Name(BufferPtr + 1, End - (BufferPtr + 1)); 4892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, End, tok::verbatim_block_end); 490e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID()); 4912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_Normal; 4922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 4932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else { 4942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // There is some text, followed by end command. Extract text first. 4958d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko TextEnd = BufferPtr + Pos; 4968d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko NextLine = TextEnd; 49764da4e55c111f4733135e1780216609569767351Dmitri Gribenko // If there is only whitespace before end command, skip whitespace. 49864da4e55c111f4733135e1780216609569767351Dmitri Gribenko if (isWhitespace(BufferPtr, TextEnd)) { 49964da4e55c111f4733135e1780216609569767351Dmitri Gribenko BufferPtr = TextEnd; 50064da4e55c111f4733135e1780216609569767351Dmitri Gribenko goto again; 50164da4e55c111f4733135e1780216609569767351Dmitri Gribenko } 5022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 5032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 5048d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko StringRef Text(BufferPtr, TextEnd - BufferPtr); 5052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, NextLine, tok::verbatim_block_line); 506f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko T.setVerbatimBlockText(Text); 5072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 5082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_VerbatimBlockBody; 5092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 5102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 5112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockBody(Token &T) { 5122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(State == LS_VerbatimBlockBody); 5132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 5142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (CommentState == LCS_InsideCComment) 5152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko skipLineStartingDecorations(); 5162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 51733337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar if (BufferPtr == CommentEnd) { 51833337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar formTokenWithChars(T, BufferPtr, tok::verbatim_block_line); 51933337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar T.setVerbatimBlockText(""); 52033337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar return; 52133337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar } 52233337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar 5232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko lexVerbatimBlockFirstLine(T); 5242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 5252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 526e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenkovoid Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin, 527e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko const CommandInfo *Info) { 528e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko assert(Info->IsVerbatimLineCommand); 529962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko formTokenWithChars(T, TextBegin, tok::verbatim_line_name); 530e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko T.setVerbatimLineID(Info->getID()); 531962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 532962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko State = LS_VerbatimLineText; 533962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko} 534962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 535962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenkovoid Lexer::lexVerbatimLineText(Token &T) { 536962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko assert(State == LS_VerbatimLineText); 537962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 538962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko // Extract current line. 539962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko const char *Newline = findNewline(BufferPtr, CommentEnd); 540176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines StringRef Text(BufferPtr, Newline - BufferPtr); 541962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko formTokenWithChars(T, Newline, tok::verbatim_line_text); 5422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko T.setVerbatimLineText(Text); 543962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 544962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko State = LS_Normal; 5452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 5462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 547477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkovoid Lexer::lexHTMLCharacterReference(Token &T) { 548477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char *TokenPtr = BufferPtr; 549477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko assert(*TokenPtr == '&'); 550477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr++; 551477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (TokenPtr == CommentEnd) { 552477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 553477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 554477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 555477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char *NamePtr; 556477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko bool isNamed = false; 557477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko bool isDecimal = false; 558477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko char C = *TokenPtr; 559477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (isHTMLNamedCharacterReferenceCharacter(C)) { 560477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko NamePtr = TokenPtr; 561477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd); 562477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko isNamed = true; 563477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else if (C == '#') { 564477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr++; 565477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (TokenPtr == CommentEnd) { 566477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 567477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 568477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 569477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko C = *TokenPtr; 570477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (isHTMLDecimalCharacterReferenceCharacter(C)) { 571477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko NamePtr = TokenPtr; 572477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd); 573477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko isDecimal = true; 574477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else if (C == 'x' || C == 'X') { 575477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr++; 576477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko NamePtr = TokenPtr; 577477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd); 578477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else { 579477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 580477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 581477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 582477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else { 583477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 584477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 585477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 586477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (NamePtr == TokenPtr || TokenPtr == CommentEnd || 587477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko *TokenPtr != ';') { 588477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 589477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 590477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 591477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko StringRef Name(NamePtr, TokenPtr - NamePtr); 592477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr++; // Skip semicolon. 593477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko StringRef Resolved; 5945bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko if (isNamed) 595477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko Resolved = resolveHTMLNamedCharacterReference(Name); 596477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else if (isDecimal) 597477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko Resolved = resolveHTMLDecimalCharacterReference(Name); 598477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else 599477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko Resolved = resolveHTMLHexCharacterReference(Name); 600477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 601477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (Resolved.empty()) { 602477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 603477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 604477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 605477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTokenWithChars(T, TokenPtr, tok::text); 606477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko T.setText(Resolved); 607477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 608477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 609477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 6103f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLStartTag(Token &T) { 611a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko assert(BufferPtr[0] == '<' && 612a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko isHTMLIdentifierStartingCharacter(BufferPtr[1])); 6132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd); 614f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1)); 615834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko if (!isHTMLTagName(Name)) { 616834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko formTextToken(T, TagNameEnd); 617834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko return; 618834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko } 619834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko 6203f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko formTokenWithChars(T, TagNameEnd, tok::html_start_tag); 6213f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko T.setHTMLTagStartName(Name); 6222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr = skipWhitespace(BufferPtr, CommentEnd); 6242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 625a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko const char C = *BufferPtr; 626a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko if (BufferPtr != CommentEnd && 627a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C))) 6283f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko State = LS_HTMLStartTag; 6292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 6302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6313f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLStartTag(Token &T) { 6323f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko assert(State == LS_HTMLStartTag); 6332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TokenPtr = BufferPtr; 6352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko char C = *TokenPtr; 6362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (isHTMLIdentifierCharacter(C)) { 6372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd); 638f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko StringRef Ident(BufferPtr, TokenPtr - BufferPtr); 6392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_ident); 640f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko T.setHTMLIdent(Ident); 6412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else { 6422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (C) { 6432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '=': 6442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 6452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_equals); 6462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 6472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\"': 6482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\'': { 6492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *OpenQuote = TokenPtr; 6502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd); 6512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *ClosingQuote = TokenPtr; 6522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (TokenPtr != CommentEnd) // Skip closing quote. 6532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 6542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_quoted_string); 6552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko T.setHTMLQuotedString(StringRef(OpenQuote + 1, 6562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko ClosingQuote - (OpenQuote + 1))); 6572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 6582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '>': 6602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 6612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_greater); 662a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko State = LS_Normal; 663a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko return; 664a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko case '/': 665a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko TokenPtr++; 666a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko if (TokenPtr != CommentEnd && *TokenPtr == '>') { 667a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko TokenPtr++; 668a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_slash_greater); 669477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else 670477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 671477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 672a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko State = LS_Normal; 673a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko return; 6742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Now look ahead and return to normal state if we don't see any HTML tokens 6782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // ahead. 6792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr = skipWhitespace(BufferPtr, CommentEnd); 6802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr == CommentEnd) { 6812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_Normal; 6822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 6832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C = *BufferPtr; 686a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko if (!isHTMLIdentifierStartingCharacter(C) && 6872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C != '=' && C != '\"' && C != '\'' && C != '>') { 6882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_Normal; 6892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 6902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 6922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6933f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLEndTag(Token &T) { 6942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr[0] == '<' && BufferPtr[1] == '/'); 6952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd); 6972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd); 698834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin); 699834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko if (!isHTMLTagName(Name)) { 700834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko formTextToken(T, TagNameEnd); 701834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko return; 702834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko } 7032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *End = skipWhitespace(TagNameEnd, CommentEnd); 7052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7063f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko formTokenWithChars(T, End, tok::html_end_tag); 707834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko T.setHTMLTagEndName(Name); 7088d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 7098d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko if (BufferPtr != CommentEnd && *BufferPtr == '>') 7103f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko State = LS_HTMLEndTag; 7118d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko} 7128d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 7133f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLEndTag(Token &T) { 7148d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko assert(BufferPtr != CommentEnd && *BufferPtr == '>'); 7158d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 7168d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko formTokenWithChars(T, BufferPtr + 1, tok::html_greater); 7178d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko State = LS_Normal; 7182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 7192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 720ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz JahanianLexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags, 721ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian const CommandTraits &Traits, 722af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko SourceLocation FileLoc, 7232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *BufferStart, const char *BufferEnd): 724ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian Allocator(Allocator), Diags(Diags), Traits(Traits), 7252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferStart(BufferStart), BufferEnd(BufferEnd), 726af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko FileLoc(FileLoc), BufferPtr(BufferStart), 7272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState(LCS_BeforeComment), State(LS_Normal) { 7282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 7292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lex(Token &T) { 7312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoagain: 7322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (CommentState) { 7332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LCS_BeforeComment: 7342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr == BufferEnd) { 7352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, BufferPtr, tok::eof); 7362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 7372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(*BufferPtr == '/'); 7402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; // Skip first slash. 7412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch(*BufferPtr) { 7422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '/': { // BCPL comment. 7432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; // Skip second slash. 7442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != BufferEnd) { 7462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip Doxygen magic marker, if it is present. 7472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // It might be missing because of a typo //< or /*<, or because we 7482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // merged this non-Doxygen comment into a bunch of Doxygen comments 7492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // around it: /** ... */ /* ... */ /** ... */ 7502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char C = *BufferPtr; 7512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == '/' || C == '!') 7522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 7532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip less-than symbol that marks trailing comments. 7562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip it even if the comment is not a Doxygen one, because //< and /*< 7572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // are frequent typos. 7582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != BufferEnd && *BufferPtr == '<') 7592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 7602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_InsideBCPLComment; 7628d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine) 7638d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko State = LS_Normal; 7642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd); 7652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko goto again; 7662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '*': { // C comment. 7682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; // Skip star. 7692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip Doxygen magic marker. 7712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char C = *BufferPtr; 7722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!') 7732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 7742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip less-than symbol that marks trailing comments. 7762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != BufferEnd && *BufferPtr == '<') 7772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 7782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_InsideCComment; 7802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_Normal; 7812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentEnd = findCCommentEnd(BufferPtr, BufferEnd); 7822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko goto again; 7832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko default: 7852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko llvm_unreachable("second character of comment should be '/' or '*'"); 7862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LCS_BetweenComments: { 7892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Consecutive comments are extracted only if there is only whitespace 7902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // between them. So we can search for the start of the next comment. 7912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *EndWhitespace = BufferPtr; 7922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while(EndWhitespace != BufferEnd && *EndWhitespace != '/') 7932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko EndWhitespace++; 7942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Turn any whitespace between comments (and there is only whitespace 796a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko // between them -- guaranteed by comment extraction) into a newline. We 797a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko // have two newlines between C comments in total (first one was synthesized 798a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko // after a comment). 7992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, EndWhitespace, tok::newline); 8002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_BeforeComment; 8022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 8032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LCS_InsideBCPLComment: 8062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LCS_InsideCComment: 8072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != CommentEnd) { 8082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko lexCommentText(T); 8092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 8102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else { 8112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip C comment closing sequence. 8122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (CommentState == LCS_InsideCComment) { 8132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr[0] == '*' && BufferPtr[1] == '/'); 8142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr += 2; 8152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr <= BufferEnd); 8162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Synthenize newline just after the C comment, regardless if there is 8182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // actually a newline. 8192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, BufferPtr, tok::newline); 8202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_BetweenComments; 8222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 8232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else { 8242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Don't synthesized a newline after BCPL comment. 8252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_BetweenComments; 8262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko goto again; 8272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 8312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8322d44d77fed3200e2eff289f55493317e90d3398cDmitri GribenkoStringRef Lexer::getSpelling(const Token &Tok, 8332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const SourceManager &SourceMgr, 8342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko bool *Invalid) const { 8352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko SourceLocation Loc = Tok.getLocation(); 8362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc); 8372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko bool InvalidTemp = false; 8392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp); 8402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (InvalidTemp) { 8412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko *Invalid = true; 8422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return StringRef(); 8432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *Begin = File.data() + LocInfo.second; 8462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return StringRef(Begin, Tok.getLength()); 8472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 8482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace comments 8502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace clang 8512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 852