CommentLexer.cpp revision 834a5bd311b4a32f89937ca5b6dd2b4111891859
12d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "clang/AST/CommentLexer.h" 2aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko#include "clang/AST/CommentCommandTraits.h" 3477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko#include "clang/Basic/ConvertUTF.h" 42d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/StringSwitch.h" 52d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/Support/ErrorHandling.h" 62d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 72d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace clang { 82d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace comments { 92d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Token::dump(const Lexer &L, const SourceManager &SM) const { 112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko llvm::errs() << "comments::Token Kind=" << Kind << " "; 122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko Loc.dump(SM); 132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n"; 142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 16477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkonamespace { 17477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkobool isHTMLNamedCharacterReferenceCharacter(char C) { 18477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return (C >= 'a' && C <= 'z') || 19477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko (C >= 'A' && C <= 'Z'); 20477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 21477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 22477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkobool isHTMLDecimalCharacterReferenceCharacter(char C) { 23477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return C >= '0' && C <= '9'; 24477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 25477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 26477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkobool isHTMLHexCharacterReferenceCharacter(char C) { 27477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return (C >= '0' && C <= '9') || 28477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko (C >= 'a' && C <= 'f') || 29477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko (C >= 'A' && C <= 'F'); 30477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 31834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko 32834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenkobool isHTMLTagName(StringRef Name) { 33834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko return llvm::StringSwitch<bool>(Name) 34834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("em", "strong", true) 35834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("tt", "i", "b", "big", "small", true) 36834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("strike", "s", "u", "font", true) 37834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Case("a", true) 38834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Case("hr", true) 39834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("div", "span", true) 40834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("h1", "h2", "h3", true) 41834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("h4", "h5", "h6", true) 42834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Case("code", true) 43834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Case("blockquote", true) 44834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("sub", "sup", true) 45834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Case("img", true) 46834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Case("p", true) 47834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Case("br", true) 48834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Case("pre", true) 49834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("ins", "del", true) 50834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("ul", "ol", "li", true) 51834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("dl", "dt", "dd", true) 52834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("table", "caption", true) 53834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("thead", "tfoot", "tbody", true) 54834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("colgroup", "col", true) 55834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Cases("tr", "th", "td", true) 56834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko .Default(false); 57834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko} 58477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} // unnamed namespace 59477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 60477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const { 61477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return llvm::StringSwitch<StringRef>(Name) 62477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("amp", "&") 63477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("lt", "<") 64477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("gt", ">") 65477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("quot", "\"") 66477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Case("apos", "\'") 67477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko .Default(""); 68477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 69477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 70477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const { 71477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko unsigned CodePoint = 0; 72477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko for (unsigned i = 0, e = Name.size(); i != e; ++i) { 73477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko assert(isHTMLDecimalCharacterReferenceCharacter(Name[i])); 74477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko CodePoint *= 10; 75477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko CodePoint += Name[i] - '0'; 76477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 77477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 78477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT); 79477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko char *ResolvedPtr = Resolved; 80477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr)) 81477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return StringRef(Resolved, ResolvedPtr - Resolved); 82477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else 83477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return StringRef(); 84477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 85477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 86477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const { 87477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko unsigned CodePoint = 0; 88477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko for (unsigned i = 0, e = Name.size(); i != e; ++i) { 89477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko CodePoint *= 16; 90477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char C = Name[i]; 91477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko assert(isHTMLHexCharacterReferenceCharacter(C)); 92477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (C >= '0' && C <= '9') 93477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko CodePoint += Name[i] - '0'; 94477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else if (C >= 'a' && C <= 'f') 95477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko CodePoint += Name[i] - 'a' + 10; 96477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else 97477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko CodePoint += Name[i] - 'A' + 10; 98477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 99477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 100477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT); 101477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko char *ResolvedPtr = Resolved; 102477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr)) 103477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return StringRef(Resolved, ResolvedPtr - Resolved); 104477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else 105477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return StringRef(); 106477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 107477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 1082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::skipLineStartingDecorations() { 1092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // This function should be called only for C comments 1102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(CommentState == LCS_InsideCComment); 1112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr == CommentEnd) 1132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 1142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (*BufferPtr) { 1162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case ' ': 1172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\t': 1182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\f': 1192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\v': { 1202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *NewBufferPtr = BufferPtr; 1212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko NewBufferPtr++; 1222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (NewBufferPtr == CommentEnd) 1232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 1242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko char C = *NewBufferPtr; 1262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while (C == ' ' || C == '\t' || C == '\f' || C == '\v') { 1272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko NewBufferPtr++; 1282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (NewBufferPtr == CommentEnd) 1292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 1302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C = *NewBufferPtr; 1312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == '*') 1332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr = NewBufferPtr + 1; 1342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 1352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '*': 1372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 1382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 1392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 1412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace { 1438d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko/// Returns pointer to the first newline character in the string. 1442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findNewline(const char *BufferPtr, const char *BufferEnd) { 1452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 1462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char C = *BufferPtr; 1472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == '\n' || C == '\r') 1482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 1492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 1512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 1522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipNewline(const char *BufferPtr, const char *BufferEnd) { 1542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr == BufferEnd) 1552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 1562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (*BufferPtr == '\n') 1582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 1592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko else { 1602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(*BufferPtr == '\r'); 1612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 1622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != BufferEnd && *BufferPtr == '\n') 1632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 1642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 1662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 1672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 168477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipNamedCharacterReference(const char *BufferPtr, 169477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char *BufferEnd) { 170477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 171477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr)) 172477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferPtr; 173477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 174477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferEnd; 175477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 176477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 177477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipDecimalCharacterReference(const char *BufferPtr, 178477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char *BufferEnd) { 179477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 180477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr)) 181477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferPtr; 182477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 183477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferEnd; 184477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 185477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 186477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipHexCharacterReference(const char *BufferPtr, 187477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char *BufferEnd) { 188477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 189477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr)) 190477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferPtr; 191477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 192477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return BufferEnd; 193477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 194477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 195a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenkobool isHTMLIdentifierStartingCharacter(char C) { 196a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko return (C >= 'a' && C <= 'z') || 197a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko (C >= 'A' && C <= 'Z'); 198a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko} 199a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko 2002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isHTMLIdentifierCharacter(char C) { 2012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return (C >= 'a' && C <= 'z') || 2022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko (C >= 'A' && C <= 'Z') || 2032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko (C >= '0' && C <= '9'); 2042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) { 2072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 2082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (!isHTMLIdentifierCharacter(*BufferPtr)) 2092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 2102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Skip HTML string quoted in single or double quotes. Escaping quotes inside 2152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// string allowed. 2162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// 2172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Returns pointer to closing quote. 2182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd) 2192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko{ 2202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char Quote = *BufferPtr; 2212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(Quote == '\"' || Quote == '\''); 2222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 2242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 2252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char C = *BufferPtr; 2262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == Quote && BufferPtr[-1] != '\\') 2272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 2282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isHorizontalWhitespace(char C) { 2332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return C == ' ' || C == '\t' || C == '\f' || C == '\v'; 2342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isWhitespace(char C) { 2372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return C == ' ' || C == '\n' || C == '\r' || 2382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C == '\t' || C == '\f' || C == '\v'; 2392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) { 2422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 2432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (!isWhitespace(*BufferPtr)) 2442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 2452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 24964da4e55c111f4733135e1780216609569767351Dmitri Gribenkobool isWhitespace(const char *BufferPtr, const char *BufferEnd) { 25064da4e55c111f4733135e1780216609569767351Dmitri Gribenko return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd; 25164da4e55c111f4733135e1780216609569767351Dmitri Gribenko} 25264da4e55c111f4733135e1780216609569767351Dmitri Gribenko 2532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isCommandNameCharacter(char C) { 2542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return (C >= 'a' && C <= 'z') || 2552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko (C >= 'A' && C <= 'Z') || 2562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko (C >= '0' && C <= '9'); 2572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipCommandName(const char *BufferPtr, const char *BufferEnd) { 2602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 2612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (!isCommandNameCharacter(*BufferPtr)) 2622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 2632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for BCPL comments. 2682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Handles newlines escaped with backslash or trigraph for backslahs. 2692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) { 2702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *CurPtr = BufferPtr; 2712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while (CurPtr != BufferEnd) { 2722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko char C = *CurPtr; 2732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while (C != '\n' && C != '\r') { 2742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CurPtr++; 2752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (CurPtr == BufferEnd) 2762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C = *CurPtr; 2782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // We found a newline, check if it is escaped. 2802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *EscapePtr = CurPtr - 1; 2812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while(isHorizontalWhitespace(*EscapePtr)) 2822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko EscapePtr--; 2832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (*EscapePtr == '\\' || 2852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' && 2862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) { 2872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // We found an escaped newline. 2882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CurPtr = skipNewline(CurPtr, BufferEnd); 2892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else 2902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return CurPtr; // Not an escaped newline. 2912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferEnd; 2932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 2942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for C comments. 2962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Very dumb, does not handle escaped newlines or trigraphs. 2972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) { 2982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko for ( ; BufferPtr != BufferEnd; ++BufferPtr) { 2992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (*BufferPtr == '*') { 3002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr + 1 != BufferEnd); 3012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (*(BufferPtr + 1) == '/') 3022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return BufferPtr; 3032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko llvm_unreachable("buffer end hit before '*/' was seen"); 3062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 3072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // unnamed namespace 3082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexCommentText(Token &T) { 3102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(CommentState == LCS_InsideBCPLComment || 3112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState == LCS_InsideCComment); 3122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (State) { 3142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LS_Normal: 3152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 3162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LS_VerbatimBlockFirstLine: 3172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko lexVerbatimBlockFirstLine(T); 3182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LS_VerbatimBlockBody: 3202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko lexVerbatimBlockBody(T); 3212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 322962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko case LS_VerbatimLineText: 323962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko lexVerbatimLineText(T); 324962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko return; 3253f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko case LS_HTMLStartTag: 3263f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko lexHTMLStartTag(T); 3272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3283f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko case LS_HTMLEndTag: 3293f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko lexHTMLEndTag(T); 3308d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko return; 3312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(State == LS_Normal); 3342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TokenPtr = BufferPtr; 3362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(TokenPtr < CommentEnd); 3372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while (TokenPtr != CommentEnd) { 3382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch(*TokenPtr) { 3392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\\': 3402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '@': { 3412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 3422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (TokenPtr == CommentEnd) { 343477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 3442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko char C = *TokenPtr; 3472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (C) { 3482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko default: 3492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 3502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\\': case '@': case '&': case '$': 3522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '#': case '<': case '>': case '%': 3532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\"': case '.': case ':': 3542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // This is one of \\ \@ \& \$ etc escape sequences. 3552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 3562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') { 3572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // This is the \:: escape sequence. 3582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 3592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 360f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1)); 3612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::text); 362f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko T.setText(UnescapedText); 3632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Don't make zero-length commands. 3672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (!isCommandNameCharacter(*TokenPtr)) { 368477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 3692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr = skipCommandName(TokenPtr, CommentEnd); 3732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko unsigned Length = TokenPtr - (BufferPtr + 1); 3742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Hardcoded support for lexing LaTeX formula commands 3762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // \f$ \f[ \f] \f{ \f} as a single command. 3772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) { 3782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C = *TokenPtr; 3792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') { 3802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 3812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko Length++; 3822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const StringRef CommandName(BufferPtr + 1, Length); 3862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef EndName; 3872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 388aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko if (Traits.isVerbatimBlockCommand(CommandName, EndName)) { 3892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, EndName); 3902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 392aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko if (Traits.isVerbatimLineCommand(CommandName)) { 393962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko setupAndLexVerbatimLine(T, TokenPtr); 3942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::command); 3972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko T.setCommandName(CommandName); 3982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 3992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 401477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko case '&': 402477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko lexHTMLCharacterReference(T); 403477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 404477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 4052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '<': { 4062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 4072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (TokenPtr == CommentEnd) { 408477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 4092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 4102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char C = *TokenPtr; 412a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko if (isHTMLIdentifierStartingCharacter(C)) 4133f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko setupAndLexHTMLStartTag(T); 4142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko else if (C == '/') 4153f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko setupAndLexHTMLEndTag(T); 416477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else 417477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 418477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 4192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 4202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\n': 4232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\r': 4242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr = skipNewline(TokenPtr, CommentEnd); 4252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::newline); 4262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (CommentState == LCS_InsideCComment) 4282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko skipLineStartingDecorations(); 4292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 4302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko default: { 4322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while (true) { 4332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 4342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (TokenPtr == CommentEnd) 4352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 436a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko const char C = *TokenPtr; 4372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if(C == '\n' || C == '\r' || 438477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko C == '\\' || C == '@' || C == '&' || C == '<') 4392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 4402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 441477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 4422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 4432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 4462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 4472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::setupAndLexVerbatimBlock(Token &T, 4492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TextBegin, 4502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko char Marker, StringRef EndName) { 4512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko VerbatimBlockEndCommandName.clear(); 4522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@"); 4532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko VerbatimBlockEndCommandName.append(EndName); 4542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 455f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko StringRef Name(BufferPtr + 1, TextBegin - (BufferPtr + 1)); 4562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TextBegin, tok::verbatim_block_begin); 457f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko T.setVerbatimBlockName(Name); 4582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4598d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko // If there is a newline following the verbatim opening command, skip the 4608d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko // newline so that we don't create an tok::verbatim_block_line with empty 4618d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko // text content. 4628d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko if (BufferPtr != CommentEnd) { 4638d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko const char C = *BufferPtr; 4648d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko if (C == '\n' || C == '\r') { 4658d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko BufferPtr = skipNewline(BufferPtr, CommentEnd); 4668d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko State = LS_VerbatimBlockBody; 4678d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko return; 4688d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko } 4698d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko } 4708d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 4712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_VerbatimBlockFirstLine; 4722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 4732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockFirstLine(Token &T) { 47564da4e55c111f4733135e1780216609569767351Dmitri Gribenkoagain: 4762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr < CommentEnd); 4772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // FIXME: It would be better to scan the text once, finding either the block 4792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // end command or newline. 4802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // 4812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Extract current line. 4822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *Newline = findNewline(BufferPtr, CommentEnd); 4832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef Line(BufferPtr, Newline - BufferPtr); 4842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 4852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Look for end command in current line. 4862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko size_t Pos = Line.find(VerbatimBlockEndCommandName); 4878d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko const char *TextEnd; 4882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *NextLine; 4892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (Pos == StringRef::npos) { 4902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Current line is completely verbatim. 4918d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko TextEnd = Newline; 4922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko NextLine = skipNewline(Newline, CommentEnd); 4932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else if (Pos == 0) { 4942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Current line contains just an end command. 4952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *End = BufferPtr + VerbatimBlockEndCommandName.size(); 496f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko StringRef Name(BufferPtr + 1, End - (BufferPtr + 1)); 4972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, End, tok::verbatim_block_end); 498f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko T.setVerbatimBlockName(Name); 4992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_Normal; 5002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 5012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else { 5022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // There is some text, followed by end command. Extract text first. 5038d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko TextEnd = BufferPtr + Pos; 5048d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko NextLine = TextEnd; 50564da4e55c111f4733135e1780216609569767351Dmitri Gribenko // If there is only whitespace before end command, skip whitespace. 50664da4e55c111f4733135e1780216609569767351Dmitri Gribenko if (isWhitespace(BufferPtr, TextEnd)) { 50764da4e55c111f4733135e1780216609569767351Dmitri Gribenko BufferPtr = TextEnd; 50864da4e55c111f4733135e1780216609569767351Dmitri Gribenko goto again; 50964da4e55c111f4733135e1780216609569767351Dmitri Gribenko } 5102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 5112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 5128d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko StringRef Text(BufferPtr, TextEnd - BufferPtr); 5132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, NextLine, tok::verbatim_block_line); 514f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko T.setVerbatimBlockText(Text); 5152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 5162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_VerbatimBlockBody; 5172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 5182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 5192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockBody(Token &T) { 5202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(State == LS_VerbatimBlockBody); 5212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 5222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (CommentState == LCS_InsideCComment) 5232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko skipLineStartingDecorations(); 5242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 5252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko lexVerbatimBlockFirstLine(T); 5262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 5272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 528962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenkovoid Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin) { 5292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const StringRef Name(BufferPtr + 1, TextBegin - BufferPtr - 1); 530962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko formTokenWithChars(T, TextBegin, tok::verbatim_line_name); 5312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko T.setVerbatimLineName(Name); 532962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 533962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko State = LS_VerbatimLineText; 534962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko} 535962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 536962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenkovoid Lexer::lexVerbatimLineText(Token &T) { 537962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko assert(State == LS_VerbatimLineText); 538962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 539962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko // Extract current line. 540962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko const char *Newline = findNewline(BufferPtr, CommentEnd); 541962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko const StringRef Text(BufferPtr, Newline - BufferPtr); 542962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko formTokenWithChars(T, Newline, tok::verbatim_line_text); 5432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko T.setVerbatimLineText(Text); 544962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 545962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko State = LS_Normal; 5462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 5472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 548477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkovoid Lexer::lexHTMLCharacterReference(Token &T) { 549477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char *TokenPtr = BufferPtr; 550477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko assert(*TokenPtr == '&'); 551477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr++; 552477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (TokenPtr == CommentEnd) { 553477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 554477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 555477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 556477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko const char *NamePtr; 557477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko bool isNamed = false; 558477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko bool isDecimal = false; 559477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko char C = *TokenPtr; 560477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (isHTMLNamedCharacterReferenceCharacter(C)) { 561477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko NamePtr = TokenPtr; 562477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd); 563477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko isNamed = true; 564477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else if (C == '#') { 565477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr++; 566477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (TokenPtr == CommentEnd) { 567477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 568477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 569477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 570477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko C = *TokenPtr; 571477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (isHTMLDecimalCharacterReferenceCharacter(C)) { 572477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko NamePtr = TokenPtr; 573477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd); 574477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko isDecimal = true; 575477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else if (C == 'x' || C == 'X') { 576477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr++; 577477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko NamePtr = TokenPtr; 578477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd); 579477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else { 580477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 581477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 582477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 583477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else { 584477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 585477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 586477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 587477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (NamePtr == TokenPtr || TokenPtr == CommentEnd || 588477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko *TokenPtr != ';') { 589477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 590477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 591477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 592477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko StringRef Name(NamePtr, TokenPtr - NamePtr); 593477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko TokenPtr++; // Skip semicolon. 594477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko StringRef Resolved; 595477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (isNamed) 596477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko Resolved = resolveHTMLNamedCharacterReference(Name); 597477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else if (isDecimal) 598477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko Resolved = resolveHTMLDecimalCharacterReference(Name); 599477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko else 600477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko Resolved = resolveHTMLHexCharacterReference(Name); 601477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 602477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko if (Resolved.empty()) { 603477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 604477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 605477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 606477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTokenWithChars(T, TokenPtr, tok::text); 607477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko T.setText(Resolved); 608477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko return; 609477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} 610477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 6113f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLStartTag(Token &T) { 612a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko assert(BufferPtr[0] == '<' && 613a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko isHTMLIdentifierStartingCharacter(BufferPtr[1])); 6142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd); 615f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1)); 616834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko if (!isHTMLTagName(Name)) { 617834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko formTextToken(T, TagNameEnd); 618834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko return; 619834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko } 620834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko 6213f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko formTokenWithChars(T, TagNameEnd, tok::html_start_tag); 6223f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko T.setHTMLTagStartName(Name); 6232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr = skipWhitespace(BufferPtr, CommentEnd); 6252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 626a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko const char C = *BufferPtr; 627a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko if (BufferPtr != CommentEnd && 628a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C))) 6293f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko State = LS_HTMLStartTag; 6302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 6312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6323f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLStartTag(Token &T) { 6333f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko assert(State == LS_HTMLStartTag); 6342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TokenPtr = BufferPtr; 6362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko char C = *TokenPtr; 6372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (isHTMLIdentifierCharacter(C)) { 6382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd); 639f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko StringRef Ident(BufferPtr, TokenPtr - BufferPtr); 6402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_ident); 641f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko T.setHTMLIdent(Ident); 6422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else { 6432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (C) { 6442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '=': 6452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 6462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_equals); 6472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 6482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\"': 6492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '\'': { 6502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *OpenQuote = TokenPtr; 6512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd); 6522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *ClosingQuote = TokenPtr; 6532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (TokenPtr != CommentEnd) // Skip closing quote. 6542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 6552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_quoted_string); 6562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko T.setHTMLQuotedString(StringRef(OpenQuote + 1, 6572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko ClosingQuote - (OpenQuote + 1))); 6582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 6592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '>': 6612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko TokenPtr++; 6622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_greater); 663a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko State = LS_Normal; 664a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko return; 665a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko case '/': 666a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko TokenPtr++; 667a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko if (TokenPtr != CommentEnd && *TokenPtr == '>') { 668a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko TokenPtr++; 669a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko formTokenWithChars(T, TokenPtr, tok::html_slash_greater); 670477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } else 671477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTextToken(T, TokenPtr); 672477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 673a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko State = LS_Normal; 674a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko return; 6752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Now look ahead and return to normal state if we don't see any HTML tokens 6792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // ahead. 6802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr = skipWhitespace(BufferPtr, CommentEnd); 6812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr == CommentEnd) { 6822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_Normal; 6832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 6842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C = *BufferPtr; 687a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko if (!isHTMLIdentifierStartingCharacter(C) && 6882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko C != '=' && C != '\"' && C != '\'' && C != '>') { 6892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_Normal; 6902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 6912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 6922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 6932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6943f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLEndTag(Token &T) { 6952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr[0] == '<' && BufferPtr[1] == '/'); 6962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 6972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd); 6982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd); 699834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin); 700834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko if (!isHTMLTagName(Name)) { 701834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko formTextToken(T, TagNameEnd); 702834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko return; 703834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko } 7042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *End = skipWhitespace(TagNameEnd, CommentEnd); 7062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7073f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko formTokenWithChars(T, End, tok::html_end_tag); 708834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko T.setHTMLTagEndName(Name); 7098d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 7108d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko if (BufferPtr != CommentEnd && *BufferPtr == '>') 7113f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko State = LS_HTMLEndTag; 7128d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko} 7138d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 7143f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLEndTag(Token &T) { 7158d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko assert(BufferPtr != CommentEnd && *BufferPtr == '>'); 7168d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 7178d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko formTokenWithChars(T, BufferPtr + 1, tok::html_greater); 7188d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko State = LS_Normal; 7192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 7202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 721aa58081902ad31927df02e8537d972eabe29d6dfDmitri GribenkoLexer::Lexer(llvm::BumpPtrAllocator &Allocator, const CommandTraits &Traits, 722477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko SourceLocation FileLoc, const CommentOptions &CommOpts, 7232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *BufferStart, const char *BufferEnd): 724aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko Allocator(Allocator), Traits(Traits), 7252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferStart(BufferStart), BufferEnd(BufferEnd), 7262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart), 7272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState(LCS_BeforeComment), State(LS_Normal) { 7282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 7292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lex(Token &T) { 7312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoagain: 7322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch (CommentState) { 7332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LCS_BeforeComment: 7342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr == BufferEnd) { 7352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, BufferPtr, tok::eof); 7362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return; 7372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(*BufferPtr == '/'); 7402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; // Skip first slash. 7412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko switch(*BufferPtr) { 7422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '/': { // BCPL comment. 7432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; // Skip second slash. 7442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != BufferEnd) { 7462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip Doxygen magic marker, if it is present. 7472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // It might be missing because of a typo //< or /*<, or because we 7482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // merged this non-Doxygen comment into a bunch of Doxygen comments 7492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // around it: /** ... */ /* ... */ /** ... */ 7502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char C = *BufferPtr; 7512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (C == '/' || C == '!') 7522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 7532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip less-than symbol that marks trailing comments. 7562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip it even if the comment is not a Doxygen one, because //< and /*< 7572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // are frequent typos. 7582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != BufferEnd && *BufferPtr == '<') 7592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 7602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_InsideBCPLComment; 7628d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine) 7638d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko State = LS_Normal; 7642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd); 7652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko goto again; 7662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case '*': { // C comment. 7682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; // Skip star. 7692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip Doxygen magic marker. 7712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char C = *BufferPtr; 7722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!') 7732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 7742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip less-than symbol that marks trailing comments. 7762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != BufferEnd && *BufferPtr == '<') 7772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr++; 7782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_InsideCComment; 7802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko State = LS_Normal; 7812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentEnd = findCCommentEnd(BufferPtr, BufferEnd); 7822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko goto again; 7832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko default: 7852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko llvm_unreachable("second character of comment should be '/' or '*'"); 7862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 7872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LCS_BetweenComments: { 7892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Consecutive comments are extracted only if there is only whitespace 7902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // between them. So we can search for the start of the next comment. 7912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *EndWhitespace = BufferPtr; 7922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko while(EndWhitespace != BufferEnd && *EndWhitespace != '/') 7932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko EndWhitespace++; 7942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 7952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Turn any whitespace between comments (and there is only whitespace 796a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko // between them -- guaranteed by comment extraction) into a newline. We 797a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko // have two newlines between C comments in total (first one was synthesized 798a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko // after a comment). 7992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, EndWhitespace, tok::newline); 8002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_BeforeComment; 8022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 8032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LCS_InsideBCPLComment: 8062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko case LCS_InsideCComment: 8072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (BufferPtr != CommentEnd) { 8082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko lexCommentText(T); 8092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 8102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else { 8112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Skip C comment closing sequence. 8122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (CommentState == LCS_InsideCComment) { 8132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr[0] == '*' && BufferPtr[1] == '/'); 8142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko BufferPtr += 2; 8152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(BufferPtr <= BufferEnd); 8162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Synthenize newline just after the C comment, regardless if there is 8182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // actually a newline. 8192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko formTokenWithChars(T, BufferPtr, tok::newline); 8202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_BetweenComments; 8222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko break; 8232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } else { 8242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko // Don't synthesized a newline after BCPL comment. 8252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko CommentState = LCS_BetweenComments; 8262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko goto again; 8272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 8312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8322d44d77fed3200e2eff289f55493317e90d3398cDmitri GribenkoStringRef Lexer::getSpelling(const Token &Tok, 8332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const SourceManager &SourceMgr, 8342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko bool *Invalid) const { 8352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko SourceLocation Loc = Tok.getLocation(); 8362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc); 8372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko bool InvalidTemp = false; 8392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp); 8402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko if (InvalidTemp) { 8412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko *Invalid = true; 8422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return StringRef(); 8432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 8442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *Begin = File.data() + LocInfo.second; 8462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return StringRef(Begin, Tok.getLength()); 8472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} 8482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 8492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace comments 8502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace clang 8512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 852