CommentLexer.cpp revision abbfa671539c74b5bec66a64964de984c908cdfa
12d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "clang/AST/CommentLexer.h"
2aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko#include "clang/AST/CommentCommandTraits.h"
3efa78d163214fd9e909ab2bf6911edfbc7a2b9dfFariborz Jahanian#include "clang/AST/CommentDiagnostic.h"
4bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko#include "clang/Basic/CharInfo.h"
5c934dfe950a14fe447aa14a7dae25d00ee87c8bbDmitri Gribenko#include "llvm/ADT/StringExtras.h"
62d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/StringSwitch.h"
7cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko#include "llvm/Support/ConvertUTF.h"
82d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/Support/ErrorHandling.h"
92d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace clang {
112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace comments {
122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Token::dump(const Lexer &L, const SourceManager &SM) const {
142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm::errs() << "comments::Token Kind=" << Kind << " ";
152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  Loc.dump(SM);
162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
190ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
20bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isLetter(C);
21477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
22477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
230ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
24bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isDigit(C);
25477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
26477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
270ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline bool isHTMLHexCharacterReferenceCharacter(char C) {
28bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isHexDigit(C);
29477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
30834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko
310ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline StringRef convertCodePointToUTF8(
320ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenko                                      llvm::BumpPtrAllocator &Allocator,
330ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenko                                      unsigned CodePoint) {
34658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
35658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian  char *ResolvedPtr = Resolved;
36cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko  if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
37658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian    return StringRef(Resolved, ResolvedPtr - Resolved);
38658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian  else
39658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian    return StringRef();
40658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian}
415bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko
420ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkonamespace {
430ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenko
445bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko#include "clang/AST/CommentHTMLTags.inc"
455bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
465bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko
475bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko} // unnamed namespace
48658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian
49477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
505bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  // Fast path, first check a few most widely used named character references.
51477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return llvm::StringSwitch<StringRef>(Name)
52477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("amp", "&")
53477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("lt", "<")
54477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("gt", ">")
55477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("quot", "\"")
56477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("apos", "\'")
575bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko      // Slow path.
585bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko      .Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
59658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian}
60477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
61477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
62477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  unsigned CodePoint = 0;
63477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
64477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
65477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    CodePoint *= 10;
66477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    CodePoint += Name[i] - '0';
67477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
685bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  return convertCodePointToUTF8(Allocator, CodePoint);
695bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko}
70477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
715bd1e5ba000023910ad986a16dd16d7ca914750aDmitri GribenkoStringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
725bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  unsigned CodePoint = 0;
735bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
745bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    CodePoint *= 16;
755bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    const char C = Name[i];
765bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    assert(isHTMLHexCharacterReferenceCharacter(C));
775bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    CodePoint += llvm::hexDigitValue(C);
785bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  }
795bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  return convertCodePointToUTF8(Allocator, CodePoint);
80477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
81477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::skipLineStartingDecorations() {
832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // This function should be called only for C comments
842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(CommentState == LCS_InsideCComment);
852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (BufferPtr == CommentEnd)
872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (*BufferPtr) {
902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case ' ':
912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\t':
922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\f':
932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\v': {
942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *NewBufferPtr = BufferPtr;
952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    NewBufferPtr++;
962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (NewBufferPtr == CommentEnd)
972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return;
982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    char C = *NewBufferPtr;
100bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    while (isHorizontalWhitespace(C)) {
1012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      NewBufferPtr++;
1022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (NewBufferPtr == CommentEnd)
1032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
1042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C = *NewBufferPtr;
1052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
1062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (C == '*')
1072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr = NewBufferPtr + 1;
1082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
1092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '*':
1112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferPtr++;
1122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
1132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace {
1178d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko/// Returns pointer to the first newline character in the string.
1182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findNewline(const char *BufferPtr, const char *BufferEnd) {
1192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
120bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    if (isVerticalWhitespace(*BufferPtr))
1212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
1222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
1242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Skip a single newline sequence ("\n", "\r" or "\r\n") at \p BufferPtr.
///
/// \returns the pointer just past the newline, or \p BufferPtr unchanged when
/// the range is empty.  A non-empty range must start with '\n' or '\r'
/// (asserted).
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  if (BufferPtr == BufferEnd)
    return BufferPtr;

  const char First = *BufferPtr++;
  if (First == '\n')
    return BufferPtr;

  assert(First == '\r');
  // Treat "\r\n" as one newline.
  if (BufferPtr != BufferEnd && *BufferPtr == '\n')
    ++BufferPtr;
  return BufferPtr;
}
1402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
141477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipNamedCharacterReference(const char *BufferPtr,
142477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                        const char *BufferEnd) {
143477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
144477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
145477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
146477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
147477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
148477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
149477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
150477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipDecimalCharacterReference(const char *BufferPtr,
151477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                          const char *BufferEnd) {
152477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
153477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
154477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
155477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
156477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
157477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
158477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
159477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipHexCharacterReference(const char *BufferPtr,
160477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                          const char *BufferEnd) {
161477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
162477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
163477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
164477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
165477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
166477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
167477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
168a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenkobool isHTMLIdentifierStartingCharacter(char C) {
169bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isLetter(C);
170a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko}
171a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko
1722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isHTMLIdentifierCharacter(char C) {
173bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isAlphanumeric(C);
1742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
1772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
1782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isHTMLIdentifierCharacter(*BufferPtr))
1792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
1802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
1822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Skip HTML string quoted in single or double quotes.  A quote character
/// that is immediately preceded by a backslash does not terminate the string.
///
/// \p BufferPtr must point at the opening quote (asserted), so the range must
/// not be empty.
///
/// Returns pointer to closing quote, or \p BufferEnd if the string is not
/// terminated inside the range.
const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
{
  const char Quote = *BufferPtr;
  assert(Quote == '\"' || Quote == '\'');

  // Remember the previous character so an escaped quote can be recognized;
  // for the first character of the body that is the opening quote itself.
  char Previous = Quote;
  for (const char *Cur = BufferPtr + 1; Cur != BufferEnd; ++Cur) {
    const char C = *Cur;
    if (C == Quote && Previous != '\\')
      return Cur;
    Previous = C;
  }
  return BufferEnd;
}
2012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
2032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isWhitespace(*BufferPtr))
2052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
2062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
21064da4e55c111f4733135e1780216609569767351Dmitri Gribenkobool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
21164da4e55c111f4733135e1780216609569767351Dmitri Gribenko  return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
21264da4e55c111f4733135e1780216609569767351Dmitri Gribenko}
21364da4e55c111f4733135e1780216609569767351Dmitri Gribenko
2148c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenkobool isCommandNameStartCharacter(char C) {
215bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isLetter(C);
2168c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko}
2178c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko
2182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isCommandNameCharacter(char C) {
219bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isAlphanumeric(C);
2202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
2232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isCommandNameCharacter(*BufferPtr))
2252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
2262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for BCPL comments.
2312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Handles newlines escaped with backslash or trigraph for backslahs.
2322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
2332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *CurPtr = BufferPtr;
2342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  while (CurPtr != BufferEnd) {
235bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    while (!isVerticalWhitespace(*CurPtr)) {
2362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CurPtr++;
2372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (CurPtr == BufferEnd)
2382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return BufferEnd;
2392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
2402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // We found a newline, check if it is escaped.
2412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *EscapePtr = CurPtr - 1;
2422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while(isHorizontalWhitespace(*EscapePtr))
2432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      EscapePtr--;
2442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (*EscapePtr == '\\' ||
2462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
2472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko         EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
2482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // We found an escaped newline.
2492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CurPtr = skipNewline(CurPtr, BufferEnd);
2502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    } else
2512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return CurPtr; // Not an escaped newline.
2522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for C comments.
2572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Very dumb, does not handle escaped newlines or trigraphs.
2582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
2592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (*BufferPtr == '*') {
2612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      assert(BufferPtr + 1 != BufferEnd);
2622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (*(BufferPtr + 1) == '/')
2632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return BufferPtr;
2642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
2652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm_unreachable("buffer end hit before '*/' was seen");
2672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2680089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian
2692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // unnamed namespace
2702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexCommentText(Token &T) {
2722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(CommentState == LCS_InsideBCPLComment ||
2732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko         CommentState == LCS_InsideCComment);
2742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (State) {
2762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_Normal:
2772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
2782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_VerbatimBlockFirstLine:
2792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    lexVerbatimBlockFirstLine(T);
2802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
2812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_VerbatimBlockBody:
2822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    lexVerbatimBlockBody(T);
2832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
284962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  case LS_VerbatimLineText:
285962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    lexVerbatimLineText(T);
286962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    return;
2873f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  case LS_HTMLStartTag:
2883f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    lexHTMLStartTag(T);
2892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
2903f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  case LS_HTMLEndTag:
2913f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    lexHTMLEndTag(T);
2928d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    return;
2932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(State == LS_Normal);
2962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TokenPtr = BufferPtr;
2982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(TokenPtr < CommentEnd);
2992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  while (TokenPtr != CommentEnd) {
3002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch(*TokenPtr) {
3012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\\':
3022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '@': {
303808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        // Commands that start with a backslash and commands that start with
304808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        // 'at' have equivalent semantics.  But we keep information about the
305808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        // exact syntax in AST for comments.
306808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        tok::TokenKind CommandKind =
307808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko            (*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
3082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
3092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (TokenPtr == CommentEnd) {
310477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        char C = *TokenPtr;
3142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        switch (C) {
3152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        default:
3162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          break;
3172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '\\': case '@': case '&': case '$':
3192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '#':  case '<': case '>': case '%':
3202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '\"': case '.': case ':':
3212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          // This is one of \\ \@ \& \$ etc escape sequences.
3222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          TokenPtr++;
3232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
3242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            // This is the \:: escape sequence.
3252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            TokenPtr++;
3262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          }
327f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko          StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
3282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          formTokenWithChars(T, TokenPtr, tok::text);
329f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko          T.setText(UnescapedText);
3302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Don't make zero-length commands.
3348c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko        if (!isCommandNameStartCharacter(*TokenPtr)) {
335477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr = skipCommandName(TokenPtr, CommentEnd);
3402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        unsigned Length = TokenPtr - (BufferPtr + 1);
3412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Hardcoded support for lexing LaTeX formula commands
3432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // \f$ \f[ \f] \f{ \f} as a single command.
3442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
3452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          C = *TokenPtr;
3462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
3472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            TokenPtr++;
3482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            Length++;
3492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          }
3502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const StringRef CommandName(BufferPtr + 1, Length);
3532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
354e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
355e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        if (!Info) {
356e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          formTokenWithChars(T, TokenPtr, tok::unknown_command);
357e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          T.setUnknownCommandName(CommandName);
358abbfa671539c74b5bec66a64964de984c908cdfaFariborz Jahanian          if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) {
3590089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian            StringRef CorrectedName = Info->Name;
3600089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian            SourceRange CommandRange(T.getLocation().getLocWithOffset(1),
3610089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian                                     T.getEndLocation());
3620089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian            Diag(T.getLocation(), diag::warn_correct_comment_command_name)
3630089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian              << CommandName << CorrectedName
3640089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian              << FixItHint::CreateReplacement(CommandRange, CorrectedName);
3650089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian          } else {
3660089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian            Diag(T.getLocation(), diag::warn_unknown_comment_command_name);
3670089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian            return;
3680089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian          }
3692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
370e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        if (Info->IsVerbatimBlockCommand) {
371e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
372e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          return;
373e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        }
374e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        if (Info->IsVerbatimLineCommand) {
375e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          setupAndLexVerbatimLine(T, TokenPtr, Info);
3762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
378808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        formTokenWithChars(T, TokenPtr, CommandKind);
379e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        T.setCommandID(Info->getID());
3802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
3812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
3822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
383477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      case '&':
384477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        lexHTMLCharacterReference(T);
385477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        return;
386477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
3872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '<': {
3882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
3892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (TokenPtr == CommentEnd) {
390477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const char C = *TokenPtr;
394a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko        if (isHTMLIdentifierStartingCharacter(C))
3953f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko          setupAndLexHTMLStartTag(T);
3962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        else if (C == '/')
3973f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko          setupAndLexHTMLEndTag(T);
398477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        else
399477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
400477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
4012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
4032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\n':
4052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\r':
4062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr = skipNewline(TokenPtr, CommentEnd);
4072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::newline);
4082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (CommentState == LCS_InsideCComment)
4102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          skipLineStartingDecorations();
4112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      default: {
414aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko        size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
415aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko                         find_first_of("\n\r\\@&<");
416aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko        if (End != StringRef::npos)
417aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko          TokenPtr += End;
418aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko        else
419aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko          TokenPtr = CommentEnd;
420477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        formTextToken(T, TokenPtr);
4212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
4232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
4242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
4252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::setupAndLexVerbatimBlock(Token &T,
4282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                     const char *TextBegin,
429e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko                                     char Marker, const CommandInfo *Info) {
430e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  assert(Info->IsVerbatimBlockCommand);
431e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko
4322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  VerbatimBlockEndCommandName.clear();
4332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
434e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  VerbatimBlockEndCommandName.append(Info->EndCommandName);
4352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
437e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  T.setVerbatimBlockID(Info->getID());
4382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4398d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // If there is a newline following the verbatim opening command, skip the
4408d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // newline so that we don't create an tok::verbatim_block_line with empty
4418d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // text content.
442bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  if (BufferPtr != CommentEnd &&
443bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko      isVerticalWhitespace(*BufferPtr)) {
444bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    BufferPtr = skipNewline(BufferPtr, CommentEnd);
445bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    State = LS_VerbatimBlockBody;
446bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    return;
4478d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  }
4488d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
4492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  State = LS_VerbatimBlockFirstLine;
4502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockFirstLine(Token &T) {
45364da4e55c111f4733135e1780216609569767351Dmitri Gribenkoagain:
4542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(BufferPtr < CommentEnd);
4552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // FIXME: It would be better to scan the text once, finding either the block
4572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // end command or newline.
4582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  //
4592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Extract current line.
4602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *Newline = findNewline(BufferPtr, CommentEnd);
4612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef Line(BufferPtr, Newline - BufferPtr);
4622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Look for end command in current line.
4642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  size_t Pos = Line.find(VerbatimBlockEndCommandName);
4658d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  const char *TextEnd;
4662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *NextLine;
4672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (Pos == StringRef::npos) {
4682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Current line is completely verbatim.
4698d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    TextEnd = Newline;
4702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    NextLine = skipNewline(Newline, CommentEnd);
4712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else if (Pos == 0) {
4722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Current line contains just an end command.
4732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
474f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
4752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, End, tok::verbatim_block_end);
476e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
4772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
4782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
4792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else {
4802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // There is some text, followed by end command.  Extract text first.
4818d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    TextEnd = BufferPtr + Pos;
4828d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    NextLine = TextEnd;
48364da4e55c111f4733135e1780216609569767351Dmitri Gribenko    // If there is only whitespace before end command, skip whitespace.
48464da4e55c111f4733135e1780216609569767351Dmitri Gribenko    if (isWhitespace(BufferPtr, TextEnd)) {
48564da4e55c111f4733135e1780216609569767351Dmitri Gribenko      BufferPtr = TextEnd;
48664da4e55c111f4733135e1780216609569767351Dmitri Gribenko      goto again;
48764da4e55c111f4733135e1780216609569767351Dmitri Gribenko    }
4882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
4892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4908d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  StringRef Text(BufferPtr, TextEnd - BufferPtr);
4912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  formTokenWithChars(T, NextLine, tok::verbatim_block_line);
492f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  T.setVerbatimBlockText(Text);
4932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  State = LS_VerbatimBlockBody;
4952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockBody(Token &T) {
4982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(State == LS_VerbatimBlockBody);
4992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (CommentState == LCS_InsideCComment)
5012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    skipLineStartingDecorations();
5022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  lexVerbatimBlockFirstLine(T);
5042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
506e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenkovoid Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
507e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko                                    const CommandInfo *Info) {
508e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  assert(Info->IsVerbatimLineCommand);
509962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
510e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  T.setVerbatimLineID(Info->getID());
511962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
512962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  State = LS_VerbatimLineText;
513962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko}
514962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
515962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenkovoid Lexer::lexVerbatimLineText(Token &T) {
516962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  assert(State == LS_VerbatimLineText);
517962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
518962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  // Extract current line.
519962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  const char *Newline = findNewline(BufferPtr, CommentEnd);
520962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  const StringRef Text(BufferPtr, Newline - BufferPtr);
521962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  formTokenWithChars(T, Newline, tok::verbatim_line_text);
5222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  T.setVerbatimLineText(Text);
523962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
524962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  State = LS_Normal;
5252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
527477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkovoid Lexer::lexHTMLCharacterReference(Token &T) {
528477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  const char *TokenPtr = BufferPtr;
529477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  assert(*TokenPtr == '&');
530477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  TokenPtr++;
531477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (TokenPtr == CommentEnd) {
532477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
533477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
534477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
535477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  const char *NamePtr;
536477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  bool isNamed = false;
537477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  bool isDecimal = false;
538477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  char C = *TokenPtr;
539477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (isHTMLNamedCharacterReferenceCharacter(C)) {
540477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    NamePtr = TokenPtr;
541477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
542477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    isNamed = true;
543477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  } else if (C == '#') {
544477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    TokenPtr++;
545477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (TokenPtr == CommentEnd) {
546477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      formTextToken(T, TokenPtr);
547477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return;
548477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    }
549477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    C = *TokenPtr;
550477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (isHTMLDecimalCharacterReferenceCharacter(C)) {
551477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      NamePtr = TokenPtr;
552477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
553477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      isDecimal = true;
554477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    } else if (C == 'x' || C == 'X') {
555477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr++;
556477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      NamePtr = TokenPtr;
557477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
558477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    } else {
559477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      formTextToken(T, TokenPtr);
560477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return;
561477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    }
562477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  } else {
563477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
564477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
565477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
566477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
567477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      *TokenPtr != ';') {
568477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
569477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
570477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
571477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef Name(NamePtr, TokenPtr - NamePtr);
572477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  TokenPtr++; // Skip semicolon.
573477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef Resolved;
5745bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  if (isNamed)
575477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLNamedCharacterReference(Name);
576477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else if (isDecimal)
577477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLDecimalCharacterReference(Name);
578477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else
579477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLHexCharacterReference(Name);
580477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
581477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (Resolved.empty()) {
582477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
583477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
584477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
585477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  formTokenWithChars(T, TokenPtr, tok::text);
586477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  T.setText(Resolved);
587477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return;
588477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
589477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
5903f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLStartTag(Token &T) {
591a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  assert(BufferPtr[0] == '<' &&
592a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko         isHTMLIdentifierStartingCharacter(BufferPtr[1]));
5932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
594f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
595834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  if (!isHTMLTagName(Name)) {
596834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    formTextToken(T, TagNameEnd);
597834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    return;
598834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  }
599834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko
6003f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
6013f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  T.setHTMLTagStartName(Name);
6022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
6042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
605a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  const char C = *BufferPtr;
606a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  if (BufferPtr != CommentEnd &&
607a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C)))
6083f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    State = LS_HTMLStartTag;
6092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
6102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6113f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLStartTag(Token &T) {
6123f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  assert(State == LS_HTMLStartTag);
6132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TokenPtr = BufferPtr;
6152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  char C = *TokenPtr;
6162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (isHTMLIdentifierCharacter(C)) {
6172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
618f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
6192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, TokenPtr, tok::html_ident);
620f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    T.setHTMLIdent(Ident);
6212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else {
6222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch (C) {
6232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '=':
6242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr++;
6252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_equals);
6262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
6272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '\"':
6282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '\'': {
6292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char *OpenQuote = TokenPtr;
6302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
6312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char *ClosingQuote = TokenPtr;
6322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (TokenPtr != CommentEnd) // Skip closing quote.
6332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
6342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
6352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      T.setHTMLQuotedString(StringRef(OpenQuote + 1,
6362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                      ClosingQuote - (OpenQuote + 1)));
6372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
6382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
6392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '>':
6402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr++;
6412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_greater);
642a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko      State = LS_Normal;
643a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko      return;
644a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko    case '/':
645a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      TokenPtr++;
646a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      if (TokenPtr != CommentEnd && *TokenPtr == '>') {
647a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko        TokenPtr++;
648a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
649477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      } else
650477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        formTextToken(T, TokenPtr);
651477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
652a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      State = LS_Normal;
653a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      return;
6542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
6552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Now look ahead and return to normal state if we don't see any HTML tokens
6582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // ahead.
6592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
6602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (BufferPtr == CommentEnd) {
6612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
6622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
6632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  C = *BufferPtr;
666a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  if (!isHTMLIdentifierStartingCharacter(C) &&
6672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C != '=' && C != '\"' && C != '\'' && C != '>') {
6682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
6692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
6702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
6722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6733f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLEndTag(Token &T) {
6742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
6752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
6772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
678834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
679834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  if (!isHTMLTagName(Name)) {
680834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    formTextToken(T, TagNameEnd);
681834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    return;
682834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  }
6832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *End = skipWhitespace(TagNameEnd, CommentEnd);
6852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6863f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  formTokenWithChars(T, End, tok::html_end_tag);
687834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  T.setHTMLTagEndName(Name);
6888d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
6898d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  if (BufferPtr != CommentEnd && *BufferPtr == '>')
6903f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    State = LS_HTMLEndTag;
6918d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko}
6928d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
6933f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLEndTag(Token &T) {
6948d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  assert(BufferPtr != CommentEnd && *BufferPtr == '>');
6958d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
6968d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
6978d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  State = LS_Normal;
6982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
6992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
700ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz JahanianLexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
701ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian             const CommandTraits &Traits,
702af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko             SourceLocation FileLoc,
7032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko             const char *BufferStart, const char *BufferEnd):
704ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian    Allocator(Allocator), Diags(Diags), Traits(Traits),
7052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferStart(BufferStart), BufferEnd(BufferEnd),
706af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko    FileLoc(FileLoc), BufferPtr(BufferStart),
7072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    CommentState(LCS_BeforeComment), State(LS_Normal) {
7082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
7092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lex(Token &T) {
7112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoagain:
7122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (CommentState) {
7132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_BeforeComment:
7142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (BufferPtr == BufferEnd) {
7152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, BufferPtr, tok::eof);
7162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return;
7172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(*BufferPtr == '/');
7202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferPtr++; // Skip first slash.
7212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch(*BufferPtr) {
7222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '/': { // BCPL comment.
7232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr++; // Skip second slash.
7242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd) {
7262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Skip Doxygen magic marker, if it is present.
7272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // It might be missing because of a typo //< or /*<, or because we
7282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // merged this non-Doxygen comment into a bunch of Doxygen comments
7292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // around it: /** ... */ /* ... */ /** ... */
7302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const char C = *BufferPtr;
7312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (C == '/' || C == '!')
7322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          BufferPtr++;
7332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
7342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip less-than symbol that marks trailing comments.
7362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip it even if the comment is not a Doxygen one, because //< and /*<
7372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // are frequent typos.
7382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd && *BufferPtr == '<')
7392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentState = LCS_InsideBCPLComment;
7428d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
7438d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko        State = LS_Normal;
7442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
7452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      goto again;
7462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '*': { // C comment.
7482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr++; // Skip star.
7492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip Doxygen magic marker.
7512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char C = *BufferPtr;
7522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
7532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip less-than symbol that marks trailing comments.
7562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd && *BufferPtr == '<')
7572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentState = LCS_InsideCComment;
7602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      State = LS_Normal;
7612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
7622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      goto again;
7632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    default:
7652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      llvm_unreachable("second character of comment should be '/' or '*'");
7662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_BetweenComments: {
7692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Consecutive comments are extracted only if there is only whitespace
7702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // between them.  So we can search for the start of the next comment.
7712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *EndWhitespace = BufferPtr;
7722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
7732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      EndWhitespace++;
7742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Turn any whitespace between comments (and there is only whitespace
776a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // between them -- guaranteed by comment extraction) into a newline.  We
777a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // have two newlines between C comments in total (first one was synthesized
778a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // after a comment).
7792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, EndWhitespace, tok::newline);
7802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    CommentState = LCS_BeforeComment;
7822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
7832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
7842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_InsideBCPLComment:
7862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_InsideCComment:
7872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (BufferPtr != CommentEnd) {
7882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      lexCommentText(T);
7892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
7902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    } else {
7912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip C comment closing sequence.
7922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (CommentState == LCS_InsideCComment) {
7932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
7942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr += 2;
7952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        assert(BufferPtr <= BufferEnd);
7962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Synthenize newline just after the C comment, regardless if there is
7982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // actually a newline.
7992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, BufferPtr, tok::newline);
8002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        CommentState = LCS_BetweenComments;
8022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        break;
8032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      } else {
8042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Don't synthesized a newline after BCPL comment.
8052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        CommentState = LCS_BetweenComments;
8062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        goto again;
8072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
8082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
8092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
8112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8122d44d77fed3200e2eff289f55493317e90d3398cDmitri GribenkoStringRef Lexer::getSpelling(const Token &Tok,
8132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                             const SourceManager &SourceMgr,
8142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                             bool *Invalid) const {
8152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  SourceLocation Loc = Tok.getLocation();
8162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
8172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  bool InvalidTemp = false;
8192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
8202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (InvalidTemp) {
8212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    *Invalid = true;
8222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return StringRef();
8232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *Begin = File.data() + LocInfo.second;
8262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return StringRef(Begin, Tok.getLength());
8272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
8282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace comments
8302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace clang
8312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
832