CommentLexer.cpp revision 5bd1e5ba000023910ad986a16dd16d7ca914750a
12d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "clang/AST/CommentLexer.h"
2aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko#include "clang/AST/CommentCommandTraits.h"
3c934dfe950a14fe447aa14a7dae25d00ee87c8bbDmitri Gribenko#include "llvm/ADT/StringExtras.h"
42d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/StringSwitch.h"
5cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko#include "llvm/Support/ConvertUTF.h"
62d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/Support/ErrorHandling.h"
72d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
82d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace clang {
92d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace comments {
102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Token::dump(const Lexer &L, const SourceManager &SM) const {
122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm::errs() << "comments::Token Kind=" << Kind << " ";
132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  Loc.dump(SM);
142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
17477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkonamespace {
/// Returns true if \p C can be part of the name of an HTML named character
/// reference, i.e. if it is an ASCII letter.
bool isHTMLNamedCharacterReferenceCharacter(char C) {
  const bool IsLowercase = 'a' <= C && C <= 'z';
  const bool IsUppercase = 'A' <= C && C <= 'Z';
  return IsLowercase || IsUppercase;
}
22477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
/// Returns true if \p C can be part of an HTML decimal character reference,
/// i.e. if it is an ASCII decimal digit.
bool isHTMLDecimalCharacterReferenceCharacter(char C) {
  if (C < '0')
    return false;
  return C <= '9';
}
26477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
/// Returns true if \p C can be part of an HTML hexadecimal character
/// reference, i.e. if it is an ASCII hexadecimal digit of either case.
bool isHTMLHexCharacterReferenceCharacter(char C) {
  if ('0' <= C && C <= '9')
    return true;
  if ('a' <= C && C <= 'f')
    return true;
  return 'A' <= C && C <= 'F';
}
32834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko
335bd1e5ba000023910ad986a16dd16d7ca914750aDmitri GribenkoStringRef convertCodePointToUTF8(llvm::BumpPtrAllocator &Allocator,
345bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko                                 unsigned CodePoint) {
35658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
36658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian  char *ResolvedPtr = Resolved;
37cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko  if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
38658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian    return StringRef(Resolved, ResolvedPtr - Resolved);
39658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian  else
40658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian    return StringRef();
41658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian}
425bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko
435bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko#include "clang/AST/CommentHTMLTags.inc"
445bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
455bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko
465bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko} // unnamed namespace
47658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian
48477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
495bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  // Fast path, first check a few most widely used named character references.
50477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return llvm::StringSwitch<StringRef>(Name)
51477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("amp", "&")
52477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("lt", "<")
53477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("gt", ">")
54477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("quot", "\"")
55477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("apos", "\'")
565bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko      // Slow path.
575bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko      .Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
58658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian}
59477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
60477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
61477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  unsigned CodePoint = 0;
62477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
63477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
64477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    CodePoint *= 10;
65477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    CodePoint += Name[i] - '0';
66477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
675bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  return convertCodePointToUTF8(Allocator, CodePoint);
685bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko}
69477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
705bd1e5ba000023910ad986a16dd16d7ca914750aDmitri GribenkoStringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
715bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  unsigned CodePoint = 0;
725bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
735bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    CodePoint *= 16;
745bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    const char C = Name[i];
755bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    assert(isHTMLHexCharacterReferenceCharacter(C));
765bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    CodePoint += llvm::hexDigitValue(C);
775bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  }
785bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  return convertCodePointToUTF8(Allocator, CodePoint);
79477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
80477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::skipLineStartingDecorations() {
822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // This function should be called only for C comments
832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(CommentState == LCS_InsideCComment);
842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (BufferPtr == CommentEnd)
862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (*BufferPtr) {
892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case ' ':
902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\t':
912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\f':
922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\v': {
932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *NewBufferPtr = BufferPtr;
942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    NewBufferPtr++;
952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (NewBufferPtr == CommentEnd)
962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return;
972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    char C = *NewBufferPtr;
992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while (C == ' ' || C == '\t' || C == '\f' || C == '\v') {
1002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      NewBufferPtr++;
1012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (NewBufferPtr == CommentEnd)
1022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
1032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C = *NewBufferPtr;
1042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
1052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (C == '*')
1062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr = NewBufferPtr + 1;
1072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
1082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '*':
1102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferPtr++;
1112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
1122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace {
/// Returns a pointer to the first '\n' or '\r' in [BufferPtr, BufferEnd), or
/// \p BufferEnd if the range contains neither.
const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
  while (BufferPtr != BufferEnd &&
         *BufferPtr != '\n' && *BufferPtr != '\r')
    ++BufferPtr;
  return BufferPtr;
}
1252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Skips one line terminator ("\n", "\r" or "\r\n") at \p BufferPtr.
///
/// \p BufferPtr must either equal \p BufferEnd (in which case it is returned
/// unchanged) or point at '\n' or '\r'.
/// \returns a pointer just past the line terminator.
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  if (BufferPtr == BufferEnd)
    return BufferEnd;

  const char First = *BufferPtr;
  ++BufferPtr;
  if (First == '\n')
    return BufferPtr;

  assert(First == '\r');
  // Treat "\r\n" as a single line terminator.
  if (BufferPtr != BufferEnd && *BufferPtr == '\n')
    ++BufferPtr;
  return BufferPtr;
}
1402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
141477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipNamedCharacterReference(const char *BufferPtr,
142477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                        const char *BufferEnd) {
143477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
144477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
145477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
146477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
147477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
148477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
149477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
150477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipDecimalCharacterReference(const char *BufferPtr,
151477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                          const char *BufferEnd) {
152477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
153477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
154477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
155477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
156477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
157477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
158477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
159477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipHexCharacterReference(const char *BufferPtr,
160477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                          const char *BufferEnd) {
161477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
162477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
163477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
164477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
165477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
166477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
167477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
/// Returns true if \p C may begin an HTML identifier, i.e. if it is an ASCII
/// letter.
bool isHTMLIdentifierStartingCharacter(char C) {
  if (C >= 'a' && C <= 'z')
    return true;
  return C >= 'A' && C <= 'Z';
}
172a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko
/// Returns true if \p C may appear inside an HTML identifier, i.e. if it is
/// an ASCII letter or decimal digit.
bool isHTMLIdentifierCharacter(char C) {
  const bool IsLowercase = 'a' <= C && C <= 'z';
  const bool IsUppercase = 'A' <= C && C <= 'Z';
  const bool IsDigit = '0' <= C && C <= '9';
  return IsLowercase || IsUppercase || IsDigit;
}
1782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
1802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
1812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isHTMLIdentifierCharacter(*BufferPtr))
1822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
1832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
1852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Skips an HTML string enclosed in single or double quotes.  A quote
/// character that directly follows a backslash does not terminate the string.
///
/// \p BufferPtr must point at the opening quote.
/// \returns a pointer to the closing quote, or \p BufferEnd if the string is
/// not terminated inside the buffer.
const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
{
  const char Quote = *BufferPtr;
  assert(Quote == '\"' || Quote == '\'');

  // Remember the previous character instead of peeking backwards; for the
  // first character of the string body that is the opening quote itself.
  char Previous = Quote;
  const char *Cursor = BufferPtr + 1;
  while (Cursor != BufferEnd) {
    const char Current = *Cursor;
    if (Current == Quote && Previous != '\\')
      return Cursor;
    Previous = Current;
    ++Cursor;
  }
  return BufferEnd;
}
2042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Returns true if \p C is whitespace that does not end a line: space,
/// horizontal tab, form feed or vertical tab.
bool isHorizontalWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\f':
  case '\v':
    return true;
  default:
    return false;
  }
}
2082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Returns true if \p C is any whitespace character: space, newline,
/// carriage return, horizontal tab, form feed or vertical tab.
bool isWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\n':
  case '\r':
  case '\t':
  case '\f':
  case '\v':
    return true;
  default:
    return false;
  }
}
2132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
2152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isWhitespace(*BufferPtr))
2172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
2182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
22264da4e55c111f4733135e1780216609569767351Dmitri Gribenkobool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
22364da4e55c111f4733135e1780216609569767351Dmitri Gribenko  return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
22464da4e55c111f4733135e1780216609569767351Dmitri Gribenko}
22564da4e55c111f4733135e1780216609569767351Dmitri Gribenko
/// Returns true if \p C may begin a command name, i.e. if it is an ASCII
/// letter.
bool isCommandNameStartCharacter(char C) {
  const bool IsLowercase = 'a' <= C && C <= 'z';
  const bool IsUppercase = 'A' <= C && C <= 'Z';
  return IsLowercase || IsUppercase;
}
2308c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko
/// Returns true if \p C may appear inside a command name, i.e. if it is an
/// ASCII letter or decimal digit.
bool isCommandNameCharacter(char C) {
  if ('a' <= C && C <= 'z')
    return true;
  if ('A' <= C && C <= 'Z')
    return true;
  return '0' <= C && C <= '9';
}
2362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
2382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isCommandNameCharacter(*BufferPtr))
2402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
2412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for BCPL comments.
2462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Handles newlines escaped with backslash or trigraph for backslahs.
2472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
2482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *CurPtr = BufferPtr;
2492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  while (CurPtr != BufferEnd) {
2502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    char C = *CurPtr;
2512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while (C != '\n' && C != '\r') {
2522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CurPtr++;
2532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (CurPtr == BufferEnd)
2542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return BufferEnd;
2552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C = *CurPtr;
2562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
2572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // We found a newline, check if it is escaped.
2582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *EscapePtr = CurPtr - 1;
2592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while(isHorizontalWhitespace(*EscapePtr))
2602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      EscapePtr--;
2612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (*EscapePtr == '\\' ||
2632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
2642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko         EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
2652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // We found an escaped newline.
2662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CurPtr = skipNewline(CurPtr, BufferEnd);
2672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    } else
2682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return CurPtr; // Not an escaped newline.
2692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for C comments.
2742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Very dumb, does not handle escaped newlines or trigraphs.
2752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
2762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (*BufferPtr == '*') {
2782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      assert(BufferPtr + 1 != BufferEnd);
2792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (*(BufferPtr + 1) == '/')
2802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return BufferPtr;
2812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
2822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm_unreachable("buffer end hit before '*/' was seen");
2842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // unnamed namespace
2862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexCommentText(Token &T) {
2882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(CommentState == LCS_InsideBCPLComment ||
2892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko         CommentState == LCS_InsideCComment);
2902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (State) {
2922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_Normal:
2932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
2942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_VerbatimBlockFirstLine:
2952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    lexVerbatimBlockFirstLine(T);
2962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
2972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_VerbatimBlockBody:
2982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    lexVerbatimBlockBody(T);
2992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
300962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  case LS_VerbatimLineText:
301962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    lexVerbatimLineText(T);
302962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    return;
3033f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  case LS_HTMLStartTag:
3043f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    lexHTMLStartTag(T);
3052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
3063f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  case LS_HTMLEndTag:
3073f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    lexHTMLEndTag(T);
3088d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    return;
3092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
3102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(State == LS_Normal);
3122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TokenPtr = BufferPtr;
3142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(TokenPtr < CommentEnd);
3152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  while (TokenPtr != CommentEnd) {
3162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch(*TokenPtr) {
3172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\\':
3182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '@': {
3192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
3202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (TokenPtr == CommentEnd) {
321477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        char C = *TokenPtr;
3252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        switch (C) {
3262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        default:
3272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          break;
3282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '\\': case '@': case '&': case '$':
3302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '#':  case '<': case '>': case '%':
3312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '\"': case '.': case ':':
3322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          // This is one of \\ \@ \& \$ etc escape sequences.
3332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          TokenPtr++;
3342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
3352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            // This is the \:: escape sequence.
3362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            TokenPtr++;
3372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          }
338f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko          StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
3392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          formTokenWithChars(T, TokenPtr, tok::text);
340f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko          T.setText(UnescapedText);
3412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Don't make zero-length commands.
3458c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko        if (!isCommandNameStartCharacter(*TokenPtr)) {
346477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr = skipCommandName(TokenPtr, CommentEnd);
3512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        unsigned Length = TokenPtr - (BufferPtr + 1);
3522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Hardcoded support for lexing LaTeX formula commands
3542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // \f$ \f[ \f] \f{ \f} as a single command.
3552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
3562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          C = *TokenPtr;
3572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
3582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            TokenPtr++;
3592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            Length++;
3602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          }
3612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const StringRef CommandName(BufferPtr + 1, Length);
3642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
365e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
366e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        if (!Info) {
367e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          formTokenWithChars(T, TokenPtr, tok::unknown_command);
368e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          T.setUnknownCommandName(CommandName);
3692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
371e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        if (Info->IsVerbatimBlockCommand) {
372e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
373e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          return;
374e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        }
375e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        if (Info->IsVerbatimLineCommand) {
376e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          setupAndLexVerbatimLine(T, TokenPtr, Info);
3772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::command);
380e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        T.setCommandID(Info->getID());
3812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
3822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
3832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
384477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      case '&':
385477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        lexHTMLCharacterReference(T);
386477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        return;
387477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
3882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '<': {
3892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
3902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (TokenPtr == CommentEnd) {
391477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const char C = *TokenPtr;
395a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko        if (isHTMLIdentifierStartingCharacter(C))
3963f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko          setupAndLexHTMLStartTag(T);
3972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        else if (C == '/')
3983f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko          setupAndLexHTMLEndTag(T);
399477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        else
400477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
401477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
4022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
4042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\n':
4062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\r':
4072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr = skipNewline(TokenPtr, CommentEnd);
4082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::newline);
4092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (CommentState == LCS_InsideCComment)
4112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          skipLineStartingDecorations();
4122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      default: {
415aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko        size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
416aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko                         find_first_of("\n\r\\@&<");
417aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko        if (End != StringRef::npos)
418aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko          TokenPtr += End;
419aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko        else
420aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko          TokenPtr = CommentEnd;
421477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        formTextToken(T, TokenPtr);
4222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
4242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
4252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
4262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::setupAndLexVerbatimBlock(Token &T,
4292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                     const char *TextBegin,
430e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko                                     char Marker, const CommandInfo *Info) {
431e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  assert(Info->IsVerbatimBlockCommand);
432e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko
4332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  VerbatimBlockEndCommandName.clear();
4342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
435e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  VerbatimBlockEndCommandName.append(Info->EndCommandName);
4362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
438e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  T.setVerbatimBlockID(Info->getID());
4392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4408d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // If there is a newline following the verbatim opening command, skip the
4418d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // newline so that we don't create an tok::verbatim_block_line with empty
4428d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // text content.
4438d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  if (BufferPtr != CommentEnd) {
4448d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    const char C = *BufferPtr;
4458d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    if (C == '\n' || C == '\r') {
4468d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      BufferPtr = skipNewline(BufferPtr, CommentEnd);
4478d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      State = LS_VerbatimBlockBody;
4488d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      return;
4498d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    }
4508d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  }
4518d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
4522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  State = LS_VerbatimBlockFirstLine;
4532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockFirstLine(Token &T) {
45664da4e55c111f4733135e1780216609569767351Dmitri Gribenkoagain:
4572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(BufferPtr < CommentEnd);
4582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // FIXME: It would be better to scan the text once, finding either the block
4602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // end command or newline.
4612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  //
4622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Extract current line.
4632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *Newline = findNewline(BufferPtr, CommentEnd);
4642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef Line(BufferPtr, Newline - BufferPtr);
4652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Look for end command in current line.
4672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  size_t Pos = Line.find(VerbatimBlockEndCommandName);
4688d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  const char *TextEnd;
4692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *NextLine;
4702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (Pos == StringRef::npos) {
4712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Current line is completely verbatim.
4728d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    TextEnd = Newline;
4732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    NextLine = skipNewline(Newline, CommentEnd);
4742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else if (Pos == 0) {
4752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Current line contains just an end command.
4762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
477f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
4782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, End, tok::verbatim_block_end);
479e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
4802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
4812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
4822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else {
4832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // There is some text, followed by end command.  Extract text first.
4848d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    TextEnd = BufferPtr + Pos;
4858d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    NextLine = TextEnd;
48664da4e55c111f4733135e1780216609569767351Dmitri Gribenko    // If there is only whitespace before end command, skip whitespace.
48764da4e55c111f4733135e1780216609569767351Dmitri Gribenko    if (isWhitespace(BufferPtr, TextEnd)) {
48864da4e55c111f4733135e1780216609569767351Dmitri Gribenko      BufferPtr = TextEnd;
48964da4e55c111f4733135e1780216609569767351Dmitri Gribenko      goto again;
49064da4e55c111f4733135e1780216609569767351Dmitri Gribenko    }
4912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
4922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4938d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  StringRef Text(BufferPtr, TextEnd - BufferPtr);
4942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  formTokenWithChars(T, NextLine, tok::verbatim_block_line);
495f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  T.setVerbatimBlockText(Text);
4962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  State = LS_VerbatimBlockBody;
4982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockBody(Token &T) {
5012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(State == LS_VerbatimBlockBody);
5022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (CommentState == LCS_InsideCComment)
5042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    skipLineStartingDecorations();
5052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  lexVerbatimBlockFirstLine(T);
5072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
509e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenkovoid Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
510e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko                                    const CommandInfo *Info) {
511e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  assert(Info->IsVerbatimLineCommand);
512962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
513e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  T.setVerbatimLineID(Info->getID());
514962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
515962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  State = LS_VerbatimLineText;
516962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko}
517962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
518962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenkovoid Lexer::lexVerbatimLineText(Token &T) {
519962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  assert(State == LS_VerbatimLineText);
520962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
521962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  // Extract current line.
522962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  const char *Newline = findNewline(BufferPtr, CommentEnd);
523962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  const StringRef Text(BufferPtr, Newline - BufferPtr);
524962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  formTokenWithChars(T, Newline, tok::verbatim_line_text);
5252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  T.setVerbatimLineText(Text);
526962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
527962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  State = LS_Normal;
5282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
530477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkovoid Lexer::lexHTMLCharacterReference(Token &T) {
531477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  const char *TokenPtr = BufferPtr;
532477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  assert(*TokenPtr == '&');
533477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  TokenPtr++;
534477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (TokenPtr == CommentEnd) {
535477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
536477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
537477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
538477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  const char *NamePtr;
539477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  bool isNamed = false;
540477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  bool isDecimal = false;
541477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  char C = *TokenPtr;
542477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (isHTMLNamedCharacterReferenceCharacter(C)) {
543477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    NamePtr = TokenPtr;
544477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
545477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    isNamed = true;
546477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  } else if (C == '#') {
547477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    TokenPtr++;
548477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (TokenPtr == CommentEnd) {
549477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      formTextToken(T, TokenPtr);
550477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return;
551477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    }
552477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    C = *TokenPtr;
553477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (isHTMLDecimalCharacterReferenceCharacter(C)) {
554477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      NamePtr = TokenPtr;
555477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
556477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      isDecimal = true;
557477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    } else if (C == 'x' || C == 'X') {
558477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr++;
559477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      NamePtr = TokenPtr;
560477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
561477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    } else {
562477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      formTextToken(T, TokenPtr);
563477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return;
564477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    }
565477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  } else {
566477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
567477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
568477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
569477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
570477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      *TokenPtr != ';') {
571477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
572477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
573477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
574477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef Name(NamePtr, TokenPtr - NamePtr);
575477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  TokenPtr++; // Skip semicolon.
576477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef Resolved;
5775bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  if (isNamed)
578477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLNamedCharacterReference(Name);
579477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else if (isDecimal)
580477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLDecimalCharacterReference(Name);
581477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else
582477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLHexCharacterReference(Name);
583477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
584477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (Resolved.empty()) {
585477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
586477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
587477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
588477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  formTokenWithChars(T, TokenPtr, tok::text);
589477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  T.setText(Resolved);
590477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return;
591477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
592477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
5933f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLStartTag(Token &T) {
594a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  assert(BufferPtr[0] == '<' &&
595a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko         isHTMLIdentifierStartingCharacter(BufferPtr[1]));
5962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
597f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
598834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  if (!isHTMLTagName(Name)) {
599834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    formTextToken(T, TagNameEnd);
600834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    return;
601834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  }
602834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko
6033f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
6043f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  T.setHTMLTagStartName(Name);
6052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
6072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
608a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  const char C = *BufferPtr;
609a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  if (BufferPtr != CommentEnd &&
610a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C)))
6113f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    State = LS_HTMLStartTag;
6122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
6132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6143f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLStartTag(Token &T) {
6153f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  assert(State == LS_HTMLStartTag);
6162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TokenPtr = BufferPtr;
6182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  char C = *TokenPtr;
6192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (isHTMLIdentifierCharacter(C)) {
6202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
621f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
6222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, TokenPtr, tok::html_ident);
623f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    T.setHTMLIdent(Ident);
6242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else {
6252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch (C) {
6262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '=':
6272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr++;
6282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_equals);
6292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
6302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '\"':
6312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '\'': {
6322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char *OpenQuote = TokenPtr;
6332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
6342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char *ClosingQuote = TokenPtr;
6352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (TokenPtr != CommentEnd) // Skip closing quote.
6362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
6372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
6382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      T.setHTMLQuotedString(StringRef(OpenQuote + 1,
6392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                      ClosingQuote - (OpenQuote + 1)));
6402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
6412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
6422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '>':
6432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr++;
6442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_greater);
645a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko      State = LS_Normal;
646a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko      return;
647a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko    case '/':
648a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      TokenPtr++;
649a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      if (TokenPtr != CommentEnd && *TokenPtr == '>') {
650a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko        TokenPtr++;
651a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
652477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      } else
653477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        formTextToken(T, TokenPtr);
654477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
655a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      State = LS_Normal;
656a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      return;
6572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
6582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Now look ahead and return to normal state if we don't see any HTML tokens
6612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // ahead.
6622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
6632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (BufferPtr == CommentEnd) {
6642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
6652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
6662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  C = *BufferPtr;
669a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  if (!isHTMLIdentifierStartingCharacter(C) &&
6702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C != '=' && C != '\"' && C != '\'' && C != '>') {
6712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
6722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
6732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
6752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6763f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLEndTag(Token &T) {
6772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
6782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
6802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
681834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
682834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  if (!isHTMLTagName(Name)) {
683834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    formTextToken(T, TagNameEnd);
684834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    return;
685834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  }
6862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *End = skipWhitespace(TagNameEnd, CommentEnd);
6882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6893f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  formTokenWithChars(T, End, tok::html_end_tag);
690834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  T.setHTMLTagEndName(Name);
6918d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
6928d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  if (BufferPtr != CommentEnd && *BufferPtr == '>')
6933f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    State = LS_HTMLEndTag;
6948d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko}
6958d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
6963f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLEndTag(Token &T) {
6978d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  assert(BufferPtr != CommentEnd && *BufferPtr == '>');
6988d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
6998d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
7008d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  State = LS_Normal;
7012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
7022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
703aa58081902ad31927df02e8537d972eabe29d6dfDmitri GribenkoLexer::Lexer(llvm::BumpPtrAllocator &Allocator, const CommandTraits &Traits,
704af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko             SourceLocation FileLoc,
7052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko             const char *BufferStart, const char *BufferEnd):
706aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko    Allocator(Allocator), Traits(Traits),
7072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferStart(BufferStart), BufferEnd(BufferEnd),
708af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko    FileLoc(FileLoc), BufferPtr(BufferStart),
7092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    CommentState(LCS_BeforeComment), State(LS_Normal) {
7102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
7112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lex(Token &T) {
7132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoagain:
7142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (CommentState) {
7152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_BeforeComment:
7162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (BufferPtr == BufferEnd) {
7172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, BufferPtr, tok::eof);
7182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return;
7192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(*BufferPtr == '/');
7222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferPtr++; // Skip first slash.
7232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch(*BufferPtr) {
7242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '/': { // BCPL comment.
7252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr++; // Skip second slash.
7262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd) {
7282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Skip Doxygen magic marker, if it is present.
7292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // It might be missing because of a typo //< or /*<, or because we
7302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // merged this non-Doxygen comment into a bunch of Doxygen comments
7312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // around it: /** ... */ /* ... */ /** ... */
7322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const char C = *BufferPtr;
7332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (C == '/' || C == '!')
7342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          BufferPtr++;
7352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
7362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip less-than symbol that marks trailing comments.
7382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip it even if the comment is not a Doxygen one, because //< and /*<
7392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // are frequent typos.
7402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd && *BufferPtr == '<')
7412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentState = LCS_InsideBCPLComment;
7448d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
7458d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko        State = LS_Normal;
7462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
7472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      goto again;
7482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '*': { // C comment.
7502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr++; // Skip star.
7512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip Doxygen magic marker.
7532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char C = *BufferPtr;
7542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
7552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip less-than symbol that marks trailing comments.
7582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd && *BufferPtr == '<')
7592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentState = LCS_InsideCComment;
7622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      State = LS_Normal;
7632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
7642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      goto again;
7652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    default:
7672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      llvm_unreachable("second character of comment should be '/' or '*'");
7682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_BetweenComments: {
7712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Consecutive comments are extracted only if there is only whitespace
7722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // between them.  So we can search for the start of the next comment.
7732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *EndWhitespace = BufferPtr;
7742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
7752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      EndWhitespace++;
7762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Turn any whitespace between comments (and there is only whitespace
778a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // between them -- guaranteed by comment extraction) into a newline.  We
779a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // have two newlines between C comments in total (first one was synthesized
780a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // after a comment).
7812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, EndWhitespace, tok::newline);
7822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    CommentState = LCS_BeforeComment;
7842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
7852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
7862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_InsideBCPLComment:
7882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_InsideCComment:
7892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (BufferPtr != CommentEnd) {
7902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      lexCommentText(T);
7912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
7922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    } else {
7932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip C comment closing sequence.
7942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (CommentState == LCS_InsideCComment) {
7952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
7962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr += 2;
7972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        assert(BufferPtr <= BufferEnd);
7982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Synthenize newline just after the C comment, regardless if there is
8002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // actually a newline.
8012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, BufferPtr, tok::newline);
8022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        CommentState = LCS_BetweenComments;
8042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        break;
8052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      } else {
8062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Don't synthesized a newline after BCPL comment.
8072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        CommentState = LCS_BetweenComments;
8082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        goto again;
8092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
8102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
8112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
8132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8142d44d77fed3200e2eff289f55493317e90d3398cDmitri GribenkoStringRef Lexer::getSpelling(const Token &Tok,
8152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                             const SourceManager &SourceMgr,
8162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                             bool *Invalid) const {
8172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  SourceLocation Loc = Tok.getLocation();
8182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
8192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  bool InvalidTemp = false;
8212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
8222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (InvalidTemp) {
8232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    *Invalid = true;
8242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return StringRef();
8252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *Begin = File.data() + LocInfo.second;
8282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return StringRef(Begin, Tok.getLength());
8292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
8302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace comments
8322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace clang
8332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
834