12d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "clang/AST/CommentLexer.h"
2aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko#include "clang/AST/CommentCommandTraits.h"
3efa78d163214fd9e909ab2bf6911edfbc7a2b9dfFariborz Jahanian#include "clang/AST/CommentDiagnostic.h"
4bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko#include "clang/Basic/CharInfo.h"
5c934dfe950a14fe447aa14a7dae25d00ee87c8bbDmitri Gribenko#include "llvm/ADT/StringExtras.h"
62d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/StringSwitch.h"
7cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko#include "llvm/Support/ConvertUTF.h"
82d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/Support/ErrorHandling.h"
92d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace clang {
112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace comments {
122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Token::dump(const Lexer &L, const SourceManager &SM) const {
142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm::errs() << "comments::Token Kind=" << Kind << " ";
152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  Loc.dump(SM);
162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
190ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
20bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isLetter(C);
21477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
22477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
230ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
24bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isDigit(C);
25477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
26477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
270ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline bool isHTMLHexCharacterReferenceCharacter(char C) {
28bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isHexDigit(C);
29477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
30834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko
310ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkostatic inline StringRef convertCodePointToUTF8(
320ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenko                                      llvm::BumpPtrAllocator &Allocator,
330ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenko                                      unsigned CodePoint) {
34658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
35658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian  char *ResolvedPtr = Resolved;
36cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko  if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
37658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian    return StringRef(Resolved, ResolvedPtr - Resolved);
38658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian  else
39658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian    return StringRef();
40658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian}
415bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko
420ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenkonamespace {
430ff4f8bf47c924b4b01d989a53432a95471a068dDmitri Gribenko
445bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko#include "clang/AST/CommentHTMLTags.inc"
455bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
465bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko
475bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko} // unnamed namespace
48658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian
49477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
505bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  // Fast path, first check a few most widely used named character references.
51477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return llvm::StringSwitch<StringRef>(Name)
52477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("amp", "&")
53477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("lt", "<")
54477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("gt", ">")
55477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("quot", "\"")
56477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("apos", "\'")
575bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko      // Slow path.
585bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko      .Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
59658a115c8e0d5bddf607a13d2ce13cd306ef2389Fariborz Jahanian}
60477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
61477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
62477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  unsigned CodePoint = 0;
63477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
64477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
65477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    CodePoint *= 10;
66477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    CodePoint += Name[i] - '0';
67477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
685bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  return convertCodePointToUTF8(Allocator, CodePoint);
695bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko}
70477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
715bd1e5ba000023910ad986a16dd16d7ca914750aDmitri GribenkoStringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
725bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  unsigned CodePoint = 0;
735bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
745bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    CodePoint *= 16;
755bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    const char C = Name[i];
765bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    assert(isHTMLHexCharacterReferenceCharacter(C));
775bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko    CodePoint += llvm::hexDigitValue(C);
785bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  }
795bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  return convertCodePointToUTF8(Allocator, CodePoint);
80477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
81477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::skipLineStartingDecorations() {
832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // This function should be called only for C comments
842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(CommentState == LCS_InsideCComment);
852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (BufferPtr == CommentEnd)
872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (*BufferPtr) {
902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case ' ':
912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\t':
922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\f':
932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\v': {
942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *NewBufferPtr = BufferPtr;
952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    NewBufferPtr++;
962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (NewBufferPtr == CommentEnd)
972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return;
982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    char C = *NewBufferPtr;
100bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    while (isHorizontalWhitespace(C)) {
1012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      NewBufferPtr++;
1022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (NewBufferPtr == CommentEnd)
1032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
1042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C = *NewBufferPtr;
1052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
1062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (C == '*')
1072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr = NewBufferPtr + 1;
1082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
1092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '*':
1112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferPtr++;
1122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
1132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace {
1178d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko/// Returns pointer to the first newline character in the string.
1182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findNewline(const char *BufferPtr, const char *BufferEnd) {
1192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
120bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    if (isVerticalWhitespace(*BufferPtr))
1212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
1222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
1242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Steps over one line terminator at \p BufferPtr: "\n", "\r" or "\r\n".
///
/// The caller must point at '\n' or '\r' unless the range is empty (asserted).
///
/// \returns the position just past the terminator, or \p BufferPtr unchanged
///          when it already equals \p BufferEnd.
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  if (BufferPtr == BufferEnd)
    return BufferPtr;

  const char First = *BufferPtr++;
  if (First == '\n')
    return BufferPtr;

  assert(First == '\r');
  // A '\n' directly after '\r' belongs to the same terminator.
  if (BufferPtr != BufferEnd && *BufferPtr == '\n')
    ++BufferPtr;
  return BufferPtr;
}
1402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
141477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipNamedCharacterReference(const char *BufferPtr,
142477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                        const char *BufferEnd) {
143477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
144477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
145477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
146477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
147477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
148477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
149477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
150477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipDecimalCharacterReference(const char *BufferPtr,
151477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                          const char *BufferEnd) {
152477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
153477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
154477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
155477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
156477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
157477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
158477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
159477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipHexCharacterReference(const char *BufferPtr,
16031c71ca0eb83967085f21713b1dedff8c7f7a90fDmitri Gribenko                                      const char *BufferEnd) {
161477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
162477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
163477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
164477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
165477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
166477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
167477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
168a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenkobool isHTMLIdentifierStartingCharacter(char C) {
169bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isLetter(C);
170a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko}
171a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko
1722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isHTMLIdentifierCharacter(char C) {
173bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isAlphanumeric(C);
1742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
1772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
1782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isHTMLIdentifierCharacter(*BufferPtr))
1792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
1802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
1822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Finds the end of an HTML string delimited by single or double quotes.
///
/// \p BufferPtr must point at the opening quote (asserted).  The string ends
/// at the next occurrence of the same quote character that is not directly
/// preceded by a backslash.
///
/// \returns a pointer to the closing quote, or \p BufferEnd if the string is
///          not terminated within the range.
const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
{
  const char Quote = *BufferPtr;
  assert(Quote == '\"' || Quote == '\'');

  for (const char *Cur = BufferPtr + 1; Cur != BufferEnd; ++Cur) {
    if (*Cur != Quote)
      continue;
    // A quote right after a backslash is escaped and does not close the
    // string.
    if (Cur[-1] == '\\')
      continue;
    return Cur;
  }
  return BufferEnd;
}
2012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
2032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isWhitespace(*BufferPtr))
2052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
2062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
21064da4e55c111f4733135e1780216609569767351Dmitri Gribenkobool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
21164da4e55c111f4733135e1780216609569767351Dmitri Gribenko  return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
21264da4e55c111f4733135e1780216609569767351Dmitri Gribenko}
21364da4e55c111f4733135e1780216609569767351Dmitri Gribenko
2148c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenkobool isCommandNameStartCharacter(char C) {
215bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isLetter(C);
2168c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko}
2178c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko
2182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkobool isCommandNameCharacter(char C) {
219bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  return isAlphanumeric(C);
2202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
2232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isCommandNameCharacter(*BufferPtr))
2252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
2262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for BCPL comments.
2312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Handles newlines escaped with backslash or trigraph for backslahs.
2322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
2332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *CurPtr = BufferPtr;
2342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  while (CurPtr != BufferEnd) {
235bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    while (!isVerticalWhitespace(*CurPtr)) {
2362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CurPtr++;
2372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (CurPtr == BufferEnd)
2382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return BufferEnd;
2392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
2402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // We found a newline, check if it is escaped.
2412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *EscapePtr = CurPtr - 1;
2422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while(isHorizontalWhitespace(*EscapePtr))
2432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      EscapePtr--;
2442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (*EscapePtr == '\\' ||
2462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
2472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko         EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
2482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // We found an escaped newline.
2492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CurPtr = skipNewline(CurPtr, BufferEnd);
2502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    } else
2512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return CurPtr; // Not an escaped newline.
2522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for C comments.
2572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Very dumb, does not handle escaped newlines or trigraphs.
2582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
2592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (*BufferPtr == '*') {
2612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      assert(BufferPtr + 1 != BufferEnd);
2622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (*(BufferPtr + 1) == '/')
2632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return BufferPtr;
2642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
2652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm_unreachable("buffer end hit before '*/' was seen");
2672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2680089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian
2692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // unnamed namespace
2702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
271651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesvoid Lexer::formTokenWithChars(Token &Result, const char *TokEnd,
272651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                               tok::TokenKind Kind) {
273651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  const unsigned TokLen = TokEnd - BufferPtr;
274651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  Result.setLocation(getSourceLocation(BufferPtr));
275651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  Result.setKind(Kind);
276651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  Result.setLength(TokLen);
277651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#ifndef NDEBUG
278651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  Result.TextPtr = "<UNSET>";
279651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  Result.IntVal = 7;
280651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#endif
281651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  BufferPtr = TokEnd;
282651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines}
283651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines
2842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexCommentText(Token &T) {
2852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(CommentState == LCS_InsideBCPLComment ||
2862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko         CommentState == LCS_InsideCComment);
2872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (State) {
2892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_Normal:
2902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
2912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_VerbatimBlockFirstLine:
2922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    lexVerbatimBlockFirstLine(T);
2932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
2942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_VerbatimBlockBody:
2952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    lexVerbatimBlockBody(T);
2962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
297962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  case LS_VerbatimLineText:
298962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    lexVerbatimLineText(T);
299962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    return;
3003f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  case LS_HTMLStartTag:
3013f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    lexHTMLStartTag(T);
3022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
3033f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  case LS_HTMLEndTag:
3043f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    lexHTMLEndTag(T);
3058d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    return;
3062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
3072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(State == LS_Normal);
3092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TokenPtr = BufferPtr;
3112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(TokenPtr < CommentEnd);
3122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  while (TokenPtr != CommentEnd) {
3132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch(*TokenPtr) {
3142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\\':
3152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '@': {
316808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        // Commands that start with a backslash and commands that start with
317808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        // 'at' have equivalent semantics.  But we keep information about the
318808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        // exact syntax in AST for comments.
319808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        tok::TokenKind CommandKind =
320808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko            (*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
3212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
3222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (TokenPtr == CommentEnd) {
323477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        char C = *TokenPtr;
3272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        switch (C) {
3282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        default:
3292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          break;
3302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '\\': case '@': case '&': case '$':
3322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '#':  case '<': case '>': case '%':
3332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '\"': case '.': case ':':
3342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          // This is one of \\ \@ \& \$ etc escape sequences.
3352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          TokenPtr++;
3362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
3372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            // This is the \:: escape sequence.
3382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            TokenPtr++;
3392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          }
340f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko          StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
3412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          formTokenWithChars(T, TokenPtr, tok::text);
342f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko          T.setText(UnescapedText);
3432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Don't make zero-length commands.
3478c05da3fd8db98af482826ba059ab1ad6d58010fDmitri Gribenko        if (!isCommandNameStartCharacter(*TokenPtr)) {
348477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr = skipCommandName(TokenPtr, CommentEnd);
3532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        unsigned Length = TokenPtr - (BufferPtr + 1);
3542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Hardcoded support for lexing LaTeX formula commands
3562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // \f$ \f[ \f] \f{ \f} as a single command.
3572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
3582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          C = *TokenPtr;
3592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
3602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            TokenPtr++;
3612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            Length++;
3622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          }
3632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
365176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines        StringRef CommandName(BufferPtr + 1, Length);
3662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
367e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
368e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        if (!Info) {
369abbfa671539c74b5bec66a64964de984c908cdfaFariborz Jahanian          if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) {
3700089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian            StringRef CorrectedName = Info->Name;
371edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling            SourceLocation Loc = getSourceLocation(BufferPtr);
372edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling            SourceRange CommandRange(Loc.getLocWithOffset(1),
373edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling                                     getSourceLocation(TokenPtr));
374edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling            Diag(Loc, diag::warn_correct_comment_command_name)
3750089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian              << CommandName << CorrectedName
3760089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian              << FixItHint::CreateReplacement(CommandRange, CorrectedName);
3770089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian          } else {
378edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling            formTokenWithChars(T, TokenPtr, tok::unknown_command);
379edb95d3f9eb354b30baddd58fee5d2e7b3da2c92Bill Wendling            T.setUnknownCommandName(CommandName);
3800089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian            Diag(T.getLocation(), diag::warn_unknown_comment_command_name);
3810089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian            return;
3820089bc4ddee6bb309ad25f4c7ad4b7ffe5df4512Fariborz Jahanian          }
3832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
384e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        if (Info->IsVerbatimBlockCommand) {
385e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
386e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          return;
387e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        }
388e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        if (Info->IsVerbatimLineCommand) {
389e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko          setupAndLexVerbatimLine(T, TokenPtr, Info);
3902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
392808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko        formTokenWithChars(T, TokenPtr, CommandKind);
393e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko        T.setCommandID(Info->getID());
3942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
3952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
3962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
397477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      case '&':
398477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        lexHTMLCharacterReference(T);
399477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        return;
400477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
4012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '<': {
4022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
4032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (TokenPtr == CommentEnd) {
404477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
4052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
4062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
4072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const char C = *TokenPtr;
408a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko        if (isHTMLIdentifierStartingCharacter(C))
4093f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko          setupAndLexHTMLStartTag(T);
4102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        else if (C == '/')
4113f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko          setupAndLexHTMLEndTag(T);
412477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        else
413477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
414477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
4152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
4172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\n':
4192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\r':
4202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr = skipNewline(TokenPtr, CommentEnd);
4212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::newline);
4222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (CommentState == LCS_InsideCComment)
4242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          skipLineStartingDecorations();
4252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      default: {
428aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko        size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
429aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko                         find_first_of("\n\r\\@&<");
430aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko        if (End != StringRef::npos)
431aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko          TokenPtr += End;
432aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko        else
433aa7dbafc3539868ce271cb336444ec544260905aDmitri Gribenko          TokenPtr = CommentEnd;
434477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        formTextToken(T, TokenPtr);
4352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
4372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
4382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
4392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::setupAndLexVerbatimBlock(Token &T,
4422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                     const char *TextBegin,
443e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko                                     char Marker, const CommandInfo *Info) {
444e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  assert(Info->IsVerbatimBlockCommand);
445e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko
4462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  VerbatimBlockEndCommandName.clear();
4472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
448e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  VerbatimBlockEndCommandName.append(Info->EndCommandName);
4492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
451e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  T.setVerbatimBlockID(Info->getID());
4522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4538d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // If there is a newline following the verbatim opening command, skip the
4548d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // newline so that we don't create an tok::verbatim_block_line with empty
4558d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // text content.
456bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko  if (BufferPtr != CommentEnd &&
457bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko      isVerticalWhitespace(*BufferPtr)) {
458bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    BufferPtr = skipNewline(BufferPtr, CommentEnd);
459bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    State = LS_VerbatimBlockBody;
460bf8814478fddfa611911bdbd6a53a6614938cc63Dmitri Gribenko    return;
4618d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  }
4628d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
4632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  State = LS_VerbatimBlockFirstLine;
4642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockFirstLine(Token &T) {
46764da4e55c111f4733135e1780216609569767351Dmitri Gribenkoagain:
4682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(BufferPtr < CommentEnd);
4692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // FIXME: It would be better to scan the text once, finding either the block
4712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // end command or newline.
4722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  //
4732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Extract current line.
4742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *Newline = findNewline(BufferPtr, CommentEnd);
4752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef Line(BufferPtr, Newline - BufferPtr);
4762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Look for end command in current line.
4782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  size_t Pos = Line.find(VerbatimBlockEndCommandName);
4798d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  const char *TextEnd;
4802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *NextLine;
4812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (Pos == StringRef::npos) {
4822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Current line is completely verbatim.
4838d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    TextEnd = Newline;
4842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    NextLine = skipNewline(Newline, CommentEnd);
4852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else if (Pos == 0) {
4862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Current line contains just an end command.
4872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
488f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
4892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, End, tok::verbatim_block_end);
490e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
4912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
4922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
4932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else {
4942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // There is some text, followed by end command.  Extract text first.
4958d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    TextEnd = BufferPtr + Pos;
4968d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    NextLine = TextEnd;
49764da4e55c111f4733135e1780216609569767351Dmitri Gribenko    // If there is only whitespace before end command, skip whitespace.
49864da4e55c111f4733135e1780216609569767351Dmitri Gribenko    if (isWhitespace(BufferPtr, TextEnd)) {
49964da4e55c111f4733135e1780216609569767351Dmitri Gribenko      BufferPtr = TextEnd;
50064da4e55c111f4733135e1780216609569767351Dmitri Gribenko      goto again;
50164da4e55c111f4733135e1780216609569767351Dmitri Gribenko    }
5022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
5032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5048d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  StringRef Text(BufferPtr, TextEnd - BufferPtr);
5052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  formTokenWithChars(T, NextLine, tok::verbatim_block_line);
506f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  T.setVerbatimBlockText(Text);
5072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  State = LS_VerbatimBlockBody;
5092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockBody(Token &T) {
5122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(State == LS_VerbatimBlockBody);
5132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (CommentState == LCS_InsideCComment)
5152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    skipLineStartingDecorations();
5162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
51733337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar  if (BufferPtr == CommentEnd) {
51833337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar    formTokenWithChars(T, BufferPtr, tok::verbatim_block_line);
51933337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar    T.setVerbatimBlockText("");
52033337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar    return;
52133337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar  }
52233337ca4d89605025818daf83390ab4271d598d9Pirama Arumuga Nainar
5232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  lexVerbatimBlockFirstLine(T);
5242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
526e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenkovoid Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
527e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko                                    const CommandInfo *Info) {
528e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  assert(Info->IsVerbatimLineCommand);
529962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
530e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  T.setVerbatimLineID(Info->getID());
531962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
532962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  State = LS_VerbatimLineText;
533962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko}
534962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
535962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenkovoid Lexer::lexVerbatimLineText(Token &T) {
536962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  assert(State == LS_VerbatimLineText);
537962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
538962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  // Extract current line.
539962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  const char *Newline = findNewline(BufferPtr, CommentEnd);
540176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines  StringRef Text(BufferPtr, Newline - BufferPtr);
541962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  formTokenWithChars(T, Newline, tok::verbatim_line_text);
5422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  T.setVerbatimLineText(Text);
543962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
544962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  State = LS_Normal;
5452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
547477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkovoid Lexer::lexHTMLCharacterReference(Token &T) {
548477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  const char *TokenPtr = BufferPtr;
549477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  assert(*TokenPtr == '&');
550477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  TokenPtr++;
551477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (TokenPtr == CommentEnd) {
552477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
553477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
554477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
555477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  const char *NamePtr;
556477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  bool isNamed = false;
557477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  bool isDecimal = false;
558477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  char C = *TokenPtr;
559477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (isHTMLNamedCharacterReferenceCharacter(C)) {
560477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    NamePtr = TokenPtr;
561477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
562477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    isNamed = true;
563477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  } else if (C == '#') {
564477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    TokenPtr++;
565477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (TokenPtr == CommentEnd) {
566477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      formTextToken(T, TokenPtr);
567477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return;
568477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    }
569477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    C = *TokenPtr;
570477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (isHTMLDecimalCharacterReferenceCharacter(C)) {
571477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      NamePtr = TokenPtr;
572477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
573477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      isDecimal = true;
574477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    } else if (C == 'x' || C == 'X') {
575477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr++;
576477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      NamePtr = TokenPtr;
577477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
578477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    } else {
579477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      formTextToken(T, TokenPtr);
580477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return;
581477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    }
582477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  } else {
583477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
584477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
585477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
586477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
587477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      *TokenPtr != ';') {
588477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
589477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
590477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
591477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef Name(NamePtr, TokenPtr - NamePtr);
592477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  TokenPtr++; // Skip semicolon.
593477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef Resolved;
5945bd1e5ba000023910ad986a16dd16d7ca914750aDmitri Gribenko  if (isNamed)
595477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLNamedCharacterReference(Name);
596477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else if (isDecimal)
597477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLDecimalCharacterReference(Name);
598477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else
599477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLHexCharacterReference(Name);
600477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
601477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (Resolved.empty()) {
602477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
603477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
604477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
605477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  formTokenWithChars(T, TokenPtr, tok::text);
606477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  T.setText(Resolved);
607477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return;
608477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
609477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
6103f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLStartTag(Token &T) {
611a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  assert(BufferPtr[0] == '<' &&
612a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko         isHTMLIdentifierStartingCharacter(BufferPtr[1]));
6132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
614f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
615834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  if (!isHTMLTagName(Name)) {
616834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    formTextToken(T, TagNameEnd);
617834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    return;
618834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  }
619834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko
6203f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
6213f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  T.setHTMLTagStartName(Name);
6222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
6242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
625a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  const char C = *BufferPtr;
626a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  if (BufferPtr != CommentEnd &&
627a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C)))
6283f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    State = LS_HTMLStartTag;
6292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
6302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6313f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLStartTag(Token &T) {
6323f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  assert(State == LS_HTMLStartTag);
6332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TokenPtr = BufferPtr;
6352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  char C = *TokenPtr;
6362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (isHTMLIdentifierCharacter(C)) {
6372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
638f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
6392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, TokenPtr, tok::html_ident);
640f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    T.setHTMLIdent(Ident);
6412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else {
6422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch (C) {
6432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '=':
6442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr++;
6452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_equals);
6462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
6472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '\"':
6482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '\'': {
6492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char *OpenQuote = TokenPtr;
6502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
6512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char *ClosingQuote = TokenPtr;
6522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (TokenPtr != CommentEnd) // Skip closing quote.
6532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
6542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
6552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      T.setHTMLQuotedString(StringRef(OpenQuote + 1,
6562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                      ClosingQuote - (OpenQuote + 1)));
6572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
6582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
6592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '>':
6602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr++;
6612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_greater);
662a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko      State = LS_Normal;
663a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko      return;
664a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko    case '/':
665a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      TokenPtr++;
666a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      if (TokenPtr != CommentEnd && *TokenPtr == '>') {
667a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko        TokenPtr++;
668a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
669477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      } else
670477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        formTextToken(T, TokenPtr);
671477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
672a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      State = LS_Normal;
673a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      return;
6742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
6752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Now look ahead and return to normal state if we don't see any HTML tokens
6782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // ahead.
6792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
6802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (BufferPtr == CommentEnd) {
6812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
6822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
6832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  C = *BufferPtr;
686a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  if (!isHTMLIdentifierStartingCharacter(C) &&
6872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C != '=' && C != '\"' && C != '\'' && C != '>') {
6882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
6892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
6902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
6922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6933f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLEndTag(Token &T) {
6942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
6952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
6972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
698834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
699834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  if (!isHTMLTagName(Name)) {
700834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    formTextToken(T, TagNameEnd);
701834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    return;
702834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  }
7032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *End = skipWhitespace(TagNameEnd, CommentEnd);
7052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7063f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  formTokenWithChars(T, End, tok::html_end_tag);
707834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  T.setHTMLTagEndName(Name);
7088d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
7098d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  if (BufferPtr != CommentEnd && *BufferPtr == '>')
7103f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    State = LS_HTMLEndTag;
7118d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko}
7128d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
7133f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLEndTag(Token &T) {
7148d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  assert(BufferPtr != CommentEnd && *BufferPtr == '>');
7158d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
7168d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
7178d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  State = LS_Normal;
7182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
7192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
720ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz JahanianLexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
721ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian             const CommandTraits &Traits,
722af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko             SourceLocation FileLoc,
7232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko             const char *BufferStart, const char *BufferEnd):
724ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian    Allocator(Allocator), Diags(Diags), Traits(Traits),
7252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferStart(BufferStart), BufferEnd(BufferEnd),
726af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko    FileLoc(FileLoc), BufferPtr(BufferStart),
7272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    CommentState(LCS_BeforeComment), State(LS_Normal) {
7282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
7292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lex(Token &T) {
7312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoagain:
7322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (CommentState) {
7332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_BeforeComment:
7342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (BufferPtr == BufferEnd) {
7352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, BufferPtr, tok::eof);
7362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return;
7372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(*BufferPtr == '/');
7402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferPtr++; // Skip first slash.
7412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch(*BufferPtr) {
7422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '/': { // BCPL comment.
7432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr++; // Skip second slash.
7442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd) {
7462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Skip Doxygen magic marker, if it is present.
7472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // It might be missing because of a typo //< or /*<, or because we
7482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // merged this non-Doxygen comment into a bunch of Doxygen comments
7492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // around it: /** ... */ /* ... */ /** ... */
7502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const char C = *BufferPtr;
7512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (C == '/' || C == '!')
7522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          BufferPtr++;
7532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
7542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip less-than symbol that marks trailing comments.
7562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip it even if the comment is not a Doxygen one, because //< and /*<
7572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // are frequent typos.
7582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd && *BufferPtr == '<')
7592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentState = LCS_InsideBCPLComment;
7628d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
7638d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko        State = LS_Normal;
7642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
7652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      goto again;
7662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '*': { // C comment.
7682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr++; // Skip star.
7692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip Doxygen magic marker.
7712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char C = *BufferPtr;
7722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
7732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip less-than symbol that marks trailing comments.
7762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd && *BufferPtr == '<')
7772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentState = LCS_InsideCComment;
7802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      State = LS_Normal;
7812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
7822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      goto again;
7832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    default:
7852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      llvm_unreachable("second character of comment should be '/' or '*'");
7862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_BetweenComments: {
7892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Consecutive comments are extracted only if there is only whitespace
7902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // between them.  So we can search for the start of the next comment.
7912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *EndWhitespace = BufferPtr;
7922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
7932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      EndWhitespace++;
7942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Turn any whitespace between comments (and there is only whitespace
796a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // between them -- guaranteed by comment extraction) into a newline.  We
797a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // have two newlines between C comments in total (first one was synthesized
798a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // after a comment).
7992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, EndWhitespace, tok::newline);
8002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    CommentState = LCS_BeforeComment;
8022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
8032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_InsideBCPLComment:
8062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_InsideCComment:
8072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (BufferPtr != CommentEnd) {
8082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      lexCommentText(T);
8092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
8102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    } else {
8112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip C comment closing sequence.
8122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (CommentState == LCS_InsideCComment) {
8132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
8142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr += 2;
8152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        assert(BufferPtr <= BufferEnd);
8162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Synthenize newline just after the C comment, regardless if there is
8182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // actually a newline.
8192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, BufferPtr, tok::newline);
8202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        CommentState = LCS_BetweenComments;
8222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        break;
8232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      } else {
8242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Don't synthesized a newline after BCPL comment.
8252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        CommentState = LCS_BetweenComments;
8262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        goto again;
8272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
8282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
8292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
8312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8322d44d77fed3200e2eff289f55493317e90d3398cDmitri GribenkoStringRef Lexer::getSpelling(const Token &Tok,
8332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                             const SourceManager &SourceMgr,
8342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                             bool *Invalid) const {
8352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  SourceLocation Loc = Tok.getLocation();
8362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
8372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  bool InvalidTemp = false;
8392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
8402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (InvalidTemp) {
8412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    *Invalid = true;
8422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return StringRef();
8432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *Begin = File.data() + LocInfo.second;
8462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return StringRef(Begin, Tok.getLength());
8472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
8482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace comments
8502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace clang
8512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
852