CommentLexer.cpp revision 834a5bd311b4a32f89937ca5b6dd2b4111891859
12d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "clang/AST/CommentLexer.h"
2aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko#include "clang/AST/CommentCommandTraits.h"
3477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko#include "clang/Basic/ConvertUTF.h"
42d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/StringSwitch.h"
52d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/Support/ErrorHandling.h"
62d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
72d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace clang {
82d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace comments {
92d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Token::dump(const Lexer &L, const SourceManager &SM) const {
112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm::errs() << "comments::Token Kind=" << Kind << " ";
122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  Loc.dump(SM);
132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
16477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkonamespace {
/// Returns true if \p C may appear in the name of an HTML named character
/// reference, i.e. it is an ASCII letter.
bool isHTMLNamedCharacterReferenceCharacter(char C) {
  const bool IsLowercase = 'a' <= C && C <= 'z';
  const bool IsUppercase = 'A' <= C && C <= 'Z';
  return IsLowercase || IsUppercase;
}
21477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
/// Returns true if \p C is an ASCII decimal digit, the only kind of character
/// accepted in the body of a decimal character reference.
bool isHTMLDecimalCharacterReferenceCharacter(char C) {
  if (C < '0')
    return false;
  return C <= '9';
}
25477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
/// Returns true if \p C is an ASCII hexadecimal digit (0-9, a-f or A-F), the
/// only kind of character accepted in the body of a hex character reference.
bool isHTMLHexCharacterReferenceCharacter(char C) {
  const bool IsDecimalDigit = '0' <= C && C <= '9';
  const bool IsLowerHexLetter = 'a' <= C && C <= 'f';
  const bool IsUpperHexLetter = 'A' <= C && C <= 'F';
  return IsDecimalDigit || IsLowerHexLetter || IsUpperHexLetter;
}
31834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko
32834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenkobool isHTMLTagName(StringRef Name) {
33834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  return llvm::StringSwitch<bool>(Name)
34834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("em", "strong", true)
35834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("tt", "i", "b", "big", "small", true)
36834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("strike", "s", "u", "font", true)
37834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Case("a", true)
38834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Case("hr", true)
39834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("div", "span", true)
40834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("h1", "h2", "h3", true)
41834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("h4", "h5", "h6", true)
42834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Case("code", true)
43834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Case("blockquote", true)
44834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("sub", "sup", true)
45834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Case("img", true)
46834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Case("p", true)
47834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Case("br", true)
48834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Case("pre", true)
49834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("ins", "del", true)
50834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("ul", "ol", "li", true)
51834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("dl", "dt", "dd", true)
52834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("table", "caption", true)
53834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("thead", "tfoot", "tbody", true)
54834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("colgroup", "col", true)
55834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Cases("tr", "th", "td", true)
56834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko      .Default(false);
57834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko}
58477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko} // unnamed namespace
59477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
60477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
61477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return llvm::StringSwitch<StringRef>(Name)
62477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("amp", "&")
63477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("lt", "<")
64477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("gt", ">")
65477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("quot", "\"")
66477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Case("apos", "\'")
67477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      .Default("");
68477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
69477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
70477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
71477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  unsigned CodePoint = 0;
72477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
73477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
74477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    CodePoint *= 10;
75477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    CodePoint += Name[i] - '0';
76477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
77477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
78477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
79477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  char *ResolvedPtr = Resolved;
80477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
81477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return StringRef(Resolved, ResolvedPtr - Resolved);
82477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else
83477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return StringRef();
84477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
85477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
86477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri GribenkoStringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
87477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  unsigned CodePoint = 0;
88477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
89477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    CodePoint *= 16;
90477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    const char C = Name[i];
91477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    assert(isHTMLHexCharacterReferenceCharacter(C));
92477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (C >= '0' && C <= '9')
93477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      CodePoint += Name[i] - '0';
94477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    else if (C >= 'a' && C <= 'f')
95477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      CodePoint += Name[i] - 'a' + 10;
96477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    else
97477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      CodePoint += Name[i] - 'A' + 10;
98477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
99477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
100477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
101477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  char *ResolvedPtr = Resolved;
102477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
103477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return StringRef(Resolved, ResolvedPtr - Resolved);
104477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else
105477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return StringRef();
106477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
107477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
1082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::skipLineStartingDecorations() {
1092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // This function should be called only for C comments
1102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(CommentState == LCS_InsideCComment);
1112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (BufferPtr == CommentEnd)
1132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
1142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (*BufferPtr) {
1162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case ' ':
1172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\t':
1182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\f':
1192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '\v': {
1202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *NewBufferPtr = BufferPtr;
1212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    NewBufferPtr++;
1222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (NewBufferPtr == CommentEnd)
1232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return;
1242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    char C = *NewBufferPtr;
1262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while (C == ' ' || C == '\t' || C == '\f' || C == '\v') {
1272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      NewBufferPtr++;
1282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (NewBufferPtr == CommentEnd)
1292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
1302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C = *NewBufferPtr;
1312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
1322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (C == '*')
1332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr = NewBufferPtr + 1;
1342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
1352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case '*':
1372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferPtr++;
1382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
1392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
1412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace {
/// Returns a pointer to the first newline character ('\n' or '\r') in
/// [BufferPtr, BufferEnd), or \p BufferEnd if the range contains none.
const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
  const char *Cursor = BufferPtr;
  while (Cursor != BufferEnd && *Cursor != '\n' && *Cursor != '\r')
    ++Cursor;
  return Cursor;
}
1522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Skips one line terminator starting at \p BufferPtr: "\n", "\r" or "\r\n".
///
/// Returns \p BufferPtr unchanged when the range is empty.  Otherwise the
/// first character is expected to be '\n' or '\r' (checked by an assertion).
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  if (BufferPtr == BufferEnd)
    return BufferPtr;

  // A lone line feed.
  if (*BufferPtr == '\n')
    return BufferPtr + 1;

  // A carriage return, optionally followed by a line feed.
  assert(*BufferPtr == '\r');
  const char *AfterCR = BufferPtr + 1;
  if (AfterCR != BufferEnd && *AfterCR == '\n')
    ++AfterCR;
  return AfterCR;
}
1672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
168477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipNamedCharacterReference(const char *BufferPtr,
169477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                        const char *BufferEnd) {
170477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
171477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
172477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
173477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
174477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
175477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
176477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
177477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipDecimalCharacterReference(const char *BufferPtr,
178477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                          const char *BufferEnd) {
179477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
180477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
181477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
182477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
183477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
184477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
185477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
186477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkoconst char *skipHexCharacterReference(const char *BufferPtr,
187477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko                                          const char *BufferEnd) {
188477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
189477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
190477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return BufferPtr;
191477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
192477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return BufferEnd;
193477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
194477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
/// Returns true if \p C can begin an HTML identifier, i.e. it is an ASCII
/// letter.
bool isHTMLIdentifierStartingCharacter(char C) {
  if ('a' <= C && C <= 'z')
    return true;
  return 'A' <= C && C <= 'Z';
}
199a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko
/// Returns true if \p C can appear inside an HTML identifier, i.e. it is an
/// ASCII letter or decimal digit.
bool isHTMLIdentifierCharacter(char C) {
  const bool IsLowercase = 'a' <= C && C <= 'z';
  const bool IsUppercase = 'A' <= C && C <= 'Z';
  const bool IsDigit = '0' <= C && C <= '9';
  return IsLowercase || IsUppercase || IsDigit;
}
2052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
2072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isHTMLIdentifierCharacter(*BufferPtr))
2092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
2102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Skip an HTML string quoted in single or double quotes.  A quote character
/// that directly follows a backslash does not terminate the string.
///
/// \p BufferPtr must point at the opening quote.
///
/// Returns a pointer to the closing quote, or \p BufferEnd if the string is
/// not terminated within the range.
const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
{
  const char Quote = *BufferPtr;
  assert(Quote == '\"' || Quote == '\'');

  // Start right after the opening quote, so looking one character back is
  // always within the string.
  const char *Cursor = BufferPtr + 1;
  while (Cursor != BufferEnd) {
    const bool FollowsBackslash = Cursor[-1] == '\\';
    if (*Cursor == Quote && !FollowsBackslash)
      return Cursor;
    ++Cursor;
  }
  return BufferEnd;
}
2312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Returns true if \p C is whitespace that does not end a line: space, tab,
/// form feed or vertical tab.
bool isHorizontalWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\f':
  case '\v':
    return true;
  default:
    return false;
  }
}
2352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
/// Returns true if \p C is a whitespace character: space, tab, form feed,
/// vertical tab, line feed or carriage return.
bool isWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\f':
  case '\v':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
2402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
2422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isWhitespace(*BufferPtr))
2442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
2452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
24964da4e55c111f4733135e1780216609569767351Dmitri Gribenkobool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
25064da4e55c111f4733135e1780216609569767351Dmitri Gribenko  return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
25164da4e55c111f4733135e1780216609569767351Dmitri Gribenko}
25264da4e55c111f4733135e1780216609569767351Dmitri Gribenko
/// Returns true if \p C can be part of a comment command name, i.e. it is an
/// ASCII letter or decimal digit.
bool isCommandNameCharacter(char C) {
  if ('0' <= C && C <= '9')
    return true;
  if ('a' <= C && C <= 'z')
    return true;
  return 'A' <= C && C <= 'Z';
}
2582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
2602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (!isCommandNameCharacter(*BufferPtr))
2622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return BufferPtr;
2632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for BCPL comments.
2682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Handles newlines escaped with backslash or trigraph for backslahs.
2692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
2702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *CurPtr = BufferPtr;
2712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  while (CurPtr != BufferEnd) {
2722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    char C = *CurPtr;
2732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while (C != '\n' && C != '\r') {
2742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CurPtr++;
2752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (CurPtr == BufferEnd)
2762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return BufferEnd;
2772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C = *CurPtr;
2782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
2792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // We found a newline, check if it is escaped.
2802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *EscapePtr = CurPtr - 1;
2812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while(isHorizontalWhitespace(*EscapePtr))
2822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      EscapePtr--;
2832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (*EscapePtr == '\\' ||
2852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
2862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko         EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
2872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // We found an escaped newline.
2882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CurPtr = skipNewline(CurPtr, BufferEnd);
2892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    } else
2902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return CurPtr; // Not an escaped newline.
2912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return BufferEnd;
2932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
2942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Return the one past end pointer for C comments.
2962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// Very dumb, does not handle escaped newlines or trigraphs.
2972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoconst char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
2982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
2992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (*BufferPtr == '*') {
3002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      assert(BufferPtr + 1 != BufferEnd);
3012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (*(BufferPtr + 1) == '/')
3022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return BufferPtr;
3032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
3042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
3052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  llvm_unreachable("buffer end hit before '*/' was seen");
3062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
3072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // unnamed namespace
3082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexCommentText(Token &T) {
3102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(CommentState == LCS_InsideBCPLComment ||
3112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko         CommentState == LCS_InsideCComment);
3122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (State) {
3142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_Normal:
3152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
3162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_VerbatimBlockFirstLine:
3172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    lexVerbatimBlockFirstLine(T);
3182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
3192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LS_VerbatimBlockBody:
3202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    lexVerbatimBlockBody(T);
3212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
322962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  case LS_VerbatimLineText:
323962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    lexVerbatimLineText(T);
324962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    return;
3253f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  case LS_HTMLStartTag:
3263f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    lexHTMLStartTag(T);
3272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
3283f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  case LS_HTMLEndTag:
3293f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    lexHTMLEndTag(T);
3308d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    return;
3312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
3322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(State == LS_Normal);
3342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TokenPtr = BufferPtr;
3362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(TokenPtr < CommentEnd);
3372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  while (TokenPtr != CommentEnd) {
3382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch(*TokenPtr) {
3392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\\':
3402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '@': {
3412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
3422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (TokenPtr == CommentEnd) {
343477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        char C = *TokenPtr;
3472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        switch (C) {
3482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        default:
3492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          break;
3502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '\\': case '@': case '&': case '$':
3522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '#':  case '<': case '>': case '%':
3532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        case '\"': case '.': case ':':
3542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          // This is one of \\ \@ \& \$ etc escape sequences.
3552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          TokenPtr++;
3562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
3572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            // This is the \:: escape sequence.
3582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            TokenPtr++;
3592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          }
360f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko          StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
3612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          formTokenWithChars(T, TokenPtr, tok::text);
362f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko          T.setText(UnescapedText);
3632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Don't make zero-length commands.
3672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (!isCommandNameCharacter(*TokenPtr)) {
368477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
3692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr = skipCommandName(TokenPtr, CommentEnd);
3732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        unsigned Length = TokenPtr - (BufferPtr + 1);
3742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Hardcoded support for lexing LaTeX formula commands
3762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // \f$ \f[ \f] \f{ \f} as a single command.
3772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
3782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          C = *TokenPtr;
3792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
3802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            TokenPtr++;
3812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            Length++;
3822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          }
3832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const StringRef CommandName(BufferPtr + 1, Length);
3862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        StringRef EndName;
3872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
388aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko        if (Traits.isVerbatimBlockCommand(CommandName, EndName)) {
3892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, EndName);
3902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
392aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko        if (Traits.isVerbatimLineCommand(CommandName)) {
393962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko          setupAndLexVerbatimLine(T, TokenPtr);
3942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
3952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
3962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::command);
3972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        T.setCommandName(CommandName);
3982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
3992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
4002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
401477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      case '&':
402477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        lexHTMLCharacterReference(T);
403477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        return;
404477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
4052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '<': {
4062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
4072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (TokenPtr == CommentEnd) {
408477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
4092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          return;
4102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
4112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const char C = *TokenPtr;
412a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko        if (isHTMLIdentifierStartingCharacter(C))
4133f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko          setupAndLexHTMLStartTag(T);
4142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        else if (C == '/')
4153f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko          setupAndLexHTMLEndTag(T);
416477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        else
417477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko          formTextToken(T, TokenPtr);
418477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
4192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
4212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\n':
4232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      case '\r':
4242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr = skipNewline(TokenPtr, CommentEnd);
4252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::newline);
4262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (CommentState == LCS_InsideCComment)
4282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          skipLineStartingDecorations();
4292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      default: {
4322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        while (true) {
4332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          TokenPtr++;
4342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if (TokenPtr == CommentEnd)
4352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            break;
436a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko          const char C = *TokenPtr;
4372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          if(C == '\n' || C == '\r' ||
438477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko             C == '\\' || C == '@' || C == '&' || C == '<')
4392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko            break;
4402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        }
441477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        formTextToken(T, TokenPtr);
4422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        return;
4432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
4442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
4452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
4462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::setupAndLexVerbatimBlock(Token &T,
4492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                     const char *TextBegin,
4502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                     char Marker, StringRef EndName) {
4512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  VerbatimBlockEndCommandName.clear();
4522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
4532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  VerbatimBlockEndCommandName.append(EndName);
4542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
455f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  StringRef Name(BufferPtr + 1, TextBegin - (BufferPtr + 1));
4562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
457f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  T.setVerbatimBlockName(Name);
4582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4598d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // If there is a newline following the verbatim opening command, skip the
4608d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // newline so that we don't create an tok::verbatim_block_line with empty
4618d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  // text content.
4628d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  if (BufferPtr != CommentEnd) {
4638d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    const char C = *BufferPtr;
4648d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    if (C == '\n' || C == '\r') {
4658d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      BufferPtr = skipNewline(BufferPtr, CommentEnd);
4668d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      State = LS_VerbatimBlockBody;
4678d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      return;
4688d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    }
4698d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  }
4708d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
4712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  State = LS_VerbatimBlockFirstLine;
4722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
4732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockFirstLine(Token &T) {
47564da4e55c111f4733135e1780216609569767351Dmitri Gribenkoagain:
4762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(BufferPtr < CommentEnd);
4772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // FIXME: It would be better to scan the text once, finding either the block
4792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // end command or newline.
4802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  //
4812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Extract current line.
4822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *Newline = findNewline(BufferPtr, CommentEnd);
4832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef Line(BufferPtr, Newline - BufferPtr);
4842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
4852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Look for end command in current line.
4862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  size_t Pos = Line.find(VerbatimBlockEndCommandName);
4878d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  const char *TextEnd;
4882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *NextLine;
4892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (Pos == StringRef::npos) {
4902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Current line is completely verbatim.
4918d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    TextEnd = Newline;
4922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    NextLine = skipNewline(Newline, CommentEnd);
4932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else if (Pos == 0) {
4942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Current line contains just an end command.
4952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
496f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
4972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, End, tok::verbatim_block_end);
498f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    T.setVerbatimBlockName(Name);
4992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
5002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
5012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else {
5022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // There is some text, followed by end command.  Extract text first.
5038d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    TextEnd = BufferPtr + Pos;
5048d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    NextLine = TextEnd;
50564da4e55c111f4733135e1780216609569767351Dmitri Gribenko    // If there is only whitespace before end command, skip whitespace.
50664da4e55c111f4733135e1780216609569767351Dmitri Gribenko    if (isWhitespace(BufferPtr, TextEnd)) {
50764da4e55c111f4733135e1780216609569767351Dmitri Gribenko      BufferPtr = TextEnd;
50864da4e55c111f4733135e1780216609569767351Dmitri Gribenko      goto again;
50964da4e55c111f4733135e1780216609569767351Dmitri Gribenko    }
5102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
5112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5128d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  StringRef Text(BufferPtr, TextEnd - BufferPtr);
5132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  formTokenWithChars(T, NextLine, tok::verbatim_block_line);
514f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  T.setVerbatimBlockText(Text);
5152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  State = LS_VerbatimBlockBody;
5172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lexVerbatimBlockBody(Token &T) {
5202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(State == LS_VerbatimBlockBody);
5212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (CommentState == LCS_InsideCComment)
5232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    skipLineStartingDecorations();
5242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
5252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  lexVerbatimBlockFirstLine(T);
5262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
528962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenkovoid Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin) {
5292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const StringRef Name(BufferPtr + 1, TextBegin - BufferPtr - 1);
530962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
5312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  T.setVerbatimLineName(Name);
532962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
533962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  State = LS_VerbatimLineText;
534962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko}
535962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
536962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenkovoid Lexer::lexVerbatimLineText(Token &T) {
537962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  assert(State == LS_VerbatimLineText);
538962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
539962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  // Extract current line.
540962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  const char *Newline = findNewline(BufferPtr, CommentEnd);
541962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  const StringRef Text(BufferPtr, Newline - BufferPtr);
542962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  formTokenWithChars(T, Newline, tok::verbatim_line_text);
5432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  T.setVerbatimLineText(Text);
544962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
545962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  State = LS_Normal;
5462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
5472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
548477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenkovoid Lexer::lexHTMLCharacterReference(Token &T) {
549477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  const char *TokenPtr = BufferPtr;
550477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  assert(*TokenPtr == '&');
551477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  TokenPtr++;
552477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (TokenPtr == CommentEnd) {
553477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
554477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
555477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
556477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  const char *NamePtr;
557477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  bool isNamed = false;
558477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  bool isDecimal = false;
559477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  char C = *TokenPtr;
560477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (isHTMLNamedCharacterReferenceCharacter(C)) {
561477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    NamePtr = TokenPtr;
562477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
563477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    isNamed = true;
564477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  } else if (C == '#') {
565477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    TokenPtr++;
566477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (TokenPtr == CommentEnd) {
567477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      formTextToken(T, TokenPtr);
568477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return;
569477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    }
570477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    C = *TokenPtr;
571477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    if (isHTMLDecimalCharacterReferenceCharacter(C)) {
572477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      NamePtr = TokenPtr;
573477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
574477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      isDecimal = true;
575477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    } else if (C == 'x' || C == 'X') {
576477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr++;
577477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      NamePtr = TokenPtr;
578477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
579477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    } else {
580477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      formTextToken(T, TokenPtr);
581477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      return;
582477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    }
583477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  } else {
584477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
585477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
586477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
587477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
588477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      *TokenPtr != ';') {
589477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
590477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
591477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
592477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef Name(NamePtr, TokenPtr - NamePtr);
593477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  TokenPtr++; // Skip semicolon.
594477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef Resolved;
595477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (isNamed)
596477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLNamedCharacterReference(Name);
597477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else if (isDecimal)
598477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLDecimalCharacterReference(Name);
599477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  else
600477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Resolved = resolveHTMLHexCharacterReference(Name);
601477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
602477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  if (Resolved.empty()) {
603477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTextToken(T, TokenPtr);
604477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    return;
605477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
606477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  formTokenWithChars(T, TokenPtr, tok::text);
607477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  T.setText(Resolved);
608477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  return;
609477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko}
610477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
6113f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLStartTag(Token &T) {
612a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  assert(BufferPtr[0] == '<' &&
613a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko         isHTMLIdentifierStartingCharacter(BufferPtr[1]));
6142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
615f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko  StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
616834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  if (!isHTMLTagName(Name)) {
617834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    formTextToken(T, TagNameEnd);
618834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    return;
619834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  }
620834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko
6213f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
6223f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  T.setHTMLTagStartName(Name);
6232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
6252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
626a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  const char C = *BufferPtr;
627a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  if (BufferPtr != CommentEnd &&
628a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C)))
6293f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    State = LS_HTMLStartTag;
6302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
6312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6323f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLStartTag(Token &T) {
6333f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  assert(State == LS_HTMLStartTag);
6342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TokenPtr = BufferPtr;
6362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  char C = *TokenPtr;
6372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (isHTMLIdentifierCharacter(C)) {
6382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
639f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
6402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, TokenPtr, tok::html_ident);
641f5e0aeac8a510ba1fd4c83391978cffd31e5ac69Dmitri Gribenko    T.setHTMLIdent(Ident);
6422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  } else {
6432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch (C) {
6442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '=':
6452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr++;
6462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_equals);
6472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
6482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '\"':
6492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '\'': {
6502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char *OpenQuote = TokenPtr;
6512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
6522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char *ClosingQuote = TokenPtr;
6532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (TokenPtr != CommentEnd) // Skip closing quote.
6542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        TokenPtr++;
6552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
6562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      T.setHTMLQuotedString(StringRef(OpenQuote + 1,
6572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                      ClosingQuote - (OpenQuote + 1)));
6582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
6592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
6602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '>':
6612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      TokenPtr++;
6622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, TokenPtr, tok::html_greater);
663a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko      State = LS_Normal;
664a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko      return;
665a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko    case '/':
666a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      TokenPtr++;
667a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      if (TokenPtr != CommentEnd && *TokenPtr == '>') {
668a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko        TokenPtr++;
669a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko        formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
670477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko      } else
671477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko        formTextToken(T, TokenPtr);
672477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
673a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      State = LS_Normal;
674a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko      return;
6752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
6762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // Now look ahead and return to normal state if we don't see any HTML tokens
6792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  // ahead.
6802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
6812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (BufferPtr == CommentEnd) {
6822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
6832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
6842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  C = *BufferPtr;
687a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko  if (!isHTMLIdentifierStartingCharacter(C) &&
6882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      C != '=' && C != '\"' && C != '\'' && C != '>') {
6892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    State = LS_Normal;
6902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return;
6912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
6922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
6932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6943f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::setupAndLexHTMLEndTag(Token &T) {
6952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
6962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
6972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
6982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
699834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
700834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  if (!isHTMLTagName(Name)) {
701834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    formTextToken(T, TagNameEnd);
702834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko    return;
703834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  }
7042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *End = skipWhitespace(TagNameEnd, CommentEnd);
7062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7073f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  formTokenWithChars(T, End, tok::html_end_tag);
708834a5bd311b4a32f89937ca5b6dd2b4111891859Dmitri Gribenko  T.setHTMLTagEndName(Name);
7098d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
7108d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  if (BufferPtr != CommentEnd && *BufferPtr == '>')
7113f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    State = LS_HTMLEndTag;
7128d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko}
7138d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
7143f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenkovoid Lexer::lexHTMLEndTag(Token &T) {
7158d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  assert(BufferPtr != CommentEnd && *BufferPtr == '>');
7168d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
7178d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
7188d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  State = LS_Normal;
7192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
7202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
721aa58081902ad31927df02e8537d972eabe29d6dfDmitri GribenkoLexer::Lexer(llvm::BumpPtrAllocator &Allocator, const CommandTraits &Traits,
722477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko             SourceLocation FileLoc, const CommentOptions &CommOpts,
7232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko             const char *BufferStart, const char *BufferEnd):
724aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko    Allocator(Allocator), Traits(Traits),
7252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferStart(BufferStart), BufferEnd(BufferEnd),
7262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart),
7272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    CommentState(LCS_BeforeComment), State(LS_Normal) {
7282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
7292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkovoid Lexer::lex(Token &T) {
7312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoagain:
7322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  switch (CommentState) {
7332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_BeforeComment:
7342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (BufferPtr == BufferEnd) {
7352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      formTokenWithChars(T, BufferPtr, tok::eof);
7362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      return;
7372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(*BufferPtr == '/');
7402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    BufferPtr++; // Skip first slash.
7412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    switch(*BufferPtr) {
7422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '/': { // BCPL comment.
7432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr++; // Skip second slash.
7442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd) {
7462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Skip Doxygen magic marker, if it is present.
7472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // It might be missing because of a typo //< or /*<, or because we
7482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // merged this non-Doxygen comment into a bunch of Doxygen comments
7492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // around it: /** ... */ /* ... */ /** ... */
7502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const char C = *BufferPtr;
7512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        if (C == '/' || C == '!')
7522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko          BufferPtr++;
7532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
7542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip less-than symbol that marks trailing comments.
7562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip it even if the comment is not a Doxygen one, because //< and /*<
7572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // are frequent typos.
7582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd && *BufferPtr == '<')
7592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentState = LCS_InsideBCPLComment;
7628d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
7638d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko        State = LS_Normal;
7642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
7652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      goto again;
7662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    case '*': { // C comment.
7682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      BufferPtr++; // Skip star.
7692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip Doxygen magic marker.
7712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      const char C = *BufferPtr;
7722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
7732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7742d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7752d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip less-than symbol that marks trailing comments.
7762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (BufferPtr != BufferEnd && *BufferPtr == '<')
7772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr++;
7782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentState = LCS_InsideCComment;
7802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      State = LS_Normal;
7812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
7822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      goto again;
7832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    default:
7852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      llvm_unreachable("second character of comment should be '/' or '*'");
7862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
7872d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_BetweenComments: {
7892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Consecutive comments are extracted only if there is only whitespace
7902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // between them.  So we can search for the start of the next comment.
7912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const char *EndWhitespace = BufferPtr;
7922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
7932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      EndWhitespace++;
7942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
7952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    // Turn any whitespace between comments (and there is only whitespace
796a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // between them -- guaranteed by comment extraction) into a newline.  We
797a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // have two newlines between C comments in total (first one was synthesized
798a99ec107ba6b5abaf27c6cc9318e65689163f2a1Dmitri Gribenko    // after a comment).
7992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    formTokenWithChars(T, EndWhitespace, tok::newline);
8002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    CommentState = LCS_BeforeComment;
8022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    break;
8032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_InsideBCPLComment:
8062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  case LCS_InsideCComment:
8072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    if (BufferPtr != CommentEnd) {
8082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      lexCommentText(T);
8092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      break;
8102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    } else {
8112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      // Skip C comment closing sequence.
8122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      if (CommentState == LCS_InsideCComment) {
8132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
8142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        BufferPtr += 2;
8152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        assert(BufferPtr <= BufferEnd);
8162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Synthenize newline just after the C comment, regardless if there is
8182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // actually a newline.
8192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        formTokenWithChars(T, BufferPtr, tok::newline);
8202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        CommentState = LCS_BetweenComments;
8222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        break;
8232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      } else {
8242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        // Don't synthesized a newline after BCPL comment.
8252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        CommentState = LCS_BetweenComments;
8262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        goto again;
8272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko      }
8282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    }
8292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
8312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8322d44d77fed3200e2eff289f55493317e90d3398cDmitri GribenkoStringRef Lexer::getSpelling(const Token &Tok,
8332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                             const SourceManager &SourceMgr,
8342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                             bool *Invalid) const {
8352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  SourceLocation Loc = Tok.getLocation();
8362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
8372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  bool InvalidTemp = false;
8392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
8402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  if (InvalidTemp) {
8412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    *Invalid = true;
8422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return StringRef();
8432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
8442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *Begin = File.data() + LocInfo.second;
8462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  return StringRef(Begin, Tok.getLength());
8472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}
8482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
8492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace comments
8502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace clang
8512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
852