12d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===//
22d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//
32d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//                     The LLVM Compiler Infrastructure
42d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//
52d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// This file is distributed under the University of Illinois Open Source
62d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// License. See LICENSE.TXT for details.
72d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//
82d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//===----------------------------------------------------------------------===//
92d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//
102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//  This file defines lexer for structured comments and supporting token class.
112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//
122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//===----------------------------------------------------------------------===//
132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#ifndef LLVM_CLANG_AST_COMMENT_LEXER_H
152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#define LLVM_CLANG_AST_COMMENT_LEXER_H
162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
17ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian#include "clang/Basic/Diagnostic.h"
18651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#include "clang/Basic/SourceManager.h"
192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/SmallString.h"
202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/SmallVector.h"
2130a2e16f6c27f888dd11eba6bbbae1e980078fcbChandler Carruth#include "llvm/ADT/StringRef.h"
228d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko#include "llvm/Support/Allocator.h"
232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/Support/raw_ostream.h"
242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace clang {
262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace comments {
272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoclass Lexer;
298d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenkoclass TextTokenRetokenizer;
30e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenkostruct CommandInfo;
31aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenkoclass CommandTraits;
322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
namespace tok {
/// Kinds of tokens produced by the comment lexer.
///
/// Enumerator order is significant: values are assigned implicitly, starting
/// at zero.
enum TokenKind {
  eof,
  newline,
  text,

  /// Command that does not have an ID.
  unknown_command,

  /// Command with an ID, that used backslash marker.
  backslash_command,

  /// Command with an ID, that used 'at' marker.
  at_command,

  verbatim_block_begin,
  verbatim_block_line,
  verbatim_block_end,

  verbatim_line_name,
  verbatim_line_text,

  /// Spelled as: <tag
  html_start_tag,

  /// Spelled as: attr
  html_ident,

  /// Spelled as: =
  html_equals,

  /// Spelled as: "blah\"blah" or 'blah\'blah'
  html_quoted_string,

  /// Spelled as: >
  html_greater,

  /// Spelled as: />
  html_slash_greater,

  /// Spelled as: </tag
  html_end_tag
};
} // end namespace tok
552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// \brief Comment token.
572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoclass Token {
582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  friend class Lexer;
598d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  friend class TextTokenRetokenizer;
602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// The location of the token.
622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  SourceLocation Loc;
632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// The actual kind of the token.
652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  tok::TokenKind Kind;
662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// Length of the token spelling in comment.  Can be 0 for synthenized
682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// tokens.
692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  unsigned Length;
702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// Contains text value associated with a token.
72e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  const char *TextPtr;
73e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko
74e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  /// Integer value associated with a token.
75e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  ///
76e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  /// If the token is a konwn command, contains command ID and TextPtr is
77e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  /// unused (command spelling can be found with CommandTraits).  Otherwise,
78e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  /// contains the length of the string that starts at TextPtr.
79e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  unsigned IntVal;
80c98e9130bcddd0258c110d30749edd2284087e3dFariborz Jahanian
812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkopublic:
822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  SourceLocation getLocation() const LLVM_READONLY { return Loc; }
832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void setLocation(SourceLocation SL) { Loc = SL; }
842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
858d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  SourceLocation getEndLocation() const LLVM_READONLY {
868d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    if (Length == 0 || Length == 1)
878d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko      return Loc;
888d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    return Loc.getLocWithOffset(Length - 1);
898d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko  }
908d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  tok::TokenKind getKind() const LLVM_READONLY { return Kind; }
922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void setKind(tok::TokenKind K) { Kind = K; }
932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; }
952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; }
962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  unsigned getLength() const LLVM_READONLY { return Length; }
982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void setLength(unsigned L) { Length = L; }
992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef getText() const LLVM_READONLY {
1012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::text));
102e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return StringRef(TextPtr, IntVal);
1032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void setText(StringRef Text) {
1062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::text));
107e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    TextPtr = Text.data();
108e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = Text.size();
109e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  }
110e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko
111e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  StringRef getUnknownCommandName() const LLVM_READONLY {
112e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    assert(is(tok::unknown_command));
113e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return StringRef(TextPtr, IntVal);
114e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  }
115e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko
116e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  void setUnknownCommandName(StringRef Name) {
117e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    assert(is(tok::unknown_command));
118e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    TextPtr = Name.data();
119e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = Name.size();
1202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
122e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  unsigned getCommandID() const LLVM_READONLY {
1238536fa14ee1048e5e2d62cb3dc11fc640c7dc00dFariborz Jahanian    assert(is(tok::backslash_command) || is(tok::at_command));
124e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return IntVal;
1252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
127e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  void setCommandID(unsigned ID) {
1288536fa14ee1048e5e2d62cb3dc11fc640c7dc00dFariborz Jahanian    assert(is(tok::backslash_command) || is(tok::at_command));
129e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = ID;
1302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
132e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  unsigned getVerbatimBlockID() const LLVM_READONLY {
1332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
134e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return IntVal;
1352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
137e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  void setVerbatimBlockID(unsigned ID) {
1382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
139e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = ID;
1402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef getVerbatimBlockText() const LLVM_READONLY {
1432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::verbatim_block_line));
144e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return StringRef(TextPtr, IntVal);
1452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void setVerbatimBlockText(StringRef Text) {
1482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::verbatim_block_line));
149e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    TextPtr = Text.data();
150e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = Text.size();
1512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
153e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  unsigned getVerbatimLineID() const LLVM_READONLY {
154962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    assert(is(tok::verbatim_line_name));
155e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return IntVal;
1562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
158e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  void setVerbatimLineID(unsigned ID) {
159962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    assert(is(tok::verbatim_line_name));
160e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = ID;
1612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef getVerbatimLineText() const LLVM_READONLY {
164962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    assert(is(tok::verbatim_line_text));
165e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return StringRef(TextPtr, IntVal);
1662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void setVerbatimLineText(StringRef Text) {
169962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    assert(is(tok::verbatim_line_text));
170e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    TextPtr = Text.data();
171e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = Text.size();
1722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1743f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  StringRef getHTMLTagStartName() const LLVM_READONLY {
1753f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    assert(is(tok::html_start_tag));
176e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return StringRef(TextPtr, IntVal);
1772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1793f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  void setHTMLTagStartName(StringRef Name) {
1803f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    assert(is(tok::html_start_tag));
181e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    TextPtr = Name.data();
182e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = Name.size();
1832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef getHTMLIdent() const LLVM_READONLY {
1862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::html_ident));
187e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return StringRef(TextPtr, IntVal);
1882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void setHTMLIdent(StringRef Name) {
1912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::html_ident));
192e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    TextPtr = Name.data();
193e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = Name.size();
1942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
1952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
1962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef getHTMLQuotedString() const LLVM_READONLY {
1972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::html_quoted_string));
198e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return StringRef(TextPtr, IntVal);
1992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void setHTMLQuotedString(StringRef Str) {
2022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(is(tok::html_quoted_string));
203e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    TextPtr = Str.data();
204e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = Str.size();
2052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2073f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  StringRef getHTMLTagEndName() const LLVM_READONLY {
2083f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    assert(is(tok::html_end_tag));
209e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    return StringRef(TextPtr, IntVal);
2102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2123f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  void setHTMLTagEndName(StringRef Name) {
2133f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    assert(is(tok::html_end_tag));
214e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    TextPtr = Name.data();
215e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko    IntVal = Name.size();
2162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
2172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void dump(const Lexer &L, const SourceManager &SM) const;
2192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko};
2202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// \brief Comment lexer.
2222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoclass Lexer {
2232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoprivate:
224da5922f4864b5da254c6676af8833c42adaa6d86Dmitri Gribenko  Lexer(const Lexer &) LLVM_DELETED_FUNCTION;
225da5922f4864b5da254c6676af8833c42adaa6d86Dmitri Gribenko  void operator=(const Lexer &) LLVM_DELETED_FUNCTION;
2262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
227477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  /// Allocator for strings that are semantic values of tokens and have to be
228477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  /// computed (for example, resolved decimal character references).
229477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  llvm::BumpPtrAllocator &Allocator;
230477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
231ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian  DiagnosticsEngine &Diags;
232ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian
233aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko  const CommandTraits &Traits;
234aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko
2352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *const BufferStart;
2362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *const BufferEnd;
2372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  SourceLocation FileLoc;
2382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *BufferPtr;
2402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// One past end pointer for the current comment.  For BCPL comments points
2422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// to newline or BufferEnd, for C comments points to star in '*/'.
2432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  const char *CommentEnd;
2442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  enum LexerCommentState {
2462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    LCS_BeforeComment,
2472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    LCS_InsideBCPLComment,
2482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    LCS_InsideCComment,
2492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    LCS_BetweenComments
2502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  };
2512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// Low-level lexer state, track if we are inside or outside of comment.
2532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  LexerCommentState CommentState;
2542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  enum LexerState {
2562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    /// Lexing normal comment text
2572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    LS_Normal,
2582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    /// Finished lexing verbatim block beginning command, will lex first body
2602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    /// line.
2612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    LS_VerbatimBlockFirstLine,
2622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    /// Lexing verbatim block body line-by-line, skipping line-starting
2642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    /// decorations.
2652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    LS_VerbatimBlockBody,
2662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
267962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    /// Finished lexing verbatim line beginning command, will lex text (one
268962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    /// line).
269962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko    LS_VerbatimLineText,
270962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
2712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes.
2723f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    LS_HTMLStartTag,
2738d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
2748d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko    /// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'.
2753f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko    LS_HTMLEndTag
2762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  };
2772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
2782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// Current lexing mode.
2792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  LexerState State;
2802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
281bed28ac1d1463adca3ecf24fca5c30646fa9dbb2Sylvestre Ledru  /// If State is LS_VerbatimBlock, contains the name of verbatim end
2822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// command, including command marker.
2832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  SmallString<16> VerbatimBlockEndCommandName;
2842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
285477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  /// Given a character reference name (e.g., "lt"), return the character that
286477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  /// it stands for (e.g., "<").
287477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
288477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
289477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  /// Given a Unicode codepoint as base-10 integer, return the character.
290477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
291477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
292477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  /// Given a Unicode codepoint as base-16 integer, return the character.
293477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
294477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
2952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void formTokenWithChars(Token &Result, const char *TokEnd,
296651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines                          tok::TokenKind Kind);
2972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
298477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  void formTextToken(Token &Result, const char *TokEnd) {
299477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    StringRef Text(BufferPtr, TokEnd - BufferPtr);
300477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    formTokenWithChars(Result, TokEnd, tok::text);
301477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko    Result.setText(Text);
302477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  }
303477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
3042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  SourceLocation getSourceLocation(const char *Loc) const {
3052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    assert(Loc >= BufferStart && Loc <= BufferEnd &&
3062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko           "Location out of range for this buffer!");
3072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    const unsigned CharNo = Loc - BufferStart;
3092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko    return FileLoc.getLocWithOffset(CharNo);
3102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  }
3112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
312ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) {
313ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian    return Diags.Report(Loc, DiagID);
314ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian  }
315ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian
3162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// Eat string matching regexp \code \s*\* \endcode.
3172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void skipLineStartingDecorations();
3182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  /// Lex stuff inside comments.  CommentEnd should be set correctly.
3202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void lexCommentText(Token &T);
3212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void setupAndLexVerbatimBlock(Token &T,
3232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                                const char *TextBegin,
324e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko                                char Marker, const CommandInfo *Info);
3252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void lexVerbatimBlockFirstLine(Token &T);
3272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void lexVerbatimBlockBody(Token &T);
3292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
330e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko  void setupAndLexVerbatimLine(Token &T, const char *TextBegin,
331e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko                               const CommandInfo *Info);
332962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko
333962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko  void lexVerbatimLineText(Token &T);
3342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
335477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko  void lexHTMLCharacterReference(Token &T);
336477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko
3373f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  void setupAndLexHTMLStartTag(Token &T);
3382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3393f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  void lexHTMLStartTag(Token &T);
3402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3413f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  void setupAndLexHTMLEndTag(Token &T);
3428d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko
3433f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko  void lexHTMLEndTag(Token &T);
3442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkopublic:
346ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian  Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
347ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian        const CommandTraits &Traits,
348af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko        SourceLocation FileLoc,
3492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko        const char *BufferStart, const char *BufferEnd);
3502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  void lex(Token &T);
3522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko  StringRef getSpelling(const Token &Tok,
3542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko                        const SourceManager &SourceMgr,
3556bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines                        bool *Invalid = nullptr) const;
3562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko};
3572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace comments
3592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace clang
3602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
3612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#endif
3622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko
363