12d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===// 22d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// 32d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// The LLVM Compiler Infrastructure 42d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// 52d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// This file is distributed under the University of Illinois Open Source 62d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// License. See LICENSE.TXT for details. 72d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// 82d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//===----------------------------------------------------------------------===// 92d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// 102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// This file defines lexer for structured comments and supporting token class. 112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko// 122d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko//===----------------------------------------------------------------------===// 132d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 142d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#ifndef LLVM_CLANG_AST_COMMENT_LEXER_H 152d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#define LLVM_CLANG_AST_COMMENT_LEXER_H 162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 17ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian#include "clang/Basic/Diagnostic.h" 18651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#include "clang/Basic/SourceManager.h" 192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/SmallString.h" 202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/ADT/SmallVector.h" 2130a2e16f6c27f888dd11eba6bbbae1e980078fcbChandler Carruth#include "llvm/ADT/StringRef.h" 228d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko#include "llvm/Support/Allocator.h" 232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#include "llvm/Support/raw_ostream.h" 242d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace clang { 262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace comments { 272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoclass Lexer; 298d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenkoclass TextTokenRetokenizer; 30e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenkostruct CommandInfo; 31aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenkoclass CommandTraits; 322d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkonamespace tok { 342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoenum TokenKind { 352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko eof, 362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko newline, 372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko text, 38808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko unknown_command, // Command that does not have an ID. 39808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko backslash_command, // Command with an ID, that used backslash marker. 40808383d2d6d58a7c7db85f8c7618fb74d821309fDmitri Gribenko at_command, // Command with an ID, that used 'at' marker. 412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko verbatim_block_begin, 422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko verbatim_block_line, 432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko verbatim_block_end, 44962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko verbatim_line_name, 45962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko verbatim_line_text, 463f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko html_start_tag, // <tag 472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko html_ident, // attr 482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko html_equals, // = 492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko html_quoted_string, // "blah\"blah" or 'blah\'blah' 502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko html_greater, // > 51a5ef44ff5d93a3be6ca67782828157a71894cf0cDmitri Gribenko html_slash_greater, // /> 523f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko html_end_tag // </tag 532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}; 542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace tok 552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// \brief Comment token. 572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoclass Token { 582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko friend class Lexer; 598d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko friend class TextTokenRetokenizer; 602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// The location of the token. 622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko SourceLocation Loc; 632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// The actual kind of the token. 652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko tok::TokenKind Kind; 662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Length of the token spelling in comment. Can be 0 for synthenized 682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// tokens. 692d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko unsigned Length; 702d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Contains text value associated with a token. 72e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko const char *TextPtr; 73e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko 74e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko /// Integer value associated with a token. 75e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko /// 76e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko /// If the token is a konwn command, contains command ID and TextPtr is 77e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko /// unused (command spelling can be found with CommandTraits). Otherwise, 78e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko /// contains the length of the string that starts at TextPtr. 79e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko unsigned IntVal; 80c98e9130bcddd0258c110d30749edd2284087e3dFariborz Jahanian 812d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkopublic: 822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko SourceLocation getLocation() const LLVM_READONLY { return Loc; } 832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void setLocation(SourceLocation SL) { Loc = SL; } 842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 858d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko SourceLocation getEndLocation() const LLVM_READONLY { 868d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko if (Length == 0 || Length == 1) 878d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko return Loc; 888d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko return Loc.getLocWithOffset(Length - 1); 898d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko } 908d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko tok::TokenKind getKind() const LLVM_READONLY { return Kind; } 922d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void setKind(tok::TokenKind K) { Kind = K; } 932d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; } 952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; } 962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko unsigned getLength() const LLVM_READONLY { return Length; } 982d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void setLength(unsigned L) { Length = L; } 992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef getText() const LLVM_READONLY { 1012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::text)); 102e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return StringRef(TextPtr, IntVal); 1032d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void setText(StringRef Text) { 1062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::text)); 107e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko TextPtr = Text.data(); 108e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = Text.size(); 109e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko } 110e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko 111e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko StringRef getUnknownCommandName() const LLVM_READONLY { 112e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko assert(is(tok::unknown_command)); 113e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return StringRef(TextPtr, IntVal); 114e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko } 115e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko 116e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko void setUnknownCommandName(StringRef Name) { 117e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko assert(is(tok::unknown_command)); 118e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko TextPtr = Name.data(); 119e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = Name.size(); 1202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 122e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko unsigned getCommandID() const LLVM_READONLY { 1238536fa14ee1048e5e2d62cb3dc11fc640c7dc00dFariborz Jahanian assert(is(tok::backslash_command) || is(tok::at_command)); 124e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return IntVal; 1252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 127e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko void setCommandID(unsigned ID) { 1288536fa14ee1048e5e2d62cb3dc11fc640c7dc00dFariborz Jahanian assert(is(tok::backslash_command) || is(tok::at_command)); 129e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = ID; 1302d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1312d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 132e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko unsigned getVerbatimBlockID() const LLVM_READONLY { 1332d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end)); 134e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return IntVal; 1352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 137e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko void setVerbatimBlockID(unsigned ID) { 1382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end)); 139e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = ID; 1402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef getVerbatimBlockText() const LLVM_READONLY { 1432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::verbatim_block_line)); 144e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return StringRef(TextPtr, IntVal); 1452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void setVerbatimBlockText(StringRef Text) { 1482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::verbatim_block_line)); 149e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko TextPtr = Text.data(); 150e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = Text.size(); 1512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 153e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko unsigned getVerbatimLineID() const LLVM_READONLY { 154962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko assert(is(tok::verbatim_line_name)); 155e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return IntVal; 1562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 158e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko void setVerbatimLineID(unsigned ID) { 159962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko assert(is(tok::verbatim_line_name)); 160e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = ID; 1612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef getVerbatimLineText() const LLVM_READONLY { 164962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko assert(is(tok::verbatim_line_text)); 165e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return StringRef(TextPtr, IntVal); 1662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1672d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1682d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void setVerbatimLineText(StringRef Text) { 169962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko assert(is(tok::verbatim_line_text)); 170e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko TextPtr = Text.data(); 171e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = Text.size(); 1722d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1732d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1743f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko StringRef getHTMLTagStartName() const LLVM_READONLY { 1753f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko assert(is(tok::html_start_tag)); 176e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return StringRef(TextPtr, IntVal); 1772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1793f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko void setHTMLTagStartName(StringRef Name) { 1803f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko assert(is(tok::html_start_tag)); 181e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko TextPtr = Name.data(); 182e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = Name.size(); 1832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1852d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef getHTMLIdent() const LLVM_READONLY { 1862d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::html_ident)); 187e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return StringRef(TextPtr, IntVal); 1882d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1892d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1902d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void setHTMLIdent(StringRef Name) { 1912d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::html_ident)); 192e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko TextPtr = Name.data(); 193e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = Name.size(); 1942d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 1952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 1962d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef getHTMLQuotedString() const LLVM_READONLY { 1972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::html_quoted_string)); 198e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return StringRef(TextPtr, IntVal); 1992d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2002d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2012d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void setHTMLQuotedString(StringRef Str) { 2022d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(is(tok::html_quoted_string)); 203e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko TextPtr = Str.data(); 204e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = Str.size(); 2052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2073f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko StringRef getHTMLTagEndName() const LLVM_READONLY { 2083f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko assert(is(tok::html_end_tag)); 209e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko return StringRef(TextPtr, IntVal); 2102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2123f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko void setHTMLTagEndName(StringRef Name) { 2133f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko assert(is(tok::html_end_tag)); 214e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko TextPtr = Name.data(); 215e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko IntVal = Name.size(); 2162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 2172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void dump(const Lexer &L, const SourceManager &SM) const; 2192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}; 2202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko/// \brief Comment lexer. 2222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoclass Lexer { 2232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkoprivate: 224da5922f4864b5da254c6676af8833c42adaa6d86Dmitri Gribenko Lexer(const Lexer &) LLVM_DELETED_FUNCTION; 225da5922f4864b5da254c6676af8833c42adaa6d86Dmitri Gribenko void operator=(const Lexer &) LLVM_DELETED_FUNCTION; 2262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 227477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko /// Allocator for strings that are semantic values of tokens and have to be 228477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko /// computed (for example, resolved decimal character references). 229477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko llvm::BumpPtrAllocator &Allocator; 230477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 231ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian DiagnosticsEngine &Diags; 232ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian 233aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko const CommandTraits &Traits; 234aa58081902ad31927df02e8537d972eabe29d6dfDmitri Gribenko 2352d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *const BufferStart; 2362d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *const BufferEnd; 2372d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko SourceLocation FileLoc; 2382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2392d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *BufferPtr; 2402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2412d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// One past end pointer for the current comment. For BCPL comments points 2422d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// to newline or BufferEnd, for C comments points to star in '*/'. 2432d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *CommentEnd; 2442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko enum LexerCommentState { 2462d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko LCS_BeforeComment, 2472d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko LCS_InsideBCPLComment, 2482d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko LCS_InsideCComment, 2492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko LCS_BetweenComments 2502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko }; 2512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Low-level lexer state, track if we are inside or outside of comment. 2532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko LexerCommentState CommentState; 2542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2552d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko enum LexerState { 2562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Lexing normal comment text 2572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko LS_Normal, 2582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Finished lexing verbatim block beginning command, will lex first body 2602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// line. 2612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko LS_VerbatimBlockFirstLine, 2622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2632d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Lexing verbatim block body line-by-line, skipping line-starting 2642d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// decorations. 2652d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko LS_VerbatimBlockBody, 2662d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 267962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko /// Finished lexing verbatim line beginning command, will lex text (one 268962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko /// line). 269962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko LS_VerbatimLineText, 270962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 2712d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes. 2723f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko LS_HTMLStartTag, 2738d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 2748d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko /// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'. 2753f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko LS_HTMLEndTag 2762d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko }; 2772d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 2782d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Current lexing mode. 2792d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko LexerState State; 2802d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 281bed28ac1d1463adca3ecf24fca5c30646fa9dbb2Sylvestre Ledru /// If State is LS_VerbatimBlock, contains the name of verbatim end 2822d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// command, including command marker. 2832d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko SmallString<16> VerbatimBlockEndCommandName; 2842d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 285477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko /// Given a character reference name (e.g., "lt"), return the character that 286477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko /// it stands for (e.g., "<"). 287477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko StringRef resolveHTMLNamedCharacterReference(StringRef Name) const; 288477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 289477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko /// Given a Unicode codepoint as base-10 integer, return the character. 290477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const; 291477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 292477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko /// Given a Unicode codepoint as base-16 integer, return the character. 293477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko StringRef resolveHTMLHexCharacterReference(StringRef Name) const; 294477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 2952d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void formTokenWithChars(Token &Result, const char *TokEnd, 296651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines tok::TokenKind Kind); 2972d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 298477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko void formTextToken(Token &Result, const char *TokEnd) { 299477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko StringRef Text(BufferPtr, TokEnd - BufferPtr); 300477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko formTokenWithChars(Result, TokEnd, tok::text); 301477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko Result.setText(Text); 302477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko } 303477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 3042d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko SourceLocation getSourceLocation(const char *Loc) const { 3052d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko assert(Loc >= BufferStart && Loc <= BufferEnd && 3062d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko "Location out of range for this buffer!"); 3072d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3082d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const unsigned CharNo = Loc - BufferStart; 3092d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko return FileLoc.getLocWithOffset(CharNo); 3102d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko } 3112d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 312ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) { 313ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian return Diags.Report(Loc, DiagID); 314ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian } 315ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian 3162d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Eat string matching regexp \code \s*\* \endcode. 3172d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void skipLineStartingDecorations(); 3182d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3192d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko /// Lex stuff inside comments. CommentEnd should be set correctly. 3202d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void lexCommentText(Token &T); 3212d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3222d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void setupAndLexVerbatimBlock(Token &T, 3232d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *TextBegin, 324e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko char Marker, const CommandInfo *Info); 3252d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3262d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void lexVerbatimBlockFirstLine(Token &T); 3272d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3282d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void lexVerbatimBlockBody(Token &T); 3292d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 330e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko void setupAndLexVerbatimLine(Token &T, const char *TextBegin, 331e4330a302ac20b41b9800267ebd4b5b01f8553f8Dmitri Gribenko const CommandInfo *Info); 332962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko 333962668d2c192dd02f75b8ec3628a89964bfb738bDmitri Gribenko void lexVerbatimLineText(Token &T); 3342d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 335477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko void lexHTMLCharacterReference(Token &T); 336477a9f58c1b197f315befd03b42a8a0b3a2f0ff9Dmitri Gribenko 3373f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko void setupAndLexHTMLStartTag(Token &T); 3382d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3393f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko void lexHTMLStartTag(Token &T); 3402d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3413f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko void setupAndLexHTMLEndTag(Token &T); 3428d3ba23f2d9e6c87794d059412a0808c9cbacb25Dmitri Gribenko 3433f38bf2d441fac379c427f86153fbb0cb41256c6Dmitri Gribenko void lexHTMLEndTag(Token &T); 3442d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3452d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenkopublic: 346ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags, 347ad6fd9f93ce0d328397e8d57ef7117ced24fc8e2Fariborz Jahanian const CommandTraits &Traits, 348af503a6f218cbef8704609812668360b0cbd0b60Dmitri Gribenko SourceLocation FileLoc, 3492d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const char *BufferStart, const char *BufferEnd); 3502d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3512d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko void lex(Token &T); 3522d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3532d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko StringRef getSpelling(const Token &Tok, 3542d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko const SourceManager &SourceMgr, 3556bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines bool *Invalid = nullptr) const; 3562d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko}; 3572d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3582d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace comments 3592d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko} // end namespace clang 3602d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 3612d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko#endif 3622d44d77fed3200e2eff289f55493317e90d3398cDmitri Gribenko 363