1//===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "clang/Basic/SourceManager.h" 11#include "clang/Basic/FileManager.h" 12#include "clang/Basic/Diagnostic.h" 13#include "clang/AST/CommentLexer.h" 14#include "clang/AST/CommentCommandTraits.h" 15#include "llvm/ADT/STLExtras.h" 16#include <vector> 17 18#include "gtest/gtest.h" 19 20using namespace llvm; 21using namespace clang; 22 23namespace clang { 24namespace comments { 25 26namespace { 27class CommentLexerTest : public ::testing::Test { 28protected: 29 CommentLexerTest() 30 : FileMgr(FileMgrOpts), 31 DiagID(new DiagnosticIDs()), 32 Diags(DiagID, new IgnoringDiagConsumer()), 33 SourceMgr(Diags, FileMgr), 34 Traits(Allocator) { 35 } 36 37 FileSystemOptions FileMgrOpts; 38 FileManager FileMgr; 39 IntrusiveRefCntPtr<DiagnosticIDs> DiagID; 40 DiagnosticsEngine Diags; 41 SourceManager SourceMgr; 42 llvm::BumpPtrAllocator Allocator; 43 CommandTraits Traits; 44 45 void lexString(const char *Source, std::vector<Token> &Toks); 46 47 StringRef getCommandName(const Token &Tok) { 48 return Traits.getCommandInfo(Tok.getCommandID())->Name; 49 } 50 51 StringRef getVerbatimBlockName(const Token &Tok) { 52 return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name; 53 } 54 55 StringRef getVerbatimLineName(const Token &Tok) { 56 return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name; 57 } 58}; 59 60void CommentLexerTest::lexString(const char *Source, 61 std::vector<Token> &Toks) { 62 MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source); 63 FileID File = SourceMgr.createFileIDForMemBuffer(Buf); 64 SourceLocation Begin = SourceMgr.getLocForStartOfFile(File); 65 66 Lexer L(Allocator, Traits, Begin, Source, Source + strlen(Source)); 67 68 while (1) { 69 Token Tok; 70 L.lex(Tok); 71 if (Tok.is(tok::eof)) 72 break; 73 Toks.push_back(Tok); 74 } 75} 76 77} // unnamed namespace 78 79// Empty source range should be handled. 80TEST_F(CommentLexerTest, Basic1) { 81 const char *Source = ""; 82 std::vector<Token> Toks; 83 84 lexString(Source, Toks); 85 86 ASSERT_EQ(0U, Toks.size()); 87} 88 89// Empty comments should be handled. 90TEST_F(CommentLexerTest, Basic2) { 91 const char *Sources[] = { 92 "//", "///", "//!", "///<", "//!<" 93 }; 94 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 95 std::vector<Token> Toks; 96 97 lexString(Sources[i], Toks); 98 99 ASSERT_EQ(1U, Toks.size()); 100 101 ASSERT_EQ(tok::newline, Toks[0].getKind()); 102 } 103} 104 105// Empty comments should be handled. 106TEST_F(CommentLexerTest, Basic3) { 107 const char *Sources[] = { 108 "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/" 109 }; 110 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 111 std::vector<Token> Toks; 112 113 lexString(Sources[i], Toks); 114 115 ASSERT_EQ(2U, Toks.size()); 116 117 ASSERT_EQ(tok::newline, Toks[0].getKind()); 118 ASSERT_EQ(tok::newline, Toks[1].getKind()); 119 } 120} 121 122// Single comment with plain text. 123TEST_F(CommentLexerTest, Basic4) { 124 const char *Sources[] = { 125 "// Meow", "/// Meow", "//! Meow", 126 "// Meow\n", "// Meow\r\n", "//! Meow\r", 127 }; 128 129 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 130 std::vector<Token> Toks; 131 132 lexString(Sources[i], Toks); 133 134 ASSERT_EQ(2U, Toks.size()); 135 136 ASSERT_EQ(tok::text, Toks[0].getKind()); 137 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText()); 138 139 ASSERT_EQ(tok::newline, Toks[1].getKind()); 140 } 141} 142 143// Single comment with plain text. 144TEST_F(CommentLexerTest, Basic5) { 145 const char *Sources[] = { 146 "/* Meow*/", "/** Meow*/", "/*! Meow*/" 147 }; 148 149 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 150 std::vector<Token> Toks; 151 152 lexString(Sources[i], Toks); 153 154 ASSERT_EQ(3U, Toks.size()); 155 156 ASSERT_EQ(tok::text, Toks[0].getKind()); 157 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText()); 158 159 ASSERT_EQ(tok::newline, Toks[1].getKind()); 160 ASSERT_EQ(tok::newline, Toks[2].getKind()); 161 } 162} 163 164// Test newline escaping. 165TEST_F(CommentLexerTest, Basic6) { 166 const char *Sources[] = { 167 "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n", 168 "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n", 169 "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r" 170 }; 171 172 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 173 std::vector<Token> Toks; 174 175 lexString(Sources[i], Toks); 176 177 ASSERT_EQ(10U, Toks.size()); 178 179 ASSERT_EQ(tok::text, Toks[0].getKind()); 180 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); 181 ASSERT_EQ(tok::text, Toks[1].getKind()); 182 ASSERT_EQ(StringRef("\\"), Toks[1].getText()); 183 ASSERT_EQ(tok::newline, Toks[2].getKind()); 184 185 ASSERT_EQ(tok::text, Toks[3].getKind()); 186 ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText()); 187 ASSERT_EQ(tok::text, Toks[4].getKind()); 188 ASSERT_EQ(StringRef("\\"), Toks[4].getText()); 189 ASSERT_EQ(tok::text, Toks[5].getKind()); 190 ASSERT_EQ(StringRef(" "), Toks[5].getText()); 191 ASSERT_EQ(tok::newline, Toks[6].getKind()); 192 193 ASSERT_EQ(tok::text, Toks[7].getKind()); 194 ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText()); 195 ASSERT_EQ(tok::newline, Toks[8].getKind()); 196 197 ASSERT_EQ(tok::newline, Toks[9].getKind()); 198 } 199} 200 201// Check that we skip C-style aligned stars correctly. 202TEST_F(CommentLexerTest, Basic7) { 203 const char *Source = 204 "/* Aaa\n" 205 " * Bbb\r\n" 206 "\t* Ccc\n" 207 " ! Ddd\n" 208 " * Eee\n" 209 " ** Fff\n" 210 " */"; 211 std::vector<Token> Toks; 212 213 lexString(Source, Toks); 214 215 ASSERT_EQ(15U, Toks.size()); 216 217 ASSERT_EQ(tok::text, Toks[0].getKind()); 218 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); 219 ASSERT_EQ(tok::newline, Toks[1].getKind()); 220 221 ASSERT_EQ(tok::text, Toks[2].getKind()); 222 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText()); 223 ASSERT_EQ(tok::newline, Toks[3].getKind()); 224 225 ASSERT_EQ(tok::text, Toks[4].getKind()); 226 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText()); 227 ASSERT_EQ(tok::newline, Toks[5].getKind()); 228 229 ASSERT_EQ(tok::text, Toks[6].getKind()); 230 ASSERT_EQ(StringRef(" ! Ddd"), Toks[6].getText()); 231 ASSERT_EQ(tok::newline, Toks[7].getKind()); 232 233 ASSERT_EQ(tok::text, Toks[8].getKind()); 234 ASSERT_EQ(StringRef(" Eee"), Toks[8].getText()); 235 ASSERT_EQ(tok::newline, Toks[9].getKind()); 236 237 ASSERT_EQ(tok::text, Toks[10].getKind()); 238 ASSERT_EQ(StringRef("* Fff"), Toks[10].getText()); 239 ASSERT_EQ(tok::newline, Toks[11].getKind()); 240 241 ASSERT_EQ(tok::text, Toks[12].getKind()); 242 ASSERT_EQ(StringRef(" "), Toks[12].getText()); 243 244 ASSERT_EQ(tok::newline, Toks[13].getKind()); 245 ASSERT_EQ(tok::newline, Toks[14].getKind()); 246} 247 248// A command marker followed by comment end. 249TEST_F(CommentLexerTest, DoxygenCommand1) { 250 const char *Sources[] = { "//@", "///@", "//!@" }; 251 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 252 std::vector<Token> Toks; 253 254 lexString(Sources[i], Toks); 255 256 ASSERT_EQ(2U, Toks.size()); 257 258 ASSERT_EQ(tok::text, Toks[0].getKind()); 259 ASSERT_EQ(StringRef("@"), Toks[0].getText()); 260 261 ASSERT_EQ(tok::newline, Toks[1].getKind()); 262 } 263} 264 265// A command marker followed by comment end. 266TEST_F(CommentLexerTest, DoxygenCommand2) { 267 const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"}; 268 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 269 std::vector<Token> Toks; 270 271 lexString(Sources[i], Toks); 272 273 ASSERT_EQ(3U, Toks.size()); 274 275 ASSERT_EQ(tok::text, Toks[0].getKind()); 276 ASSERT_EQ(StringRef("@"), Toks[0].getText()); 277 278 ASSERT_EQ(tok::newline, Toks[1].getKind()); 279 ASSERT_EQ(tok::newline, Toks[2].getKind()); 280 } 281} 282 283// A command marker followed by comment end. 284TEST_F(CommentLexerTest, DoxygenCommand3) { 285 const char *Sources[] = { "/*\\*/", "/**\\*/" }; 286 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 287 std::vector<Token> Toks; 288 289 lexString(Sources[i], Toks); 290 291 ASSERT_EQ(3U, Toks.size()); 292 293 ASSERT_EQ(tok::text, Toks[0].getKind()); 294 ASSERT_EQ(StringRef("\\"), Toks[0].getText()); 295 296 ASSERT_EQ(tok::newline, Toks[1].getKind()); 297 ASSERT_EQ(tok::newline, Toks[2].getKind()); 298 } 299} 300 301// Doxygen escape sequences. 302TEST_F(CommentLexerTest, DoxygenCommand4) { 303 const char *Source = 304 "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::"; 305 const char *Text[] = { 306 " ", 307 "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ", 308 "<", " ", ">", " ", "%", " ", "\"", " ", ".", " ", 309 "::", "" 310 }; 311 312 std::vector<Token> Toks; 313 314 lexString(Source, Toks); 315 316 ASSERT_EQ(array_lengthof(Text), Toks.size()); 317 318 for (size_t i = 0, e = Toks.size(); i != e; i++) { 319 if(Toks[i].is(tok::text)) 320 ASSERT_EQ(StringRef(Text[i]), Toks[i].getText()) 321 << "index " << i; 322 } 323} 324 325TEST_F(CommentLexerTest, DoxygenCommand5) { 326 const char *Source = "/// \\brief Aaa."; 327 std::vector<Token> Toks; 328 329 lexString(Source, Toks); 330 331 ASSERT_EQ(4U, Toks.size()); 332 333 ASSERT_EQ(tok::text, Toks[0].getKind()); 334 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 335 336 ASSERT_EQ(tok::command, Toks[1].getKind()); 337 ASSERT_EQ(StringRef("brief"), getCommandName(Toks[1])); 338 339 ASSERT_EQ(tok::text, Toks[2].getKind()); 340 ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText()); 341 342 ASSERT_EQ(tok::newline, Toks[3].getKind()); 343} 344 345TEST_F(CommentLexerTest, DoxygenCommand6) { 346 const char *Source = "/// \\em\\em \\em\t\\em\n"; 347 std::vector<Token> Toks; 348 349 lexString(Source, Toks); 350 351 ASSERT_EQ(8U, Toks.size()); 352 353 ASSERT_EQ(tok::text, Toks[0].getKind()); 354 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 355 356 ASSERT_EQ(tok::command, Toks[1].getKind()); 357 ASSERT_EQ(StringRef("em"), getCommandName(Toks[1])); 358 359 ASSERT_EQ(tok::command, Toks[2].getKind()); 360 ASSERT_EQ(StringRef("em"), getCommandName(Toks[2])); 361 362 ASSERT_EQ(tok::text, Toks[3].getKind()); 363 ASSERT_EQ(StringRef(" "), Toks[3].getText()); 364 365 ASSERT_EQ(tok::command, Toks[4].getKind()); 366 ASSERT_EQ(StringRef("em"), getCommandName(Toks[4])); 367 368 ASSERT_EQ(tok::text, Toks[5].getKind()); 369 ASSERT_EQ(StringRef("\t"), Toks[5].getText()); 370 371 ASSERT_EQ(tok::command, Toks[6].getKind()); 372 ASSERT_EQ(StringRef("em"), getCommandName(Toks[6])); 373 374 ASSERT_EQ(tok::newline, Toks[7].getKind()); 375} 376 377TEST_F(CommentLexerTest, DoxygenCommand7) { 378 const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n"; 379 std::vector<Token> Toks; 380 381 lexString(Source, Toks); 382 383 ASSERT_EQ(8U, Toks.size()); 384 385 ASSERT_EQ(tok::text, Toks[0].getKind()); 386 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 387 388 ASSERT_EQ(tok::unknown_command, Toks[1].getKind()); 389 ASSERT_EQ(StringRef("aaa"), Toks[1].getUnknownCommandName()); 390 391 ASSERT_EQ(tok::unknown_command, Toks[2].getKind()); 392 ASSERT_EQ(StringRef("bbb"), Toks[2].getUnknownCommandName()); 393 394 ASSERT_EQ(tok::text, Toks[3].getKind()); 395 ASSERT_EQ(StringRef(" "), Toks[3].getText()); 396 397 ASSERT_EQ(tok::unknown_command, Toks[4].getKind()); 398 ASSERT_EQ(StringRef("ccc"), Toks[4].getUnknownCommandName()); 399 400 ASSERT_EQ(tok::text, Toks[5].getKind()); 401 ASSERT_EQ(StringRef("\t"), Toks[5].getText()); 402 403 ASSERT_EQ(tok::unknown_command, Toks[6].getKind()); 404 ASSERT_EQ(StringRef("ddd"), Toks[6].getUnknownCommandName()); 405 406 ASSERT_EQ(tok::newline, Toks[7].getKind()); 407} 408 409TEST_F(CommentLexerTest, DoxygenCommand8) { 410 const char *Source = "// \\c\n"; 411 std::vector<Token> Toks; 412 413 lexString(Source, Toks); 414 415 ASSERT_EQ(3U, Toks.size()); 416 417 ASSERT_EQ(tok::text, Toks[0].getKind()); 418 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 419 420 ASSERT_EQ(tok::command, Toks[1].getKind()); 421 ASSERT_EQ(StringRef("c"), getCommandName(Toks[1])); 422 423 ASSERT_EQ(tok::newline, Toks[2].getKind()); 424} 425 426// Empty verbatim block. 427TEST_F(CommentLexerTest, VerbatimBlock1) { 428 const char *Sources[] = { 429 "/// \\verbatim\\endverbatim\n//", 430 "/** \\verbatim\\endverbatim*/" 431 }; 432 433 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 434 std::vector<Token> Toks; 435 436 lexString(Sources[i], Toks); 437 438 ASSERT_EQ(5U, Toks.size()); 439 440 ASSERT_EQ(tok::text, Toks[0].getKind()); 441 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 442 443 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 444 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 445 446 ASSERT_EQ(tok::verbatim_block_end, Toks[2].getKind()); 447 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[2])); 448 449 ASSERT_EQ(tok::newline, Toks[3].getKind()); 450 ASSERT_EQ(tok::newline, Toks[4].getKind()); 451 } 452} 453 454// Empty verbatim block without an end command. 455TEST_F(CommentLexerTest, VerbatimBlock2) { 456 const char *Source = "/// \\verbatim"; 457 458 std::vector<Token> Toks; 459 460 lexString(Source, Toks); 461 462 ASSERT_EQ(3U, Toks.size()); 463 464 ASSERT_EQ(tok::text, Toks[0].getKind()); 465 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 466 467 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 468 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 469 470 ASSERT_EQ(tok::newline, Toks[2].getKind()); 471} 472 473// Empty verbatim block without an end command. 474TEST_F(CommentLexerTest, VerbatimBlock3) { 475 const char *Source = "/** \\verbatim*/"; 476 477 std::vector<Token> Toks; 478 479 lexString(Source, Toks); 480 481 ASSERT_EQ(4U, Toks.size()); 482 483 ASSERT_EQ(tok::text, Toks[0].getKind()); 484 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 485 486 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 487 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 488 489 ASSERT_EQ(tok::newline, Toks[2].getKind()); 490 ASSERT_EQ(tok::newline, Toks[3].getKind()); 491} 492 493// Single-line verbatim block. 494TEST_F(CommentLexerTest, VerbatimBlock4) { 495 const char *Sources[] = { 496 "/// Meow \\verbatim aaa \\endverbatim\n//", 497 "/** Meow \\verbatim aaa \\endverbatim*/" 498 }; 499 500 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 501 std::vector<Token> Toks; 502 503 lexString(Sources[i], Toks); 504 505 ASSERT_EQ(6U, Toks.size()); 506 507 ASSERT_EQ(tok::text, Toks[0].getKind()); 508 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); 509 510 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 511 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 512 513 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 514 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText()); 515 516 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); 517 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[3])); 518 519 ASSERT_EQ(tok::newline, Toks[4].getKind()); 520 ASSERT_EQ(tok::newline, Toks[5].getKind()); 521 } 522} 523 524// Single-line verbatim block without an end command. 525TEST_F(CommentLexerTest, VerbatimBlock5) { 526 const char *Sources[] = { 527 "/// Meow \\verbatim aaa \n//", 528 "/** Meow \\verbatim aaa */" 529 }; 530 531 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 532 std::vector<Token> Toks; 533 534 lexString(Sources[i], Toks); 535 536 ASSERT_EQ(5U, Toks.size()); 537 538 ASSERT_EQ(tok::text, Toks[0].getKind()); 539 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); 540 541 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 542 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 543 544 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 545 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText()); 546 547 ASSERT_EQ(tok::newline, Toks[3].getKind()); 548 ASSERT_EQ(tok::newline, Toks[4].getKind()); 549 } 550} 551 552TEST_F(CommentLexerTest, VerbatimBlock6) { 553 const char *Source = 554 "// \\verbatim\n" 555 "// Aaa\n" 556 "//\n" 557 "// Bbb\n" 558 "// \\endverbatim\n"; 559 560 std::vector<Token> Toks; 561 562 lexString(Source, Toks); 563 564 ASSERT_EQ(10U, Toks.size()); 565 566 ASSERT_EQ(tok::text, Toks[0].getKind()); 567 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 568 569 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 570 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 571 572 ASSERT_EQ(tok::newline, Toks[2].getKind()); 573 574 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); 575 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getVerbatimBlockText()); 576 577 ASSERT_EQ(tok::newline, Toks[4].getKind()); 578 579 ASSERT_EQ(tok::newline, Toks[5].getKind()); 580 581 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); 582 ASSERT_EQ(StringRef(" Bbb"), Toks[6].getVerbatimBlockText()); 583 584 ASSERT_EQ(tok::newline, Toks[7].getKind()); 585 586 ASSERT_EQ(tok::verbatim_block_end, Toks[8].getKind()); 587 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[8])); 588 589 ASSERT_EQ(tok::newline, Toks[9].getKind()); 590} 591 592TEST_F(CommentLexerTest, VerbatimBlock7) { 593 const char *Source = 594 "/* \\verbatim\n" 595 " * Aaa\n" 596 " *\n" 597 " * Bbb\n" 598 " * \\endverbatim\n" 599 " */"; 600 601 std::vector<Token> Toks; 602 603 lexString(Source, Toks); 604 605 ASSERT_EQ(10U, Toks.size()); 606 607 ASSERT_EQ(tok::text, Toks[0].getKind()); 608 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 609 610 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 611 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 612 613 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 614 ASSERT_EQ(StringRef(" Aaa"), Toks[2].getVerbatimBlockText()); 615 616 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); 617 ASSERT_EQ(StringRef(""), Toks[3].getVerbatimBlockText()); 618 619 ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); 620 ASSERT_EQ(StringRef(" Bbb"), Toks[4].getVerbatimBlockText()); 621 622 ASSERT_EQ(tok::verbatim_block_end, Toks[5].getKind()); 623 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[5])); 624 625 ASSERT_EQ(tok::newline, Toks[6].getKind()); 626 627 ASSERT_EQ(tok::text, Toks[7].getKind()); 628 ASSERT_EQ(StringRef(" "), Toks[7].getText()); 629 630 ASSERT_EQ(tok::newline, Toks[8].getKind()); 631 ASSERT_EQ(tok::newline, Toks[9].getKind()); 632} 633 634// Complex test for verbatim blocks. 635TEST_F(CommentLexerTest, VerbatimBlock8) { 636 const char *Source = 637 "/* Meow \\verbatim aaa\\$\\@\n" 638 "bbb \\endverbati\r" 639 "ccc\r\n" 640 "ddd \\endverbatim Blah \\verbatim eee\n" 641 "\\endverbatim BlahBlah*/"; 642 std::vector<Token> Toks; 643 644 lexString(Source, Toks); 645 646 ASSERT_EQ(14U, Toks.size()); 647 648 ASSERT_EQ(tok::text, Toks[0].getKind()); 649 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); 650 651 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 652 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 653 654 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 655 ASSERT_EQ(StringRef(" aaa\\$\\@"), Toks[2].getVerbatimBlockText()); 656 657 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); 658 ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText()); 659 660 ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); 661 ASSERT_EQ(StringRef("ccc"), Toks[4].getVerbatimBlockText()); 662 663 ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind()); 664 ASSERT_EQ(StringRef("ddd "), Toks[5].getVerbatimBlockText()); 665 666 ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind()); 667 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[6])); 668 669 ASSERT_EQ(tok::text, Toks[7].getKind()); 670 ASSERT_EQ(StringRef(" Blah "), Toks[7].getText()); 671 672 ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind()); 673 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[8])); 674 675 ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind()); 676 ASSERT_EQ(StringRef(" eee"), Toks[9].getVerbatimBlockText()); 677 678 ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind()); 679 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[10])); 680 681 ASSERT_EQ(tok::text, Toks[11].getKind()); 682 ASSERT_EQ(StringRef(" BlahBlah"), Toks[11].getText()); 683 684 ASSERT_EQ(tok::newline, Toks[12].getKind()); 685 ASSERT_EQ(tok::newline, Toks[13].getKind()); 686} 687 688// LaTeX verbatim blocks. 689TEST_F(CommentLexerTest, VerbatimBlock9) { 690 const char *Source = 691 "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}"; 692 std::vector<Token> Toks; 693 694 lexString(Source, Toks); 695 696 ASSERT_EQ(13U, Toks.size()); 697 698 ASSERT_EQ(tok::text, Toks[0].getKind()); 699 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 700 701 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 702 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[1])); 703 704 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 705 ASSERT_EQ(StringRef(" Aaa "), Toks[2].getVerbatimBlockText()); 706 707 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); 708 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[3])); 709 710 ASSERT_EQ(tok::text, Toks[4].getKind()); 711 ASSERT_EQ(StringRef(" "), Toks[4].getText()); 712 713 ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind()); 714 ASSERT_EQ(StringRef("f["), getVerbatimBlockName(Toks[5])); 715 716 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); 717 ASSERT_EQ(StringRef(" Bbb "), Toks[6].getVerbatimBlockText()); 718 719 ASSERT_EQ(tok::verbatim_block_end, Toks[7].getKind()); 720 ASSERT_EQ(StringRef("f]"), getVerbatimBlockName(Toks[7])); 721 722 ASSERT_EQ(tok::text, Toks[8].getKind()); 723 ASSERT_EQ(StringRef(" "), Toks[8].getText()); 724 725 ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind()); 726 ASSERT_EQ(StringRef("f{"), getVerbatimBlockName(Toks[9])); 727 728 ASSERT_EQ(tok::verbatim_block_line, Toks[10].getKind()); 729 ASSERT_EQ(StringRef(" Ccc "), Toks[10].getVerbatimBlockText()); 730 731 ASSERT_EQ(tok::verbatim_block_end, Toks[11].getKind()); 732 ASSERT_EQ(StringRef("f}"), getVerbatimBlockName(Toks[11])); 733 734 ASSERT_EQ(tok::newline, Toks[12].getKind()); 735} 736 737// Empty verbatim line. 738TEST_F(CommentLexerTest, VerbatimLine1) { 739 const char *Sources[] = { 740 "/// \\fn\n//", 741 "/** \\fn*/" 742 }; 743 744 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 745 std::vector<Token> Toks; 746 747 lexString(Sources[i], Toks); 748 749 ASSERT_EQ(4U, Toks.size()); 750 751 ASSERT_EQ(tok::text, Toks[0].getKind()); 752 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 753 754 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); 755 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1])); 756 757 ASSERT_EQ(tok::newline, Toks[2].getKind()); 758 ASSERT_EQ(tok::newline, Toks[3].getKind()); 759 } 760} 761 762// Verbatim line with Doxygen escape sequences, which should not be expanded. 763TEST_F(CommentLexerTest, VerbatimLine2) { 764 const char *Sources[] = { 765 "/// \\fn void *foo(const char *zzz = \"\\$\");\n//", 766 "/** \\fn void *foo(const char *zzz = \"\\$\");*/" 767 }; 768 769 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 770 std::vector<Token> Toks; 771 772 lexString(Sources[i], Toks); 773 774 ASSERT_EQ(5U, Toks.size()); 775 776 ASSERT_EQ(tok::text, Toks[0].getKind()); 777 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 778 779 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); 780 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1])); 781 782 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); 783 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"), 784 Toks[2].getVerbatimLineText()); 785 786 ASSERT_EQ(tok::newline, Toks[3].getKind()); 787 ASSERT_EQ(tok::newline, Toks[4].getKind()); 788 } 789} 790 791// Verbatim line should not eat anything from next source line. 792TEST_F(CommentLexerTest, VerbatimLine3) { 793 const char *Source = 794 "/** \\fn void *foo(const char *zzz = \"\\$\");\n" 795 " * Meow\n" 796 " */"; 797 798 std::vector<Token> Toks; 799 800 lexString(Source, Toks); 801 802 ASSERT_EQ(9U, Toks.size()); 803 804 ASSERT_EQ(tok::text, Toks[0].getKind()); 805 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 806 807 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); 808 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1])); 809 810 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); 811 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"), 812 Toks[2].getVerbatimLineText()); 813 ASSERT_EQ(tok::newline, Toks[3].getKind()); 814 815 ASSERT_EQ(tok::text, Toks[4].getKind()); 816 ASSERT_EQ(StringRef(" Meow"), Toks[4].getText()); 817 ASSERT_EQ(tok::newline, Toks[5].getKind()); 818 819 ASSERT_EQ(tok::text, Toks[6].getKind()); 820 ASSERT_EQ(StringRef(" "), Toks[6].getText()); 821 822 ASSERT_EQ(tok::newline, Toks[7].getKind()); 823 ASSERT_EQ(tok::newline, Toks[8].getKind()); 824} 825 826TEST_F(CommentLexerTest, HTML1) { 827 const char *Source = 828 "// <"; 829 830 std::vector<Token> Toks; 831 832 lexString(Source, Toks); 833 834 ASSERT_EQ(3U, Toks.size()); 835 836 ASSERT_EQ(tok::text, Toks[0].getKind()); 837 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 838 839 ASSERT_EQ(tok::text, Toks[1].getKind()); 840 ASSERT_EQ(StringRef("<"), Toks[1].getText()); 841 842 ASSERT_EQ(tok::newline, Toks[2].getKind()); 843} 844 845TEST_F(CommentLexerTest, HTML2) { 846 const char *Source = 847 "// a<2"; 848 849 std::vector<Token> Toks; 850 851 lexString(Source, Toks); 852 853 ASSERT_EQ(4U, Toks.size()); 854 855 ASSERT_EQ(tok::text, Toks[0].getKind()); 856 ASSERT_EQ(StringRef(" a"), Toks[0].getText()); 857 858 ASSERT_EQ(tok::text, Toks[1].getKind()); 859 ASSERT_EQ(StringRef("<"), Toks[1].getText()); 860 861 ASSERT_EQ(tok::text, Toks[2].getKind()); 862 ASSERT_EQ(StringRef("2"), Toks[2].getText()); 863 864 ASSERT_EQ(tok::newline, Toks[3].getKind()); 865} 866 867TEST_F(CommentLexerTest, HTML3) { 868 const char *Source = 869 "// < img"; 870 871 std::vector<Token> Toks; 872 873 lexString(Source, Toks); 874 875 ASSERT_EQ(4U, Toks.size()); 876 877 ASSERT_EQ(tok::text, Toks[0].getKind()); 878 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 879 880 ASSERT_EQ(tok::text, Toks[1].getKind()); 881 ASSERT_EQ(StringRef("<"), Toks[1].getText()); 882 883 ASSERT_EQ(tok::text, Toks[2].getKind()); 884 ASSERT_EQ(StringRef(" img"), Toks[2].getText()); 885 886 ASSERT_EQ(tok::newline, Toks[3].getKind()); 887} 888 889TEST_F(CommentLexerTest, HTML4) { 890 const char *Sources[] = { 891 "// <img", 892 "// <img " 893 }; 894 895 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 896 std::vector<Token> Toks; 897 898 lexString(Sources[i], Toks); 899 900 ASSERT_EQ(3U, Toks.size()); 901 902 ASSERT_EQ(tok::text, Toks[0].getKind()); 903 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 904 905 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 906 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 907 908 ASSERT_EQ(tok::newline, Toks[2].getKind()); 909 } 910} 911 912TEST_F(CommentLexerTest, HTML5) { 913 const char *Source = 914 "// <img 42"; 915 916 std::vector<Token> Toks; 917 918 lexString(Source, Toks); 919 920 ASSERT_EQ(4U, Toks.size()); 921 922 ASSERT_EQ(tok::text, Toks[0].getKind()); 923 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 924 925 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 926 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 927 928 ASSERT_EQ(tok::text, Toks[2].getKind()); 929 ASSERT_EQ(StringRef("42"), Toks[2].getText()); 930 931 ASSERT_EQ(tok::newline, Toks[3].getKind()); 932} 933 934TEST_F(CommentLexerTest, HTML6) { 935 const char *Source = "// <img> Meow"; 936 937 std::vector<Token> Toks; 938 939 lexString(Source, Toks); 940 941 ASSERT_EQ(5U, Toks.size()); 942 943 ASSERT_EQ(tok::text, Toks[0].getKind()); 944 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 945 946 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 947 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 948 949 ASSERT_EQ(tok::html_greater, Toks[2].getKind()); 950 951 ASSERT_EQ(tok::text, Toks[3].getKind()); 952 ASSERT_EQ(StringRef(" Meow"), Toks[3].getText()); 953 954 ASSERT_EQ(tok::newline, Toks[4].getKind()); 955} 956 957TEST_F(CommentLexerTest, HTML7) { 958 const char *Source = "// <img="; 959 960 std::vector<Token> Toks; 961 962 lexString(Source, Toks); 963 964 ASSERT_EQ(4U, Toks.size()); 965 966 ASSERT_EQ(tok::text, Toks[0].getKind()); 967 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 968 969 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 970 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 971 972 ASSERT_EQ(tok::text, Toks[2].getKind()); 973 ASSERT_EQ(StringRef("="), Toks[2].getText()); 974 975 ASSERT_EQ(tok::newline, Toks[3].getKind()); 976} 977 978TEST_F(CommentLexerTest, HTML8) { 979 const char *Source = "// <img src=> Meow"; 980 981 std::vector<Token> Toks; 982 983 lexString(Source, Toks); 984 985 ASSERT_EQ(7U, Toks.size()); 986 987 ASSERT_EQ(tok::text, Toks[0].getKind()); 988 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 989 990 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 991 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 992 993 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 994 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 995 996 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 997 998 ASSERT_EQ(tok::html_greater, Toks[4].getKind()); 999 1000 ASSERT_EQ(tok::text, Toks[5].getKind()); 1001 ASSERT_EQ(StringRef(" Meow"), Toks[5].getText()); 1002 1003 ASSERT_EQ(tok::newline, Toks[6].getKind()); 1004} 1005 1006TEST_F(CommentLexerTest, HTML9) { 1007 const char *Sources[] = { 1008 "// <img src", 1009 "// <img src " 1010 }; 1011 1012 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1013 std::vector<Token> Toks; 1014 1015 lexString(Sources[i], Toks); 1016 1017 ASSERT_EQ(4U, Toks.size()); 1018 1019 ASSERT_EQ(tok::text, Toks[0].getKind()); 1020 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1021 1022 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1023 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1024 1025 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1026 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1027 1028 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1029 } 1030} 1031 1032TEST_F(CommentLexerTest, HTML10) { 1033 const char *Sources[] = { 1034 "// <img src=", 1035 "// <img src =" 1036 }; 1037 1038 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1039 std::vector<Token> Toks; 1040 1041 lexString(Sources[i], Toks); 1042 1043 ASSERT_EQ(5U, Toks.size()); 1044 1045 ASSERT_EQ(tok::text, Toks[0].getKind()); 1046 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1047 1048 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1049 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1050 1051 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1052 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1053 1054 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1055 1056 ASSERT_EQ(tok::newline, Toks[4].getKind()); 1057 } 1058} 1059 1060TEST_F(CommentLexerTest, HTML11) { 1061 const char *Sources[] = { 1062 "// <img src=\"", 1063 "// <img src = \"", 1064 "// <img src=\'", 1065 "// <img src = \'" 1066 }; 1067 1068 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1069 std::vector<Token> Toks; 1070 1071 lexString(Sources[i], Toks); 1072 1073 ASSERT_EQ(6U, Toks.size()); 1074 1075 ASSERT_EQ(tok::text, Toks[0].getKind()); 1076 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1077 1078 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1079 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1080 1081 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1082 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1083 1084 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1085 1086 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); 1087 ASSERT_EQ(StringRef(""), Toks[4].getHTMLQuotedString()); 1088 1089 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1090 } 1091} 1092 1093TEST_F(CommentLexerTest, HTML12) { 1094 const char *Source = "// <img src=@"; 1095 1096 std::vector<Token> Toks; 1097 1098 lexString(Source, Toks); 1099 1100 ASSERT_EQ(6U, Toks.size()); 1101 1102 ASSERT_EQ(tok::text, Toks[0].getKind()); 1103 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1104 1105 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1106 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1107 1108 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1109 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1110 1111 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1112 1113 ASSERT_EQ(tok::text, Toks[4].getKind()); 1114 ASSERT_EQ(StringRef("@"), Toks[4].getText()); 1115 1116 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1117} 1118 1119TEST_F(CommentLexerTest, HTML13) { 1120 const char *Sources[] = { 1121 "// <img src=\"val\\\"\\'val", 1122 "// <img src=\"val\\\"\\'val\"", 1123 "// <img src=\'val\\\"\\'val", 1124 "// <img src=\'val\\\"\\'val\'" 1125 }; 1126 1127 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1128 std::vector<Token> Toks; 1129 1130 lexString(Sources[i], Toks); 1131 1132 ASSERT_EQ(6U, Toks.size()); 1133 1134 ASSERT_EQ(tok::text, Toks[0].getKind()); 1135 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1136 1137 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1138 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1139 1140 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1141 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1142 1143 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1144 1145 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); 1146 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString()); 1147 1148 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1149 } 1150} 1151 1152TEST_F(CommentLexerTest, HTML14) { 1153 const char *Sources[] = { 1154 "// <img src=\"val\\\"\\'val\">", 1155 "// <img src=\'val\\\"\\'val\'>" 1156 }; 1157 1158 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1159 std::vector<Token> Toks; 1160 1161 lexString(Sources[i], Toks); 1162 1163 ASSERT_EQ(7U, Toks.size()); 1164 1165 ASSERT_EQ(tok::text, Toks[0].getKind()); 1166 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1167 1168 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1169 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1170 1171 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1172 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1173 1174 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1175 1176 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); 1177 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString()); 1178 1179 ASSERT_EQ(tok::html_greater, Toks[5].getKind()); 1180 1181 ASSERT_EQ(tok::newline, Toks[6].getKind()); 1182 } 1183} 1184 1185TEST_F(CommentLexerTest, HTML15) { 1186 const char *Sources[] = { 1187 "// <img/>", 1188 "// <img />" 1189 }; 1190 1191 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1192 std::vector<Token> Toks; 1193 1194 lexString(Sources[i], Toks); 1195 1196 ASSERT_EQ(4U, Toks.size()); 1197 1198 ASSERT_EQ(tok::text, Toks[0].getKind()); 1199 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1200 1201 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1202 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1203 1204 ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind()); 1205 1206 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1207 } 1208} 1209 1210TEST_F(CommentLexerTest, HTML16) { 1211 const char *Sources[] = { 1212 "// <img/ Aaa", 1213 "// <img / Aaa" 1214 }; 1215 1216 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1217 std::vector<Token> Toks; 1218 1219 lexString(Sources[i], Toks); 1220 1221 ASSERT_EQ(5U, Toks.size()); 1222 1223 ASSERT_EQ(tok::text, Toks[0].getKind()); 1224 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1225 1226 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1227 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1228 1229 ASSERT_EQ(tok::text, Toks[2].getKind()); 1230 ASSERT_EQ(StringRef("/"), Toks[2].getText()); 1231 1232 ASSERT_EQ(tok::text, Toks[3].getKind()); 1233 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getText()); 1234 1235 ASSERT_EQ(tok::newline, Toks[4].getKind()); 1236 } 1237} 1238 1239TEST_F(CommentLexerTest, HTML17) { 1240 const char *Source = "// </"; 1241 1242 std::vector<Token> Toks; 1243 1244 lexString(Source, Toks); 1245 1246 ASSERT_EQ(3U, Toks.size()); 1247 1248 ASSERT_EQ(tok::text, Toks[0].getKind()); 1249 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1250 1251 ASSERT_EQ(tok::text, Toks[1].getKind()); 1252 ASSERT_EQ(StringRef("</"), Toks[1].getText()); 1253 1254 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1255} 1256 1257TEST_F(CommentLexerTest, HTML18) { 1258 const char *Source = "// </@"; 1259 1260 std::vector<Token> Toks; 1261 1262 lexString(Source, Toks); 1263 1264 ASSERT_EQ(4U, Toks.size()); 1265 1266 ASSERT_EQ(tok::text, Toks[0].getKind()); 1267 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1268 1269 ASSERT_EQ(tok::text, Toks[1].getKind()); 1270 ASSERT_EQ(StringRef("</"), Toks[1].getText()); 1271 1272 ASSERT_EQ(tok::text, Toks[2].getKind()); 1273 ASSERT_EQ(StringRef("@"), Toks[2].getText()); 1274 1275 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1276} 1277 1278TEST_F(CommentLexerTest, HTML19) { 1279 const char *Source = "// </img"; 1280 1281 std::vector<Token> Toks; 1282 1283 lexString(Source, Toks); 1284 1285 ASSERT_EQ(3U, Toks.size()); 1286 1287 ASSERT_EQ(tok::text, Toks[0].getKind()); 1288 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1289 1290 ASSERT_EQ(tok::html_end_tag, Toks[1].getKind()); 1291 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagEndName()); 1292 1293 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1294} 1295 1296TEST_F(CommentLexerTest, NotAKnownHTMLTag1) { 1297 const char *Source = "// <tag>"; 1298 1299 std::vector<Token> Toks; 1300 1301 lexString(Source, Toks); 1302 1303 ASSERT_EQ(4U, Toks.size()); 1304 1305 ASSERT_EQ(tok::text, Toks[0].getKind()); 1306 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1307 1308 ASSERT_EQ(tok::text, Toks[1].getKind()); 1309 ASSERT_EQ(StringRef("<tag"), Toks[1].getText()); 1310 1311 ASSERT_EQ(tok::text, Toks[2].getKind()); 1312 ASSERT_EQ(StringRef(">"), Toks[2].getText()); 1313 1314 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1315} 1316 1317TEST_F(CommentLexerTest, NotAKnownHTMLTag2) { 1318 const char *Source = "// </tag>"; 1319 1320 std::vector<Token> Toks; 1321 1322 lexString(Source, Toks); 1323 1324 ASSERT_EQ(4U, Toks.size()); 1325 1326 ASSERT_EQ(tok::text, Toks[0].getKind()); 1327 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1328 1329 ASSERT_EQ(tok::text, Toks[1].getKind()); 1330 ASSERT_EQ(StringRef("</tag"), Toks[1].getText()); 1331 1332 ASSERT_EQ(tok::text, Toks[2].getKind()); 1333 ASSERT_EQ(StringRef(">"), Toks[2].getText()); 1334 1335 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1336} 1337 1338TEST_F(CommentLexerTest, HTMLCharacterReferences1) { 1339 const char *Source = "// &"; 1340 1341 std::vector<Token> Toks; 1342 1343 lexString(Source, Toks); 1344 1345 ASSERT_EQ(3U, Toks.size()); 1346 1347 ASSERT_EQ(tok::text, Toks[0].getKind()); 1348 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1349 1350 ASSERT_EQ(tok::text, Toks[1].getKind()); 1351 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1352 1353 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1354} 1355 1356TEST_F(CommentLexerTest, HTMLCharacterReferences2) { 1357 const char *Source = "// &!"; 1358 1359 std::vector<Token> Toks; 1360 1361 lexString(Source, Toks); 1362 1363 ASSERT_EQ(4U, Toks.size()); 1364 1365 ASSERT_EQ(tok::text, Toks[0].getKind()); 1366 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1367 1368 ASSERT_EQ(tok::text, Toks[1].getKind()); 1369 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1370 1371 ASSERT_EQ(tok::text, Toks[2].getKind()); 1372 ASSERT_EQ(StringRef("!"), Toks[2].getText()); 1373 1374 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1375} 1376 1377TEST_F(CommentLexerTest, HTMLCharacterReferences3) { 1378 const char *Source = "// &"; 1379 1380 std::vector<Token> Toks; 1381 1382 lexString(Source, Toks); 1383 1384 ASSERT_EQ(3U, Toks.size()); 1385 1386 ASSERT_EQ(tok::text, Toks[0].getKind()); 1387 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1388 1389 ASSERT_EQ(tok::text, Toks[1].getKind()); 1390 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1391 1392 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1393} 1394 1395TEST_F(CommentLexerTest, HTMLCharacterReferences4) { 1396 const char *Source = "// &!"; 1397 1398 std::vector<Token> Toks; 1399 1400 lexString(Source, Toks); 1401 1402 ASSERT_EQ(4U, Toks.size()); 1403 1404 ASSERT_EQ(tok::text, Toks[0].getKind()); 1405 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1406 1407 ASSERT_EQ(tok::text, Toks[1].getKind()); 1408 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1409 1410 ASSERT_EQ(tok::text, Toks[2].getKind()); 1411 ASSERT_EQ(StringRef("!"), Toks[2].getText()); 1412 1413 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1414} 1415 1416TEST_F(CommentLexerTest, HTMLCharacterReferences5) { 1417 const char *Source = "// &#"; 1418 1419 std::vector<Token> Toks; 1420 1421 lexString(Source, Toks); 1422 1423 ASSERT_EQ(3U, Toks.size()); 1424 1425 ASSERT_EQ(tok::text, Toks[0].getKind()); 1426 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1427 1428 ASSERT_EQ(tok::text, Toks[1].getKind()); 1429 ASSERT_EQ(StringRef("&#"), Toks[1].getText()); 1430 1431 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1432} 1433 1434TEST_F(CommentLexerTest, HTMLCharacterReferences6) { 1435 const char *Source = "// &#a"; 1436 1437 std::vector<Token> Toks; 1438 1439 lexString(Source, Toks); 1440 1441 ASSERT_EQ(4U, Toks.size()); 1442 1443 ASSERT_EQ(tok::text, Toks[0].getKind()); 1444 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1445 1446 ASSERT_EQ(tok::text, Toks[1].getKind()); 1447 ASSERT_EQ(StringRef("&#"), Toks[1].getText()); 1448 1449 ASSERT_EQ(tok::text, Toks[2].getKind()); 1450 ASSERT_EQ(StringRef("a"), Toks[2].getText()); 1451 1452 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1453} 1454 1455TEST_F(CommentLexerTest, HTMLCharacterReferences7) { 1456 const char *Source = "// *"; 1457 1458 std::vector<Token> Toks; 1459 1460 lexString(Source, Toks); 1461 1462 ASSERT_EQ(3U, Toks.size()); 1463 1464 ASSERT_EQ(tok::text, Toks[0].getKind()); 1465 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1466 1467 ASSERT_EQ(tok::text, Toks[1].getKind()); 1468 ASSERT_EQ(StringRef("*"), Toks[1].getText()); 1469 1470 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1471} 1472 1473TEST_F(CommentLexerTest, HTMLCharacterReferences8) { 1474 const char *Source = "// *a"; 1475 1476 std::vector<Token> Toks; 1477 1478 lexString(Source, Toks); 1479 1480 ASSERT_EQ(4U, Toks.size()); 1481 1482 ASSERT_EQ(tok::text, Toks[0].getKind()); 1483 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1484 1485 ASSERT_EQ(tok::text, Toks[1].getKind()); 1486 ASSERT_EQ(StringRef("*"), Toks[1].getText()); 1487 1488 ASSERT_EQ(tok::text, Toks[2].getKind()); 1489 ASSERT_EQ(StringRef("a"), Toks[2].getText()); 1490 1491 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1492} 1493 1494TEST_F(CommentLexerTest, HTMLCharacterReferences9) { 1495 const char *Source = "// &#x"; 1496 1497 std::vector<Token> Toks; 1498 1499 lexString(Source, Toks); 1500 1501 ASSERT_EQ(3U, Toks.size()); 1502 1503 ASSERT_EQ(tok::text, Toks[0].getKind()); 1504 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1505 1506 ASSERT_EQ(tok::text, Toks[1].getKind()); 1507 ASSERT_EQ(StringRef("&#x"), Toks[1].getText()); 1508 1509 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1510} 1511 1512TEST_F(CommentLexerTest, HTMLCharacterReferences10) { 1513 const char *Source = "// &#xz"; 1514 1515 std::vector<Token> Toks; 1516 1517 lexString(Source, Toks); 1518 1519 ASSERT_EQ(4U, Toks.size()); 1520 1521 ASSERT_EQ(tok::text, Toks[0].getKind()); 1522 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1523 1524 ASSERT_EQ(tok::text, Toks[1].getKind()); 1525 ASSERT_EQ(StringRef("&#x"), Toks[1].getText()); 1526 1527 ASSERT_EQ(tok::text, Toks[2].getKind()); 1528 ASSERT_EQ(StringRef("z"), Toks[2].getText()); 1529 1530 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1531} 1532 1533TEST_F(CommentLexerTest, HTMLCharacterReferences11) { 1534 const char *Source = "// «"; 1535 1536 std::vector<Token> Toks; 1537 1538 lexString(Source, Toks); 1539 1540 ASSERT_EQ(3U, Toks.size()); 1541 1542 ASSERT_EQ(tok::text, Toks[0].getKind()); 1543 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1544 1545 ASSERT_EQ(tok::text, Toks[1].getKind()); 1546 ASSERT_EQ(StringRef("«"), Toks[1].getText()); 1547 1548 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1549} 1550 1551TEST_F(CommentLexerTest, HTMLCharacterReferences12) { 1552 const char *Source = "// «z"; 1553 1554 std::vector<Token> Toks; 1555 1556 lexString(Source, Toks); 1557 1558 ASSERT_EQ(4U, Toks.size()); 1559 1560 ASSERT_EQ(tok::text, Toks[0].getKind()); 1561 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1562 1563 ASSERT_EQ(tok::text, Toks[1].getKind()); 1564 ASSERT_EQ(StringRef("«"), Toks[1].getText()); 1565 1566 ASSERT_EQ(tok::text, Toks[2].getKind()); 1567 ASSERT_EQ(StringRef("z"), Toks[2].getText()); 1568 1569 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1570} 1571 1572TEST_F(CommentLexerTest, HTMLCharacterReferences13) { 1573 const char *Source = "// &"; 1574 1575 std::vector<Token> Toks; 1576 1577 lexString(Source, Toks); 1578 1579 ASSERT_EQ(3U, Toks.size()); 1580 1581 ASSERT_EQ(tok::text, Toks[0].getKind()); 1582 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1583 1584 ASSERT_EQ(tok::text, Toks[1].getKind()); 1585 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1586 1587 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1588} 1589 1590TEST_F(CommentLexerTest, HTMLCharacterReferences14) { 1591 const char *Source = "// &<"; 1592 1593 std::vector<Token> Toks; 1594 1595 lexString(Source, Toks); 1596 1597 ASSERT_EQ(4U, Toks.size()); 1598 1599 ASSERT_EQ(tok::text, Toks[0].getKind()); 1600 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1601 1602 ASSERT_EQ(tok::text, Toks[1].getKind()); 1603 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1604 1605 ASSERT_EQ(tok::text, Toks[2].getKind()); 1606 ASSERT_EQ(StringRef("<"), Toks[2].getText()); 1607 1608 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1609} 1610 1611TEST_F(CommentLexerTest, HTMLCharacterReferences15) { 1612 const char *Source = "// & meow"; 1613 1614 std::vector<Token> Toks; 1615 1616 lexString(Source, Toks); 1617 1618 ASSERT_EQ(4U, Toks.size()); 1619 1620 ASSERT_EQ(tok::text, Toks[0].getKind()); 1621 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1622 1623 ASSERT_EQ(tok::text, Toks[1].getKind()); 1624 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1625 1626 ASSERT_EQ(tok::text, Toks[2].getKind()); 1627 ASSERT_EQ(StringRef(" meow"), Toks[2].getText()); 1628 1629 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1630} 1631 1632TEST_F(CommentLexerTest, HTMLCharacterReferences16) { 1633 const char *Sources[] = { 1634 "// =", 1635 "// =", 1636 "// =" 1637 }; 1638 1639 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1640 std::vector<Token> Toks; 1641 1642 lexString(Sources[i], Toks); 1643 1644 ASSERT_EQ(3U, Toks.size()); 1645 1646 ASSERT_EQ(tok::text, Toks[0].getKind()); 1647 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1648 1649 ASSERT_EQ(tok::text, Toks[1].getKind()); 1650 ASSERT_EQ(StringRef("="), Toks[1].getText()); 1651 1652 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1653 } 1654} 1655 1656TEST_F(CommentLexerTest, MultipleComments) { 1657 const char *Source = 1658 "// Aaa\n" 1659 "/// Bbb\n" 1660 "/* Ccc\n" 1661 " * Ddd*/\n" 1662 "/** Eee*/"; 1663 1664 std::vector<Token> Toks; 1665 1666 lexString(Source, Toks); 1667 1668 ASSERT_EQ(12U, Toks.size()); 1669 1670 ASSERT_EQ(tok::text, Toks[0].getKind()); 1671 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); 1672 ASSERT_EQ(tok::newline, Toks[1].getKind()); 1673 1674 ASSERT_EQ(tok::text, Toks[2].getKind()); 1675 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText()); 1676 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1677 1678 ASSERT_EQ(tok::text, Toks[4].getKind()); 1679 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText()); 1680 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1681 1682 ASSERT_EQ(tok::text, Toks[6].getKind()); 1683 ASSERT_EQ(StringRef(" Ddd"), Toks[6].getText()); 1684 ASSERT_EQ(tok::newline, Toks[7].getKind()); 1685 ASSERT_EQ(tok::newline, Toks[8].getKind()); 1686 1687 ASSERT_EQ(tok::text, Toks[9].getKind()); 1688 ASSERT_EQ(StringRef(" Eee"), Toks[9].getText()); 1689 1690 ASSERT_EQ(tok::newline, Toks[10].getKind()); 1691 ASSERT_EQ(tok::newline, Toks[11].getKind()); 1692} 1693 1694} // end namespace comments 1695} // end namespace clang 1696 1697