// CommentLexer.cpp revision 44a3ddbf7d2f46a002b5e85b240359c435509b4e
1//===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "clang/AST/CommentLexer.h" 11#include "clang/AST/CommentCommandTraits.h" 12#include "clang/Basic/Diagnostic.h" 13#include "clang/Basic/DiagnosticOptions.h" 14#include "clang/Basic/FileManager.h" 15#include "clang/Basic/SourceManager.h" 16#include "llvm/ADT/STLExtras.h" 17#include "gtest/gtest.h" 18#include <vector> 19 20using namespace llvm; 21using namespace clang; 22 23namespace clang { 24namespace comments { 25 26namespace { 27class CommentLexerTest : public ::testing::Test { 28protected: 29 CommentLexerTest() 30 : FileMgr(FileMgrOpts), 31 DiagID(new DiagnosticIDs()), 32 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), 33 SourceMgr(Diags, FileMgr), 34 Traits(Allocator) { 35 } 36 37 FileSystemOptions FileMgrOpts; 38 FileManager FileMgr; 39 IntrusiveRefCntPtr<DiagnosticIDs> DiagID; 40 DiagnosticsEngine Diags; 41 SourceManager SourceMgr; 42 llvm::BumpPtrAllocator Allocator; 43 CommandTraits Traits; 44 45 void lexString(const char *Source, std::vector<Token> &Toks); 46 47 StringRef getCommandName(const Token &Tok) { 48 return Traits.getCommandInfo(Tok.getCommandID())->Name; 49 } 50 51 StringRef getVerbatimBlockName(const Token &Tok) { 52 return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name; 53 } 54 55 StringRef getVerbatimLineName(const Token &Tok) { 56 return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name; 57 } 58}; 59 60void CommentLexerTest::lexString(const char *Source, 61 std::vector<Token> &Toks) { 62 MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source); 63 FileID File = SourceMgr.createFileIDForMemBuffer(Buf); 64 SourceLocation Begin = SourceMgr.getLocForStartOfFile(File); 65 66 Lexer 
L(Allocator, Traits, Begin, Source, Source + strlen(Source)); 67 68 while (1) { 69 Token Tok; 70 L.lex(Tok); 71 if (Tok.is(tok::eof)) 72 break; 73 Toks.push_back(Tok); 74 } 75} 76 77} // unnamed namespace 78 79// Empty source range should be handled. 80TEST_F(CommentLexerTest, Basic1) { 81 const char *Source = ""; 82 std::vector<Token> Toks; 83 84 lexString(Source, Toks); 85 86 ASSERT_EQ(0U, Toks.size()); 87} 88 89// Empty comments should be handled. 90TEST_F(CommentLexerTest, Basic2) { 91 const char *Sources[] = { 92 "//", "///", "//!", "///<", "//!<" 93 }; 94 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 95 std::vector<Token> Toks; 96 97 lexString(Sources[i], Toks); 98 99 ASSERT_EQ(1U, Toks.size()); 100 101 ASSERT_EQ(tok::newline, Toks[0].getKind()); 102 } 103} 104 105// Empty comments should be handled. 106TEST_F(CommentLexerTest, Basic3) { 107 const char *Sources[] = { 108 "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/" 109 }; 110 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 111 std::vector<Token> Toks; 112 113 lexString(Sources[i], Toks); 114 115 ASSERT_EQ(2U, Toks.size()); 116 117 ASSERT_EQ(tok::newline, Toks[0].getKind()); 118 ASSERT_EQ(tok::newline, Toks[1].getKind()); 119 } 120} 121 122// Single comment with plain text. 123TEST_F(CommentLexerTest, Basic4) { 124 const char *Sources[] = { 125 "// Meow", "/// Meow", "//! Meow", 126 "// Meow\n", "// Meow\r\n", "//! Meow\r", 127 }; 128 129 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 130 std::vector<Token> Toks; 131 132 lexString(Sources[i], Toks); 133 134 ASSERT_EQ(2U, Toks.size()); 135 136 ASSERT_EQ(tok::text, Toks[0].getKind()); 137 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText()); 138 139 ASSERT_EQ(tok::newline, Toks[1].getKind()); 140 } 141} 142 143// Single comment with plain text. 144TEST_F(CommentLexerTest, Basic5) { 145 const char *Sources[] = { 146 "/* Meow*/", "/** Meow*/", "/*! 
Meow*/" 147 }; 148 149 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 150 std::vector<Token> Toks; 151 152 lexString(Sources[i], Toks); 153 154 ASSERT_EQ(3U, Toks.size()); 155 156 ASSERT_EQ(tok::text, Toks[0].getKind()); 157 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText()); 158 159 ASSERT_EQ(tok::newline, Toks[1].getKind()); 160 ASSERT_EQ(tok::newline, Toks[2].getKind()); 161 } 162} 163 164// Test newline escaping. 165TEST_F(CommentLexerTest, Basic6) { 166 const char *Sources[] = { 167 "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n", 168 "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n", 169 "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r" 170 }; 171 172 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 173 std::vector<Token> Toks; 174 175 lexString(Sources[i], Toks); 176 177 ASSERT_EQ(10U, Toks.size()); 178 179 ASSERT_EQ(tok::text, Toks[0].getKind()); 180 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); 181 ASSERT_EQ(tok::text, Toks[1].getKind()); 182 ASSERT_EQ(StringRef("\\"), Toks[1].getText()); 183 ASSERT_EQ(tok::newline, Toks[2].getKind()); 184 185 ASSERT_EQ(tok::text, Toks[3].getKind()); 186 ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText()); 187 ASSERT_EQ(tok::text, Toks[4].getKind()); 188 ASSERT_EQ(StringRef("\\"), Toks[4].getText()); 189 ASSERT_EQ(tok::text, Toks[5].getKind()); 190 ASSERT_EQ(StringRef(" "), Toks[5].getText()); 191 ASSERT_EQ(tok::newline, Toks[6].getKind()); 192 193 ASSERT_EQ(tok::text, Toks[7].getKind()); 194 ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText()); 195 ASSERT_EQ(tok::newline, Toks[8].getKind()); 196 197 ASSERT_EQ(tok::newline, Toks[9].getKind()); 198 } 199} 200 201// Check that we skip C-style aligned stars correctly. 202TEST_F(CommentLexerTest, Basic7) { 203 const char *Source = 204 "/* Aaa\n" 205 " * Bbb\r\n" 206 "\t* Ccc\n" 207 " ! 
Ddd\n" 208 " * Eee\n" 209 " ** Fff\n" 210 " */"; 211 std::vector<Token> Toks; 212 213 lexString(Source, Toks); 214 215 ASSERT_EQ(15U, Toks.size()); 216 217 ASSERT_EQ(tok::text, Toks[0].getKind()); 218 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); 219 ASSERT_EQ(tok::newline, Toks[1].getKind()); 220 221 ASSERT_EQ(tok::text, Toks[2].getKind()); 222 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText()); 223 ASSERT_EQ(tok::newline, Toks[3].getKind()); 224 225 ASSERT_EQ(tok::text, Toks[4].getKind()); 226 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText()); 227 ASSERT_EQ(tok::newline, Toks[5].getKind()); 228 229 ASSERT_EQ(tok::text, Toks[6].getKind()); 230 ASSERT_EQ(StringRef(" ! Ddd"), Toks[6].getText()); 231 ASSERT_EQ(tok::newline, Toks[7].getKind()); 232 233 ASSERT_EQ(tok::text, Toks[8].getKind()); 234 ASSERT_EQ(StringRef(" Eee"), Toks[8].getText()); 235 ASSERT_EQ(tok::newline, Toks[9].getKind()); 236 237 ASSERT_EQ(tok::text, Toks[10].getKind()); 238 ASSERT_EQ(StringRef("* Fff"), Toks[10].getText()); 239 ASSERT_EQ(tok::newline, Toks[11].getKind()); 240 241 ASSERT_EQ(tok::text, Toks[12].getKind()); 242 ASSERT_EQ(StringRef(" "), Toks[12].getText()); 243 244 ASSERT_EQ(tok::newline, Toks[13].getKind()); 245 ASSERT_EQ(tok::newline, Toks[14].getKind()); 246} 247 248// A command marker followed by comment end. 249TEST_F(CommentLexerTest, DoxygenCommand1) { 250 const char *Sources[] = { "//@", "///@", "//!@" }; 251 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 252 std::vector<Token> Toks; 253 254 lexString(Sources[i], Toks); 255 256 ASSERT_EQ(2U, Toks.size()); 257 258 ASSERT_EQ(tok::text, Toks[0].getKind()); 259 ASSERT_EQ(StringRef("@"), Toks[0].getText()); 260 261 ASSERT_EQ(tok::newline, Toks[1].getKind()); 262 } 263} 264 265// A command marker followed by comment end. 
266TEST_F(CommentLexerTest, DoxygenCommand2) { 267 const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"}; 268 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 269 std::vector<Token> Toks; 270 271 lexString(Sources[i], Toks); 272 273 ASSERT_EQ(3U, Toks.size()); 274 275 ASSERT_EQ(tok::text, Toks[0].getKind()); 276 ASSERT_EQ(StringRef("@"), Toks[0].getText()); 277 278 ASSERT_EQ(tok::newline, Toks[1].getKind()); 279 ASSERT_EQ(tok::newline, Toks[2].getKind()); 280 } 281} 282 283// A command marker followed by comment end. 284TEST_F(CommentLexerTest, DoxygenCommand3) { 285 const char *Sources[] = { "/*\\*/", "/**\\*/" }; 286 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 287 std::vector<Token> Toks; 288 289 lexString(Sources[i], Toks); 290 291 ASSERT_EQ(3U, Toks.size()); 292 293 ASSERT_EQ(tok::text, Toks[0].getKind()); 294 ASSERT_EQ(StringRef("\\"), Toks[0].getText()); 295 296 ASSERT_EQ(tok::newline, Toks[1].getKind()); 297 ASSERT_EQ(tok::newline, Toks[2].getKind()); 298 } 299} 300 301// Doxygen escape sequences. 302TEST_F(CommentLexerTest, DoxygenCommand4) { 303 const char *Source = 304 "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::"; 305 const char *Text[] = { 306 " ", 307 "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ", 308 "<", " ", ">", " ", "%", " ", "\"", " ", ".", " ", 309 "::", "" 310 }; 311 312 std::vector<Token> Toks; 313 314 lexString(Source, Toks); 315 316 ASSERT_EQ(array_lengthof(Text), Toks.size()); 317 318 for (size_t i = 0, e = Toks.size(); i != e; i++) { 319 if(Toks[i].is(tok::text)) 320 ASSERT_EQ(StringRef(Text[i]), Toks[i].getText()) 321 << "index " << i; 322 } 323} 324 325// A command marker followed by a non-letter that is not a part of an escape 326// sequence. 
327TEST_F(CommentLexerTest, DoxygenCommand5) { 328 const char *Source = "/// \\^ \\0"; 329 std::vector<Token> Toks; 330 331 lexString(Source, Toks); 332 333 ASSERT_EQ(6U, Toks.size()); 334 335 ASSERT_EQ(tok::text, Toks[0].getKind()); 336 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 337 338 ASSERT_EQ(tok::text, Toks[1].getKind()); 339 ASSERT_EQ(StringRef("\\"), Toks[1].getText()); 340 341 ASSERT_EQ(tok::text, Toks[2].getKind()); 342 ASSERT_EQ(StringRef("^ "), Toks[2].getText()); 343 344 ASSERT_EQ(tok::text, Toks[3].getKind()); 345 ASSERT_EQ(StringRef("\\"), Toks[3].getText()); 346 347 ASSERT_EQ(tok::text, Toks[4].getKind()); 348 ASSERT_EQ(StringRef("0"), Toks[4].getText()); 349 350 ASSERT_EQ(tok::newline, Toks[5].getKind()); 351} 352 353TEST_F(CommentLexerTest, DoxygenCommand6) { 354 const char *Source = "/// \\brief Aaa."; 355 std::vector<Token> Toks; 356 357 lexString(Source, Toks); 358 359 ASSERT_EQ(4U, Toks.size()); 360 361 ASSERT_EQ(tok::text, Toks[0].getKind()); 362 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 363 364 ASSERT_EQ(tok::command, Toks[1].getKind()); 365 ASSERT_EQ(StringRef("brief"), getCommandName(Toks[1])); 366 367 ASSERT_EQ(tok::text, Toks[2].getKind()); 368 ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText()); 369 370 ASSERT_EQ(tok::newline, Toks[3].getKind()); 371} 372 373TEST_F(CommentLexerTest, DoxygenCommand7) { 374 const char *Source = "/// \\em\\em \\em\t\\em\n"; 375 std::vector<Token> Toks; 376 377 lexString(Source, Toks); 378 379 ASSERT_EQ(8U, Toks.size()); 380 381 ASSERT_EQ(tok::text, Toks[0].getKind()); 382 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 383 384 ASSERT_EQ(tok::command, Toks[1].getKind()); 385 ASSERT_EQ(StringRef("em"), getCommandName(Toks[1])); 386 387 ASSERT_EQ(tok::command, Toks[2].getKind()); 388 ASSERT_EQ(StringRef("em"), getCommandName(Toks[2])); 389 390 ASSERT_EQ(tok::text, Toks[3].getKind()); 391 ASSERT_EQ(StringRef(" "), Toks[3].getText()); 392 393 ASSERT_EQ(tok::command, Toks[4].getKind()); 394 
ASSERT_EQ(StringRef("em"), getCommandName(Toks[4])); 395 396 ASSERT_EQ(tok::text, Toks[5].getKind()); 397 ASSERT_EQ(StringRef("\t"), Toks[5].getText()); 398 399 ASSERT_EQ(tok::command, Toks[6].getKind()); 400 ASSERT_EQ(StringRef("em"), getCommandName(Toks[6])); 401 402 ASSERT_EQ(tok::newline, Toks[7].getKind()); 403} 404 405TEST_F(CommentLexerTest, DoxygenCommand8) { 406 const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n"; 407 std::vector<Token> Toks; 408 409 lexString(Source, Toks); 410 411 ASSERT_EQ(8U, Toks.size()); 412 413 ASSERT_EQ(tok::text, Toks[0].getKind()); 414 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 415 416 ASSERT_EQ(tok::unknown_command, Toks[1].getKind()); 417 ASSERT_EQ(StringRef("aaa"), Toks[1].getUnknownCommandName()); 418 419 ASSERT_EQ(tok::unknown_command, Toks[2].getKind()); 420 ASSERT_EQ(StringRef("bbb"), Toks[2].getUnknownCommandName()); 421 422 ASSERT_EQ(tok::text, Toks[3].getKind()); 423 ASSERT_EQ(StringRef(" "), Toks[3].getText()); 424 425 ASSERT_EQ(tok::unknown_command, Toks[4].getKind()); 426 ASSERT_EQ(StringRef("ccc"), Toks[4].getUnknownCommandName()); 427 428 ASSERT_EQ(tok::text, Toks[5].getKind()); 429 ASSERT_EQ(StringRef("\t"), Toks[5].getText()); 430 431 ASSERT_EQ(tok::unknown_command, Toks[6].getKind()); 432 ASSERT_EQ(StringRef("ddd"), Toks[6].getUnknownCommandName()); 433 434 ASSERT_EQ(tok::newline, Toks[7].getKind()); 435} 436 437TEST_F(CommentLexerTest, DoxygenCommand9) { 438 const char *Source = "// \\c\n"; 439 std::vector<Token> Toks; 440 441 lexString(Source, Toks); 442 443 ASSERT_EQ(3U, Toks.size()); 444 445 ASSERT_EQ(tok::text, Toks[0].getKind()); 446 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 447 448 ASSERT_EQ(tok::command, Toks[1].getKind()); 449 ASSERT_EQ(StringRef("c"), getCommandName(Toks[1])); 450 451 ASSERT_EQ(tok::newline, Toks[2].getKind()); 452} 453 454// Empty verbatim block. 
455TEST_F(CommentLexerTest, VerbatimBlock1) { 456 const char *Sources[] = { 457 "/// \\verbatim\\endverbatim\n//", 458 "/** \\verbatim\\endverbatim*/" 459 }; 460 461 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 462 std::vector<Token> Toks; 463 464 lexString(Sources[i], Toks); 465 466 ASSERT_EQ(5U, Toks.size()); 467 468 ASSERT_EQ(tok::text, Toks[0].getKind()); 469 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 470 471 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 472 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 473 474 ASSERT_EQ(tok::verbatim_block_end, Toks[2].getKind()); 475 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[2])); 476 477 ASSERT_EQ(tok::newline, Toks[3].getKind()); 478 ASSERT_EQ(tok::newline, Toks[4].getKind()); 479 } 480} 481 482// Empty verbatim block without an end command. 483TEST_F(CommentLexerTest, VerbatimBlock2) { 484 const char *Source = "/// \\verbatim"; 485 486 std::vector<Token> Toks; 487 488 lexString(Source, Toks); 489 490 ASSERT_EQ(3U, Toks.size()); 491 492 ASSERT_EQ(tok::text, Toks[0].getKind()); 493 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 494 495 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 496 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 497 498 ASSERT_EQ(tok::newline, Toks[2].getKind()); 499} 500 501// Empty verbatim block without an end command. 502TEST_F(CommentLexerTest, VerbatimBlock3) { 503 const char *Source = "/** \\verbatim*/"; 504 505 std::vector<Token> Toks; 506 507 lexString(Source, Toks); 508 509 ASSERT_EQ(4U, Toks.size()); 510 511 ASSERT_EQ(tok::text, Toks[0].getKind()); 512 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 513 514 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 515 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 516 517 ASSERT_EQ(tok::newline, Toks[2].getKind()); 518 ASSERT_EQ(tok::newline, Toks[3].getKind()); 519} 520 521// Single-line verbatim block. 
522TEST_F(CommentLexerTest, VerbatimBlock4) { 523 const char *Sources[] = { 524 "/// Meow \\verbatim aaa \\endverbatim\n//", 525 "/** Meow \\verbatim aaa \\endverbatim*/" 526 }; 527 528 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 529 std::vector<Token> Toks; 530 531 lexString(Sources[i], Toks); 532 533 ASSERT_EQ(6U, Toks.size()); 534 535 ASSERT_EQ(tok::text, Toks[0].getKind()); 536 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); 537 538 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 539 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 540 541 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 542 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText()); 543 544 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); 545 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[3])); 546 547 ASSERT_EQ(tok::newline, Toks[4].getKind()); 548 ASSERT_EQ(tok::newline, Toks[5].getKind()); 549 } 550} 551 552// Single-line verbatim block without an end command. 
553TEST_F(CommentLexerTest, VerbatimBlock5) { 554 const char *Sources[] = { 555 "/// Meow \\verbatim aaa \n//", 556 "/** Meow \\verbatim aaa */" 557 }; 558 559 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 560 std::vector<Token> Toks; 561 562 lexString(Sources[i], Toks); 563 564 ASSERT_EQ(5U, Toks.size()); 565 566 ASSERT_EQ(tok::text, Toks[0].getKind()); 567 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); 568 569 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 570 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 571 572 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 573 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText()); 574 575 ASSERT_EQ(tok::newline, Toks[3].getKind()); 576 ASSERT_EQ(tok::newline, Toks[4].getKind()); 577 } 578} 579 580TEST_F(CommentLexerTest, VerbatimBlock6) { 581 const char *Source = 582 "// \\verbatim\n" 583 "// Aaa\n" 584 "//\n" 585 "// Bbb\n" 586 "// \\endverbatim\n"; 587 588 std::vector<Token> Toks; 589 590 lexString(Source, Toks); 591 592 ASSERT_EQ(10U, Toks.size()); 593 594 ASSERT_EQ(tok::text, Toks[0].getKind()); 595 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 596 597 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 598 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 599 600 ASSERT_EQ(tok::newline, Toks[2].getKind()); 601 602 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); 603 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getVerbatimBlockText()); 604 605 ASSERT_EQ(tok::newline, Toks[4].getKind()); 606 607 ASSERT_EQ(tok::newline, Toks[5].getKind()); 608 609 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); 610 ASSERT_EQ(StringRef(" Bbb"), Toks[6].getVerbatimBlockText()); 611 612 ASSERT_EQ(tok::newline, Toks[7].getKind()); 613 614 ASSERT_EQ(tok::verbatim_block_end, Toks[8].getKind()); 615 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[8])); 616 617 ASSERT_EQ(tok::newline, Toks[9].getKind()); 618} 619 620TEST_F(CommentLexerTest, 
VerbatimBlock7) { 621 const char *Source = 622 "/* \\verbatim\n" 623 " * Aaa\n" 624 " *\n" 625 " * Bbb\n" 626 " * \\endverbatim\n" 627 " */"; 628 629 std::vector<Token> Toks; 630 631 lexString(Source, Toks); 632 633 ASSERT_EQ(10U, Toks.size()); 634 635 ASSERT_EQ(tok::text, Toks[0].getKind()); 636 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 637 638 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 639 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 640 641 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 642 ASSERT_EQ(StringRef(" Aaa"), Toks[2].getVerbatimBlockText()); 643 644 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); 645 ASSERT_EQ(StringRef(""), Toks[3].getVerbatimBlockText()); 646 647 ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); 648 ASSERT_EQ(StringRef(" Bbb"), Toks[4].getVerbatimBlockText()); 649 650 ASSERT_EQ(tok::verbatim_block_end, Toks[5].getKind()); 651 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[5])); 652 653 ASSERT_EQ(tok::newline, Toks[6].getKind()); 654 655 ASSERT_EQ(tok::text, Toks[7].getKind()); 656 ASSERT_EQ(StringRef(" "), Toks[7].getText()); 657 658 ASSERT_EQ(tok::newline, Toks[8].getKind()); 659 ASSERT_EQ(tok::newline, Toks[9].getKind()); 660} 661 662// Complex test for verbatim blocks. 
663TEST_F(CommentLexerTest, VerbatimBlock8) { 664 const char *Source = 665 "/* Meow \\verbatim aaa\\$\\@\n" 666 "bbb \\endverbati\r" 667 "ccc\r\n" 668 "ddd \\endverbatim Blah \\verbatim eee\n" 669 "\\endverbatim BlahBlah*/"; 670 std::vector<Token> Toks; 671 672 lexString(Source, Toks); 673 674 ASSERT_EQ(14U, Toks.size()); 675 676 ASSERT_EQ(tok::text, Toks[0].getKind()); 677 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); 678 679 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 680 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1])); 681 682 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 683 ASSERT_EQ(StringRef(" aaa\\$\\@"), Toks[2].getVerbatimBlockText()); 684 685 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); 686 ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText()); 687 688 ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); 689 ASSERT_EQ(StringRef("ccc"), Toks[4].getVerbatimBlockText()); 690 691 ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind()); 692 ASSERT_EQ(StringRef("ddd "), Toks[5].getVerbatimBlockText()); 693 694 ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind()); 695 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[6])); 696 697 ASSERT_EQ(tok::text, Toks[7].getKind()); 698 ASSERT_EQ(StringRef(" Blah "), Toks[7].getText()); 699 700 ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind()); 701 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[8])); 702 703 ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind()); 704 ASSERT_EQ(StringRef(" eee"), Toks[9].getVerbatimBlockText()); 705 706 ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind()); 707 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[10])); 708 709 ASSERT_EQ(tok::text, Toks[11].getKind()); 710 ASSERT_EQ(StringRef(" BlahBlah"), Toks[11].getText()); 711 712 ASSERT_EQ(tok::newline, Toks[12].getKind()); 713 ASSERT_EQ(tok::newline, Toks[13].getKind()); 714} 715 716// LaTeX verbatim blocks. 
717TEST_F(CommentLexerTest, VerbatimBlock9) { 718 const char *Source = 719 "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}"; 720 std::vector<Token> Toks; 721 722 lexString(Source, Toks); 723 724 ASSERT_EQ(13U, Toks.size()); 725 726 ASSERT_EQ(tok::text, Toks[0].getKind()); 727 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 728 729 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); 730 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[1])); 731 732 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); 733 ASSERT_EQ(StringRef(" Aaa "), Toks[2].getVerbatimBlockText()); 734 735 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); 736 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[3])); 737 738 ASSERT_EQ(tok::text, Toks[4].getKind()); 739 ASSERT_EQ(StringRef(" "), Toks[4].getText()); 740 741 ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind()); 742 ASSERT_EQ(StringRef("f["), getVerbatimBlockName(Toks[5])); 743 744 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); 745 ASSERT_EQ(StringRef(" Bbb "), Toks[6].getVerbatimBlockText()); 746 747 ASSERT_EQ(tok::verbatim_block_end, Toks[7].getKind()); 748 ASSERT_EQ(StringRef("f]"), getVerbatimBlockName(Toks[7])); 749 750 ASSERT_EQ(tok::text, Toks[8].getKind()); 751 ASSERT_EQ(StringRef(" "), Toks[8].getText()); 752 753 ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind()); 754 ASSERT_EQ(StringRef("f{"), getVerbatimBlockName(Toks[9])); 755 756 ASSERT_EQ(tok::verbatim_block_line, Toks[10].getKind()); 757 ASSERT_EQ(StringRef(" Ccc "), Toks[10].getVerbatimBlockText()); 758 759 ASSERT_EQ(tok::verbatim_block_end, Toks[11].getKind()); 760 ASSERT_EQ(StringRef("f}"), getVerbatimBlockName(Toks[11])); 761 762 ASSERT_EQ(tok::newline, Toks[12].getKind()); 763} 764 765// Empty verbatim line. 
766TEST_F(CommentLexerTest, VerbatimLine1) { 767 const char *Sources[] = { 768 "/// \\fn\n//", 769 "/** \\fn*/" 770 }; 771 772 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 773 std::vector<Token> Toks; 774 775 lexString(Sources[i], Toks); 776 777 ASSERT_EQ(4U, Toks.size()); 778 779 ASSERT_EQ(tok::text, Toks[0].getKind()); 780 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 781 782 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); 783 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1])); 784 785 ASSERT_EQ(tok::newline, Toks[2].getKind()); 786 ASSERT_EQ(tok::newline, Toks[3].getKind()); 787 } 788} 789 790// Verbatim line with Doxygen escape sequences, which should not be expanded. 791TEST_F(CommentLexerTest, VerbatimLine2) { 792 const char *Sources[] = { 793 "/// \\fn void *foo(const char *zzz = \"\\$\");\n//", 794 "/** \\fn void *foo(const char *zzz = \"\\$\");*/" 795 }; 796 797 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 798 std::vector<Token> Toks; 799 800 lexString(Sources[i], Toks); 801 802 ASSERT_EQ(5U, Toks.size()); 803 804 ASSERT_EQ(tok::text, Toks[0].getKind()); 805 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 806 807 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); 808 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1])); 809 810 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); 811 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"), 812 Toks[2].getVerbatimLineText()); 813 814 ASSERT_EQ(tok::newline, Toks[3].getKind()); 815 ASSERT_EQ(tok::newline, Toks[4].getKind()); 816 } 817} 818 819// Verbatim line should not eat anything from next source line. 
820TEST_F(CommentLexerTest, VerbatimLine3) { 821 const char *Source = 822 "/** \\fn void *foo(const char *zzz = \"\\$\");\n" 823 " * Meow\n" 824 " */"; 825 826 std::vector<Token> Toks; 827 828 lexString(Source, Toks); 829 830 ASSERT_EQ(9U, Toks.size()); 831 832 ASSERT_EQ(tok::text, Toks[0].getKind()); 833 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 834 835 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); 836 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1])); 837 838 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); 839 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"), 840 Toks[2].getVerbatimLineText()); 841 ASSERT_EQ(tok::newline, Toks[3].getKind()); 842 843 ASSERT_EQ(tok::text, Toks[4].getKind()); 844 ASSERT_EQ(StringRef(" Meow"), Toks[4].getText()); 845 ASSERT_EQ(tok::newline, Toks[5].getKind()); 846 847 ASSERT_EQ(tok::text, Toks[6].getKind()); 848 ASSERT_EQ(StringRef(" "), Toks[6].getText()); 849 850 ASSERT_EQ(tok::newline, Toks[7].getKind()); 851 ASSERT_EQ(tok::newline, Toks[8].getKind()); 852} 853 854TEST_F(CommentLexerTest, HTML1) { 855 const char *Source = 856 "// <"; 857 858 std::vector<Token> Toks; 859 860 lexString(Source, Toks); 861 862 ASSERT_EQ(3U, Toks.size()); 863 864 ASSERT_EQ(tok::text, Toks[0].getKind()); 865 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 866 867 ASSERT_EQ(tok::text, Toks[1].getKind()); 868 ASSERT_EQ(StringRef("<"), Toks[1].getText()); 869 870 ASSERT_EQ(tok::newline, Toks[2].getKind()); 871} 872 873TEST_F(CommentLexerTest, HTML2) { 874 const char *Source = 875 "// a<2"; 876 877 std::vector<Token> Toks; 878 879 lexString(Source, Toks); 880 881 ASSERT_EQ(4U, Toks.size()); 882 883 ASSERT_EQ(tok::text, Toks[0].getKind()); 884 ASSERT_EQ(StringRef(" a"), Toks[0].getText()); 885 886 ASSERT_EQ(tok::text, Toks[1].getKind()); 887 ASSERT_EQ(StringRef("<"), Toks[1].getText()); 888 889 ASSERT_EQ(tok::text, Toks[2].getKind()); 890 ASSERT_EQ(StringRef("2"), Toks[2].getText()); 891 892 ASSERT_EQ(tok::newline, 
Toks[3].getKind()); 893} 894 895TEST_F(CommentLexerTest, HTML3) { 896 const char *Source = 897 "// < img"; 898 899 std::vector<Token> Toks; 900 901 lexString(Source, Toks); 902 903 ASSERT_EQ(4U, Toks.size()); 904 905 ASSERT_EQ(tok::text, Toks[0].getKind()); 906 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 907 908 ASSERT_EQ(tok::text, Toks[1].getKind()); 909 ASSERT_EQ(StringRef("<"), Toks[1].getText()); 910 911 ASSERT_EQ(tok::text, Toks[2].getKind()); 912 ASSERT_EQ(StringRef(" img"), Toks[2].getText()); 913 914 ASSERT_EQ(tok::newline, Toks[3].getKind()); 915} 916 917TEST_F(CommentLexerTest, HTML4) { 918 const char *Sources[] = { 919 "// <img", 920 "// <img " 921 }; 922 923 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 924 std::vector<Token> Toks; 925 926 lexString(Sources[i], Toks); 927 928 ASSERT_EQ(3U, Toks.size()); 929 930 ASSERT_EQ(tok::text, Toks[0].getKind()); 931 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 932 933 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 934 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 935 936 ASSERT_EQ(tok::newline, Toks[2].getKind()); 937 } 938} 939 940TEST_F(CommentLexerTest, HTML5) { 941 const char *Source = 942 "// <img 42"; 943 944 std::vector<Token> Toks; 945 946 lexString(Source, Toks); 947 948 ASSERT_EQ(4U, Toks.size()); 949 950 ASSERT_EQ(tok::text, Toks[0].getKind()); 951 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 952 953 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 954 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 955 956 ASSERT_EQ(tok::text, Toks[2].getKind()); 957 ASSERT_EQ(StringRef("42"), Toks[2].getText()); 958 959 ASSERT_EQ(tok::newline, Toks[3].getKind()); 960} 961 962TEST_F(CommentLexerTest, HTML6) { 963 const char *Source = "// <img> Meow"; 964 965 std::vector<Token> Toks; 966 967 lexString(Source, Toks); 968 969 ASSERT_EQ(5U, Toks.size()); 970 971 ASSERT_EQ(tok::text, Toks[0].getKind()); 972 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 973 974 
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 975 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 976 977 ASSERT_EQ(tok::html_greater, Toks[2].getKind()); 978 979 ASSERT_EQ(tok::text, Toks[3].getKind()); 980 ASSERT_EQ(StringRef(" Meow"), Toks[3].getText()); 981 982 ASSERT_EQ(tok::newline, Toks[4].getKind()); 983} 984 985TEST_F(CommentLexerTest, HTML7) { 986 const char *Source = "// <img="; 987 988 std::vector<Token> Toks; 989 990 lexString(Source, Toks); 991 992 ASSERT_EQ(4U, Toks.size()); 993 994 ASSERT_EQ(tok::text, Toks[0].getKind()); 995 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 996 997 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 998 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 999 1000 ASSERT_EQ(tok::text, Toks[2].getKind()); 1001 ASSERT_EQ(StringRef("="), Toks[2].getText()); 1002 1003 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1004} 1005 1006TEST_F(CommentLexerTest, HTML8) { 1007 const char *Source = "// <img src=> Meow"; 1008 1009 std::vector<Token> Toks; 1010 1011 lexString(Source, Toks); 1012 1013 ASSERT_EQ(7U, Toks.size()); 1014 1015 ASSERT_EQ(tok::text, Toks[0].getKind()); 1016 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1017 1018 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1019 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1020 1021 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1022 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1023 1024 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1025 1026 ASSERT_EQ(tok::html_greater, Toks[4].getKind()); 1027 1028 ASSERT_EQ(tok::text, Toks[5].getKind()); 1029 ASSERT_EQ(StringRef(" Meow"), Toks[5].getText()); 1030 1031 ASSERT_EQ(tok::newline, Toks[6].getKind()); 1032} 1033 1034TEST_F(CommentLexerTest, HTML9) { 1035 const char *Sources[] = { 1036 "// <img src", 1037 "// <img src " 1038 }; 1039 1040 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1041 std::vector<Token> Toks; 1042 1043 lexString(Sources[i], Toks); 1044 1045 
ASSERT_EQ(4U, Toks.size()); 1046 1047 ASSERT_EQ(tok::text, Toks[0].getKind()); 1048 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1049 1050 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1051 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1052 1053 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1054 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1055 1056 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1057 } 1058} 1059 1060TEST_F(CommentLexerTest, HTML10) { 1061 const char *Sources[] = { 1062 "// <img src=", 1063 "// <img src =" 1064 }; 1065 1066 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1067 std::vector<Token> Toks; 1068 1069 lexString(Sources[i], Toks); 1070 1071 ASSERT_EQ(5U, Toks.size()); 1072 1073 ASSERT_EQ(tok::text, Toks[0].getKind()); 1074 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1075 1076 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1077 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1078 1079 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1080 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1081 1082 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1083 1084 ASSERT_EQ(tok::newline, Toks[4].getKind()); 1085 } 1086} 1087 1088TEST_F(CommentLexerTest, HTML11) { 1089 const char *Sources[] = { 1090 "// <img src=\"", 1091 "// <img src = \"", 1092 "// <img src=\'", 1093 "// <img src = \'" 1094 }; 1095 1096 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1097 std::vector<Token> Toks; 1098 1099 lexString(Sources[i], Toks); 1100 1101 ASSERT_EQ(6U, Toks.size()); 1102 1103 ASSERT_EQ(tok::text, Toks[0].getKind()); 1104 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1105 1106 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1107 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1108 1109 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1110 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1111 1112 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1113 1114 
ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); 1115 ASSERT_EQ(StringRef(""), Toks[4].getHTMLQuotedString()); 1116 1117 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1118 } 1119} 1120 1121TEST_F(CommentLexerTest, HTML12) { 1122 const char *Source = "// <img src=@"; 1123 1124 std::vector<Token> Toks; 1125 1126 lexString(Source, Toks); 1127 1128 ASSERT_EQ(6U, Toks.size()); 1129 1130 ASSERT_EQ(tok::text, Toks[0].getKind()); 1131 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1132 1133 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1134 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1135 1136 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1137 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1138 1139 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1140 1141 ASSERT_EQ(tok::text, Toks[4].getKind()); 1142 ASSERT_EQ(StringRef("@"), Toks[4].getText()); 1143 1144 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1145} 1146 1147TEST_F(CommentLexerTest, HTML13) { 1148 const char *Sources[] = { 1149 "// <img src=\"val\\\"\\'val", 1150 "// <img src=\"val\\\"\\'val\"", 1151 "// <img src=\'val\\\"\\'val", 1152 "// <img src=\'val\\\"\\'val\'" 1153 }; 1154 1155 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1156 std::vector<Token> Toks; 1157 1158 lexString(Sources[i], Toks); 1159 1160 ASSERT_EQ(6U, Toks.size()); 1161 1162 ASSERT_EQ(tok::text, Toks[0].getKind()); 1163 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1164 1165 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1166 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1167 1168 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1169 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1170 1171 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1172 1173 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); 1174 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString()); 1175 1176 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1177 } 1178} 1179 1180TEST_F(CommentLexerTest, 
HTML14) { 1181 const char *Sources[] = { 1182 "// <img src=\"val\\\"\\'val\">", 1183 "// <img src=\'val\\\"\\'val\'>" 1184 }; 1185 1186 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1187 std::vector<Token> Toks; 1188 1189 lexString(Sources[i], Toks); 1190 1191 ASSERT_EQ(7U, Toks.size()); 1192 1193 ASSERT_EQ(tok::text, Toks[0].getKind()); 1194 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1195 1196 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1197 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1198 1199 ASSERT_EQ(tok::html_ident, Toks[2].getKind()); 1200 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent()); 1201 1202 ASSERT_EQ(tok::html_equals, Toks[3].getKind()); 1203 1204 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); 1205 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString()); 1206 1207 ASSERT_EQ(tok::html_greater, Toks[5].getKind()); 1208 1209 ASSERT_EQ(tok::newline, Toks[6].getKind()); 1210 } 1211} 1212 1213TEST_F(CommentLexerTest, HTML15) { 1214 const char *Sources[] = { 1215 "// <img/>", 1216 "// <img />" 1217 }; 1218 1219 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1220 std::vector<Token> Toks; 1221 1222 lexString(Sources[i], Toks); 1223 1224 ASSERT_EQ(4U, Toks.size()); 1225 1226 ASSERT_EQ(tok::text, Toks[0].getKind()); 1227 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1228 1229 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1230 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1231 1232 ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind()); 1233 1234 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1235 } 1236} 1237 1238TEST_F(CommentLexerTest, HTML16) { 1239 const char *Sources[] = { 1240 "// <img/ Aaa", 1241 "// <img / Aaa" 1242 }; 1243 1244 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1245 std::vector<Token> Toks; 1246 1247 lexString(Sources[i], Toks); 1248 1249 ASSERT_EQ(5U, Toks.size()); 1250 1251 ASSERT_EQ(tok::text, Toks[0].getKind()); 1252 
ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1253 1254 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); 1255 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName()); 1256 1257 ASSERT_EQ(tok::text, Toks[2].getKind()); 1258 ASSERT_EQ(StringRef("/"), Toks[2].getText()); 1259 1260 ASSERT_EQ(tok::text, Toks[3].getKind()); 1261 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getText()); 1262 1263 ASSERT_EQ(tok::newline, Toks[4].getKind()); 1264 } 1265} 1266 1267TEST_F(CommentLexerTest, HTML17) { 1268 const char *Source = "// </"; 1269 1270 std::vector<Token> Toks; 1271 1272 lexString(Source, Toks); 1273 1274 ASSERT_EQ(3U, Toks.size()); 1275 1276 ASSERT_EQ(tok::text, Toks[0].getKind()); 1277 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1278 1279 ASSERT_EQ(tok::text, Toks[1].getKind()); 1280 ASSERT_EQ(StringRef("</"), Toks[1].getText()); 1281 1282 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1283} 1284 1285TEST_F(CommentLexerTest, HTML18) { 1286 const char *Source = "// </@"; 1287 1288 std::vector<Token> Toks; 1289 1290 lexString(Source, Toks); 1291 1292 ASSERT_EQ(4U, Toks.size()); 1293 1294 ASSERT_EQ(tok::text, Toks[0].getKind()); 1295 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1296 1297 ASSERT_EQ(tok::text, Toks[1].getKind()); 1298 ASSERT_EQ(StringRef("</"), Toks[1].getText()); 1299 1300 ASSERT_EQ(tok::text, Toks[2].getKind()); 1301 ASSERT_EQ(StringRef("@"), Toks[2].getText()); 1302 1303 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1304} 1305 1306TEST_F(CommentLexerTest, HTML19) { 1307 const char *Source = "// </img"; 1308 1309 std::vector<Token> Toks; 1310 1311 lexString(Source, Toks); 1312 1313 ASSERT_EQ(3U, Toks.size()); 1314 1315 ASSERT_EQ(tok::text, Toks[0].getKind()); 1316 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1317 1318 ASSERT_EQ(tok::html_end_tag, Toks[1].getKind()); 1319 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagEndName()); 1320 1321 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1322} 1323 1324TEST_F(CommentLexerTest, NotAKnownHTMLTag1) { 1325 const char 
*Source = "// <tag>"; 1326 1327 std::vector<Token> Toks; 1328 1329 lexString(Source, Toks); 1330 1331 ASSERT_EQ(4U, Toks.size()); 1332 1333 ASSERT_EQ(tok::text, Toks[0].getKind()); 1334 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1335 1336 ASSERT_EQ(tok::text, Toks[1].getKind()); 1337 ASSERT_EQ(StringRef("<tag"), Toks[1].getText()); 1338 1339 ASSERT_EQ(tok::text, Toks[2].getKind()); 1340 ASSERT_EQ(StringRef(">"), Toks[2].getText()); 1341 1342 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1343} 1344 1345TEST_F(CommentLexerTest, NotAKnownHTMLTag2) { 1346 const char *Source = "// </tag>"; 1347 1348 std::vector<Token> Toks; 1349 1350 lexString(Source, Toks); 1351 1352 ASSERT_EQ(4U, Toks.size()); 1353 1354 ASSERT_EQ(tok::text, Toks[0].getKind()); 1355 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1356 1357 ASSERT_EQ(tok::text, Toks[1].getKind()); 1358 ASSERT_EQ(StringRef("</tag"), Toks[1].getText()); 1359 1360 ASSERT_EQ(tok::text, Toks[2].getKind()); 1361 ASSERT_EQ(StringRef(">"), Toks[2].getText()); 1362 1363 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1364} 1365 1366TEST_F(CommentLexerTest, HTMLCharacterReferences1) { 1367 const char *Source = "// &"; 1368 1369 std::vector<Token> Toks; 1370 1371 lexString(Source, Toks); 1372 1373 ASSERT_EQ(3U, Toks.size()); 1374 1375 ASSERT_EQ(tok::text, Toks[0].getKind()); 1376 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1377 1378 ASSERT_EQ(tok::text, Toks[1].getKind()); 1379 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1380 1381 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1382} 1383 1384TEST_F(CommentLexerTest, HTMLCharacterReferences2) { 1385 const char *Source = "// &!"; 1386 1387 std::vector<Token> Toks; 1388 1389 lexString(Source, Toks); 1390 1391 ASSERT_EQ(4U, Toks.size()); 1392 1393 ASSERT_EQ(tok::text, Toks[0].getKind()); 1394 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1395 1396 ASSERT_EQ(tok::text, Toks[1].getKind()); 1397 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1398 1399 ASSERT_EQ(tok::text, Toks[2].getKind()); 
1400 ASSERT_EQ(StringRef("!"), Toks[2].getText()); 1401 1402 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1403} 1404 1405TEST_F(CommentLexerTest, HTMLCharacterReferences3) { 1406 const char *Source = "// &"; 1407 1408 std::vector<Token> Toks; 1409 1410 lexString(Source, Toks); 1411 1412 ASSERT_EQ(3U, Toks.size()); 1413 1414 ASSERT_EQ(tok::text, Toks[0].getKind()); 1415 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1416 1417 ASSERT_EQ(tok::text, Toks[1].getKind()); 1418 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1419 1420 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1421} 1422 1423TEST_F(CommentLexerTest, HTMLCharacterReferences4) { 1424 const char *Source = "// &!"; 1425 1426 std::vector<Token> Toks; 1427 1428 lexString(Source, Toks); 1429 1430 ASSERT_EQ(4U, Toks.size()); 1431 1432 ASSERT_EQ(tok::text, Toks[0].getKind()); 1433 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1434 1435 ASSERT_EQ(tok::text, Toks[1].getKind()); 1436 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1437 1438 ASSERT_EQ(tok::text, Toks[2].getKind()); 1439 ASSERT_EQ(StringRef("!"), Toks[2].getText()); 1440 1441 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1442} 1443 1444TEST_F(CommentLexerTest, HTMLCharacterReferences5) { 1445 const char *Source = "// &#"; 1446 1447 std::vector<Token> Toks; 1448 1449 lexString(Source, Toks); 1450 1451 ASSERT_EQ(3U, Toks.size()); 1452 1453 ASSERT_EQ(tok::text, Toks[0].getKind()); 1454 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1455 1456 ASSERT_EQ(tok::text, Toks[1].getKind()); 1457 ASSERT_EQ(StringRef("&#"), Toks[1].getText()); 1458 1459 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1460} 1461 1462TEST_F(CommentLexerTest, HTMLCharacterReferences6) { 1463 const char *Source = "// &#a"; 1464 1465 std::vector<Token> Toks; 1466 1467 lexString(Source, Toks); 1468 1469 ASSERT_EQ(4U, Toks.size()); 1470 1471 ASSERT_EQ(tok::text, Toks[0].getKind()); 1472 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1473 1474 ASSERT_EQ(tok::text, Toks[1].getKind()); 1475 
ASSERT_EQ(StringRef("&#"), Toks[1].getText()); 1476 1477 ASSERT_EQ(tok::text, Toks[2].getKind()); 1478 ASSERT_EQ(StringRef("a"), Toks[2].getText()); 1479 1480 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1481} 1482 1483TEST_F(CommentLexerTest, HTMLCharacterReferences7) { 1484 const char *Source = "// *"; 1485 1486 std::vector<Token> Toks; 1487 1488 lexString(Source, Toks); 1489 1490 ASSERT_EQ(3U, Toks.size()); 1491 1492 ASSERT_EQ(tok::text, Toks[0].getKind()); 1493 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1494 1495 ASSERT_EQ(tok::text, Toks[1].getKind()); 1496 ASSERT_EQ(StringRef("*"), Toks[1].getText()); 1497 1498 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1499} 1500 1501TEST_F(CommentLexerTest, HTMLCharacterReferences8) { 1502 const char *Source = "// *a"; 1503 1504 std::vector<Token> Toks; 1505 1506 lexString(Source, Toks); 1507 1508 ASSERT_EQ(4U, Toks.size()); 1509 1510 ASSERT_EQ(tok::text, Toks[0].getKind()); 1511 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1512 1513 ASSERT_EQ(tok::text, Toks[1].getKind()); 1514 ASSERT_EQ(StringRef("*"), Toks[1].getText()); 1515 1516 ASSERT_EQ(tok::text, Toks[2].getKind()); 1517 ASSERT_EQ(StringRef("a"), Toks[2].getText()); 1518 1519 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1520} 1521 1522TEST_F(CommentLexerTest, HTMLCharacterReferences9) { 1523 const char *Source = "// &#x"; 1524 1525 std::vector<Token> Toks; 1526 1527 lexString(Source, Toks); 1528 1529 ASSERT_EQ(3U, Toks.size()); 1530 1531 ASSERT_EQ(tok::text, Toks[0].getKind()); 1532 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1533 1534 ASSERT_EQ(tok::text, Toks[1].getKind()); 1535 ASSERT_EQ(StringRef("&#x"), Toks[1].getText()); 1536 1537 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1538} 1539 1540TEST_F(CommentLexerTest, HTMLCharacterReferences10) { 1541 const char *Source = "// &#xz"; 1542 1543 std::vector<Token> Toks; 1544 1545 lexString(Source, Toks); 1546 1547 ASSERT_EQ(4U, Toks.size()); 1548 1549 ASSERT_EQ(tok::text, Toks[0].getKind()); 1550 
ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1551 1552 ASSERT_EQ(tok::text, Toks[1].getKind()); 1553 ASSERT_EQ(StringRef("&#x"), Toks[1].getText()); 1554 1555 ASSERT_EQ(tok::text, Toks[2].getKind()); 1556 ASSERT_EQ(StringRef("z"), Toks[2].getText()); 1557 1558 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1559} 1560 1561TEST_F(CommentLexerTest, HTMLCharacterReferences11) { 1562 const char *Source = "// «"; 1563 1564 std::vector<Token> Toks; 1565 1566 lexString(Source, Toks); 1567 1568 ASSERT_EQ(3U, Toks.size()); 1569 1570 ASSERT_EQ(tok::text, Toks[0].getKind()); 1571 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1572 1573 ASSERT_EQ(tok::text, Toks[1].getKind()); 1574 ASSERT_EQ(StringRef("«"), Toks[1].getText()); 1575 1576 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1577} 1578 1579TEST_F(CommentLexerTest, HTMLCharacterReferences12) { 1580 const char *Source = "// «z"; 1581 1582 std::vector<Token> Toks; 1583 1584 lexString(Source, Toks); 1585 1586 ASSERT_EQ(4U, Toks.size()); 1587 1588 ASSERT_EQ(tok::text, Toks[0].getKind()); 1589 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1590 1591 ASSERT_EQ(tok::text, Toks[1].getKind()); 1592 ASSERT_EQ(StringRef("«"), Toks[1].getText()); 1593 1594 ASSERT_EQ(tok::text, Toks[2].getKind()); 1595 ASSERT_EQ(StringRef("z"), Toks[2].getText()); 1596 1597 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1598} 1599 1600TEST_F(CommentLexerTest, HTMLCharacterReferences13) { 1601 const char *Source = "// &"; 1602 1603 std::vector<Token> Toks; 1604 1605 lexString(Source, Toks); 1606 1607 ASSERT_EQ(3U, Toks.size()); 1608 1609 ASSERT_EQ(tok::text, Toks[0].getKind()); 1610 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1611 1612 ASSERT_EQ(tok::text, Toks[1].getKind()); 1613 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1614 1615 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1616} 1617 1618TEST_F(CommentLexerTest, HTMLCharacterReferences14) { 1619 const char *Source = "// &<"; 1620 1621 std::vector<Token> Toks; 1622 1623 lexString(Source, Toks); 1624 1625 
ASSERT_EQ(4U, Toks.size()); 1626 1627 ASSERT_EQ(tok::text, Toks[0].getKind()); 1628 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1629 1630 ASSERT_EQ(tok::text, Toks[1].getKind()); 1631 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1632 1633 ASSERT_EQ(tok::text, Toks[2].getKind()); 1634 ASSERT_EQ(StringRef("<"), Toks[2].getText()); 1635 1636 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1637} 1638 1639TEST_F(CommentLexerTest, HTMLCharacterReferences15) { 1640 const char *Source = "// & meow"; 1641 1642 std::vector<Token> Toks; 1643 1644 lexString(Source, Toks); 1645 1646 ASSERT_EQ(4U, Toks.size()); 1647 1648 ASSERT_EQ(tok::text, Toks[0].getKind()); 1649 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1650 1651 ASSERT_EQ(tok::text, Toks[1].getKind()); 1652 ASSERT_EQ(StringRef("&"), Toks[1].getText()); 1653 1654 ASSERT_EQ(tok::text, Toks[2].getKind()); 1655 ASSERT_EQ(StringRef(" meow"), Toks[2].getText()); 1656 1657 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1658} 1659 1660TEST_F(CommentLexerTest, HTMLCharacterReferences16) { 1661 const char *Sources[] = { 1662 "// =", 1663 "// =", 1664 "// =", 1665 "// =" 1666 }; 1667 1668 for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { 1669 std::vector<Token> Toks; 1670 1671 lexString(Sources[i], Toks); 1672 1673 ASSERT_EQ(3U, Toks.size()); 1674 1675 ASSERT_EQ(tok::text, Toks[0].getKind()); 1676 ASSERT_EQ(StringRef(" "), Toks[0].getText()); 1677 1678 ASSERT_EQ(tok::text, Toks[1].getKind()); 1679 ASSERT_EQ(StringRef("="), Toks[1].getText()); 1680 1681 ASSERT_EQ(tok::newline, Toks[2].getKind()); 1682 } 1683} 1684 1685TEST_F(CommentLexerTest, MultipleComments) { 1686 const char *Source = 1687 "// Aaa\n" 1688 "/// Bbb\n" 1689 "/* Ccc\n" 1690 " * Ddd*/\n" 1691 "/** Eee*/"; 1692 1693 std::vector<Token> Toks; 1694 1695 lexString(Source, Toks); 1696 1697 ASSERT_EQ(12U, Toks.size()); 1698 1699 ASSERT_EQ(tok::text, Toks[0].getKind()); 1700 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); 1701 ASSERT_EQ(tok::newline, 
Toks[1].getKind()); 1702 1703 ASSERT_EQ(tok::text, Toks[2].getKind()); 1704 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText()); 1705 ASSERT_EQ(tok::newline, Toks[3].getKind()); 1706 1707 ASSERT_EQ(tok::text, Toks[4].getKind()); 1708 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText()); 1709 ASSERT_EQ(tok::newline, Toks[5].getKind()); 1710 1711 ASSERT_EQ(tok::text, Toks[6].getKind()); 1712 ASSERT_EQ(StringRef(" Ddd"), Toks[6].getText()); 1713 ASSERT_EQ(tok::newline, Toks[7].getKind()); 1714 ASSERT_EQ(tok::newline, Toks[8].getKind()); 1715 1716 ASSERT_EQ(tok::text, Toks[9].getKind()); 1717 ASSERT_EQ(StringRef(" Eee"), Toks[9].getText()); 1718 1719 ASSERT_EQ(tok::newline, Toks[10].getKind()); 1720 ASSERT_EQ(tok::newline, Toks[11].getKind()); 1721} 1722 1723} // end namespace comments 1724} // end namespace clang 1725 1726