1//===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "clang/Basic/SourceManager.h"
11#include "clang/Basic/FileManager.h"
12#include "clang/Basic/Diagnostic.h"
13#include "clang/AST/CommentLexer.h"
14#include "clang/AST/CommentCommandTraits.h"
15#include "llvm/ADT/STLExtras.h"
16#include <vector>
17
18#include "gtest/gtest.h"
19
20using namespace llvm;
21using namespace clang;
22
23namespace clang {
24namespace comments {
25
26namespace {
27class CommentLexerTest : public ::testing::Test {
28protected:
29  CommentLexerTest()
30    : FileMgr(FileMgrOpts),
31      DiagID(new DiagnosticIDs()),
32      Diags(DiagID, new IgnoringDiagConsumer()),
33      SourceMgr(Diags, FileMgr),
34      Traits(Allocator) {
35  }
36
37  FileSystemOptions FileMgrOpts;
38  FileManager FileMgr;
39  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
40  DiagnosticsEngine Diags;
41  SourceManager SourceMgr;
42  llvm::BumpPtrAllocator Allocator;
43  CommandTraits Traits;
44
45  void lexString(const char *Source, std::vector<Token> &Toks);
46
47  StringRef getCommandName(const Token &Tok) {
48    return Traits.getCommandInfo(Tok.getCommandID())->Name;
49  }
50
51  StringRef getVerbatimBlockName(const Token &Tok) {
52    return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name;
53  }
54
55  StringRef getVerbatimLineName(const Token &Tok) {
56    return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name;
57  }
58};
59
60void CommentLexerTest::lexString(const char *Source,
61                                 std::vector<Token> &Toks) {
62  MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source);
63  FileID File = SourceMgr.createFileIDForMemBuffer(Buf);
64  SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
65
66  Lexer L(Allocator, Traits, Begin, Source, Source + strlen(Source));
67
68  while (1) {
69    Token Tok;
70    L.lex(Tok);
71    if (Tok.is(tok::eof))
72      break;
73    Toks.push_back(Tok);
74  }
75}
76
77} // unnamed namespace
78
79// Empty source range should be handled.
80TEST_F(CommentLexerTest, Basic1) {
81  const char *Source = "";
82  std::vector<Token> Toks;
83
84  lexString(Source, Toks);
85
86  ASSERT_EQ(0U, Toks.size());
87}
88
89// Empty comments should be handled.
90TEST_F(CommentLexerTest, Basic2) {
91  const char *Sources[] = {
92    "//", "///", "//!", "///<", "//!<"
93  };
94  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
95    std::vector<Token> Toks;
96
97    lexString(Sources[i], Toks);
98
99    ASSERT_EQ(1U, Toks.size());
100
101    ASSERT_EQ(tok::newline, Toks[0].getKind());
102  }
103}
104
105// Empty comments should be handled.
106TEST_F(CommentLexerTest, Basic3) {
107  const char *Sources[] = {
108    "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
109  };
110  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
111    std::vector<Token> Toks;
112
113    lexString(Sources[i], Toks);
114
115    ASSERT_EQ(2U, Toks.size());
116
117    ASSERT_EQ(tok::newline, Toks[0].getKind());
118    ASSERT_EQ(tok::newline, Toks[1].getKind());
119  }
120}
121
122// Single comment with plain text.
123TEST_F(CommentLexerTest, Basic4) {
124  const char *Sources[] = {
125    "// Meow",   "/// Meow",    "//! Meow",
126    "// Meow\n", "// Meow\r\n", "//! Meow\r",
127  };
128
129  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
130    std::vector<Token> Toks;
131
132    lexString(Sources[i], Toks);
133
134    ASSERT_EQ(2U, Toks.size());
135
136    ASSERT_EQ(tok::text,          Toks[0].getKind());
137    ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
138
139    ASSERT_EQ(tok::newline,       Toks[1].getKind());
140  }
141}
142
143// Single comment with plain text.
144TEST_F(CommentLexerTest, Basic5) {
145  const char *Sources[] = {
146    "/* Meow*/", "/** Meow*/",  "/*! Meow*/"
147  };
148
149  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
150    std::vector<Token> Toks;
151
152    lexString(Sources[i], Toks);
153
154    ASSERT_EQ(3U, Toks.size());
155
156    ASSERT_EQ(tok::text,          Toks[0].getKind());
157    ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
158
159    ASSERT_EQ(tok::newline,       Toks[1].getKind());
160    ASSERT_EQ(tok::newline,       Toks[2].getKind());
161  }
162}
163
164// Test newline escaping.
165TEST_F(CommentLexerTest, Basic6) {
166  const char *Sources[] = {
167    "// Aaa\\\n"   " Bbb\\ \n"   " Ccc?" "?/\n",
168    "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
169    "// Aaa\\\r"   " Bbb\\ \r"   " Ccc?" "?/\r"
170  };
171
172  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
173    std::vector<Token> Toks;
174
175    lexString(Sources[i], Toks);
176
177    ASSERT_EQ(10U, Toks.size());
178
179    ASSERT_EQ(tok::text,         Toks[0].getKind());
180    ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
181    ASSERT_EQ(tok::text,         Toks[1].getKind());
182    ASSERT_EQ(StringRef("\\"),   Toks[1].getText());
183    ASSERT_EQ(tok::newline,      Toks[2].getKind());
184
185    ASSERT_EQ(tok::text,         Toks[3].getKind());
186    ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText());
187    ASSERT_EQ(tok::text,         Toks[4].getKind());
188    ASSERT_EQ(StringRef("\\"),   Toks[4].getText());
189    ASSERT_EQ(tok::text,         Toks[5].getKind());
190    ASSERT_EQ(StringRef(" "),    Toks[5].getText());
191    ASSERT_EQ(tok::newline,      Toks[6].getKind());
192
193    ASSERT_EQ(tok::text,         Toks[7].getKind());
194    ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText());
195    ASSERT_EQ(tok::newline,      Toks[8].getKind());
196
197    ASSERT_EQ(tok::newline,      Toks[9].getKind());
198  }
199}
200
201// Check that we skip C-style aligned stars correctly.
202TEST_F(CommentLexerTest, Basic7) {
203  const char *Source =
204    "/* Aaa\n"
205    " * Bbb\r\n"
206    "\t* Ccc\n"
207    "  ! Ddd\n"
208    "  * Eee\n"
209    "  ** Fff\n"
210    " */";
211  std::vector<Token> Toks;
212
213  lexString(Source, Toks);
214
215  ASSERT_EQ(15U, Toks.size());
216
217  ASSERT_EQ(tok::text,         Toks[0].getKind());
218  ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
219  ASSERT_EQ(tok::newline,      Toks[1].getKind());
220
221  ASSERT_EQ(tok::text,         Toks[2].getKind());
222  ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
223  ASSERT_EQ(tok::newline,      Toks[3].getKind());
224
225  ASSERT_EQ(tok::text,         Toks[4].getKind());
226  ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
227  ASSERT_EQ(tok::newline,      Toks[5].getKind());
228
229  ASSERT_EQ(tok::text,            Toks[6].getKind());
230  ASSERT_EQ(StringRef("  ! Ddd"), Toks[6].getText());
231  ASSERT_EQ(tok::newline,         Toks[7].getKind());
232
233  ASSERT_EQ(tok::text,         Toks[8].getKind());
234  ASSERT_EQ(StringRef(" Eee"), Toks[8].getText());
235  ASSERT_EQ(tok::newline,      Toks[9].getKind());
236
237  ASSERT_EQ(tok::text,          Toks[10].getKind());
238  ASSERT_EQ(StringRef("* Fff"), Toks[10].getText());
239  ASSERT_EQ(tok::newline,       Toks[11].getKind());
240
241  ASSERT_EQ(tok::text,         Toks[12].getKind());
242  ASSERT_EQ(StringRef(" "),    Toks[12].getText());
243
244  ASSERT_EQ(tok::newline,      Toks[13].getKind());
245  ASSERT_EQ(tok::newline,      Toks[14].getKind());
246}
247
248// A command marker followed by comment end.
249TEST_F(CommentLexerTest, DoxygenCommand1) {
250  const char *Sources[] = { "//@", "///@", "//!@" };
251  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
252    std::vector<Token> Toks;
253
254    lexString(Sources[i], Toks);
255
256    ASSERT_EQ(2U, Toks.size());
257
258    ASSERT_EQ(tok::text,          Toks[0].getKind());
259    ASSERT_EQ(StringRef("@"),     Toks[0].getText());
260
261    ASSERT_EQ(tok::newline,       Toks[1].getKind());
262  }
263}
264
265// A command marker followed by comment end.
266TEST_F(CommentLexerTest, DoxygenCommand2) {
267  const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
268  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
269    std::vector<Token> Toks;
270
271    lexString(Sources[i], Toks);
272
273    ASSERT_EQ(3U, Toks.size());
274
275    ASSERT_EQ(tok::text,          Toks[0].getKind());
276    ASSERT_EQ(StringRef("@"),     Toks[0].getText());
277
278    ASSERT_EQ(tok::newline,       Toks[1].getKind());
279    ASSERT_EQ(tok::newline,       Toks[2].getKind());
280  }
281}
282
283// A command marker followed by comment end.
284TEST_F(CommentLexerTest, DoxygenCommand3) {
285  const char *Sources[] = { "/*\\*/", "/**\\*/" };
286  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
287    std::vector<Token> Toks;
288
289    lexString(Sources[i], Toks);
290
291    ASSERT_EQ(3U, Toks.size());
292
293    ASSERT_EQ(tok::text,           Toks[0].getKind());
294    ASSERT_EQ(StringRef("\\"),     Toks[0].getText());
295
296    ASSERT_EQ(tok::newline,        Toks[1].getKind());
297    ASSERT_EQ(tok::newline,        Toks[2].getKind());
298  }
299}
300
301// Doxygen escape sequences.
302TEST_F(CommentLexerTest, DoxygenCommand4) {
303  const char *Source =
304    "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::";
305  const char *Text[] = {
306    " ",
307    "\\", " ", "@", " ", "&", " ", "$",  " ", "#", " ",
308    "<",  " ", ">", " ", "%", " ", "\"", " ", ".", " ",
309    "::", ""
310  };
311
312  std::vector<Token> Toks;
313
314  lexString(Source, Toks);
315
316  ASSERT_EQ(array_lengthof(Text), Toks.size());
317
318  for (size_t i = 0, e = Toks.size(); i != e; i++) {
319    if(Toks[i].is(tok::text))
320      ASSERT_EQ(StringRef(Text[i]), Toks[i].getText())
321        << "index " << i;
322  }
323}
324
325TEST_F(CommentLexerTest, DoxygenCommand5) {
326  const char *Source = "/// \\brief Aaa.";
327  std::vector<Token> Toks;
328
329  lexString(Source, Toks);
330
331  ASSERT_EQ(4U, Toks.size());
332
333  ASSERT_EQ(tok::text,          Toks[0].getKind());
334  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
335
336  ASSERT_EQ(tok::command,       Toks[1].getKind());
337  ASSERT_EQ(StringRef("brief"), getCommandName(Toks[1]));
338
339  ASSERT_EQ(tok::text,          Toks[2].getKind());
340  ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
341
342  ASSERT_EQ(tok::newline,       Toks[3].getKind());
343}
344
345TEST_F(CommentLexerTest, DoxygenCommand6) {
346  const char *Source = "/// \\em\\em \\em\t\\em\n";
347  std::vector<Token> Toks;
348
349  lexString(Source, Toks);
350
351  ASSERT_EQ(8U, Toks.size());
352
353  ASSERT_EQ(tok::text,       Toks[0].getKind());
354  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
355
356  ASSERT_EQ(tok::command,    Toks[1].getKind());
357  ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
358
359  ASSERT_EQ(tok::command,    Toks[2].getKind());
360  ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
361
362  ASSERT_EQ(tok::text,       Toks[3].getKind());
363  ASSERT_EQ(StringRef(" "),  Toks[3].getText());
364
365  ASSERT_EQ(tok::command,    Toks[4].getKind());
366  ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
367
368  ASSERT_EQ(tok::text,       Toks[5].getKind());
369  ASSERT_EQ(StringRef("\t"), Toks[5].getText());
370
371  ASSERT_EQ(tok::command,    Toks[6].getKind());
372  ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
373
374  ASSERT_EQ(tok::newline,    Toks[7].getKind());
375}
376
377TEST_F(CommentLexerTest, DoxygenCommand7) {
378  const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
379  std::vector<Token> Toks;
380
381  lexString(Source, Toks);
382
383  ASSERT_EQ(8U, Toks.size());
384
385  ASSERT_EQ(tok::text,        Toks[0].getKind());
386  ASSERT_EQ(StringRef(" "),   Toks[0].getText());
387
388  ASSERT_EQ(tok::unknown_command, Toks[1].getKind());
389  ASSERT_EQ(StringRef("aaa"), Toks[1].getUnknownCommandName());
390
391  ASSERT_EQ(tok::unknown_command, Toks[2].getKind());
392  ASSERT_EQ(StringRef("bbb"), Toks[2].getUnknownCommandName());
393
394  ASSERT_EQ(tok::text,        Toks[3].getKind());
395  ASSERT_EQ(StringRef(" "),   Toks[3].getText());
396
397  ASSERT_EQ(tok::unknown_command, Toks[4].getKind());
398  ASSERT_EQ(StringRef("ccc"), Toks[4].getUnknownCommandName());
399
400  ASSERT_EQ(tok::text,        Toks[5].getKind());
401  ASSERT_EQ(StringRef("\t"),  Toks[5].getText());
402
403  ASSERT_EQ(tok::unknown_command, Toks[6].getKind());
404  ASSERT_EQ(StringRef("ddd"), Toks[6].getUnknownCommandName());
405
406  ASSERT_EQ(tok::newline,     Toks[7].getKind());
407}
408
409TEST_F(CommentLexerTest, DoxygenCommand8) {
410  const char *Source = "// \\c\n";
411  std::vector<Token> Toks;
412
413  lexString(Source, Toks);
414
415  ASSERT_EQ(3U, Toks.size());
416
417  ASSERT_EQ(tok::text,      Toks[0].getKind());
418  ASSERT_EQ(StringRef(" "), Toks[0].getText());
419
420  ASSERT_EQ(tok::command,   Toks[1].getKind());
421  ASSERT_EQ(StringRef("c"), getCommandName(Toks[1]));
422
423  ASSERT_EQ(tok::newline,   Toks[2].getKind());
424}
425
426// Empty verbatim block.
427TEST_F(CommentLexerTest, VerbatimBlock1) {
428  const char *Sources[] = {
429    "/// \\verbatim\\endverbatim\n//",
430    "/** \\verbatim\\endverbatim*/"
431  };
432
433  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
434    std::vector<Token> Toks;
435
436    lexString(Sources[i], Toks);
437
438    ASSERT_EQ(5U, Toks.size());
439
440    ASSERT_EQ(tok::text,                 Toks[0].getKind());
441    ASSERT_EQ(StringRef(" "),            Toks[0].getText());
442
443    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
444    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
445
446    ASSERT_EQ(tok::verbatim_block_end,   Toks[2].getKind());
447    ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[2]));
448
449    ASSERT_EQ(tok::newline,              Toks[3].getKind());
450    ASSERT_EQ(tok::newline,              Toks[4].getKind());
451  }
452}
453
454// Empty verbatim block without an end command.
455TEST_F(CommentLexerTest, VerbatimBlock2) {
456  const char *Source = "/// \\verbatim";
457
458  std::vector<Token> Toks;
459
460  lexString(Source, Toks);
461
462  ASSERT_EQ(3U, Toks.size());
463
464  ASSERT_EQ(tok::text,                 Toks[0].getKind());
465  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
466
467  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
468  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
469
470  ASSERT_EQ(tok::newline,              Toks[2].getKind());
471}
472
473// Empty verbatim block without an end command.
474TEST_F(CommentLexerTest, VerbatimBlock3) {
475  const char *Source = "/** \\verbatim*/";
476
477  std::vector<Token> Toks;
478
479  lexString(Source, Toks);
480
481  ASSERT_EQ(4U, Toks.size());
482
483  ASSERT_EQ(tok::text,                 Toks[0].getKind());
484  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
485
486  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
487  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
488
489  ASSERT_EQ(tok::newline,              Toks[2].getKind());
490  ASSERT_EQ(tok::newline,              Toks[3].getKind());
491}
492
493// Single-line verbatim block.
494TEST_F(CommentLexerTest, VerbatimBlock4) {
495  const char *Sources[] = {
496    "/// Meow \\verbatim aaa \\endverbatim\n//",
497    "/** Meow \\verbatim aaa \\endverbatim*/"
498  };
499
500  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
501    std::vector<Token> Toks;
502
503    lexString(Sources[i], Toks);
504
505    ASSERT_EQ(6U, Toks.size());
506
507    ASSERT_EQ(tok::text,                 Toks[0].getKind());
508    ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
509
510    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
511    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
512
513    ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
514    ASSERT_EQ(StringRef(" aaa "),        Toks[2].getVerbatimBlockText());
515
516    ASSERT_EQ(tok::verbatim_block_end,   Toks[3].getKind());
517    ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[3]));
518
519    ASSERT_EQ(tok::newline,              Toks[4].getKind());
520    ASSERT_EQ(tok::newline,              Toks[5].getKind());
521  }
522}
523
524// Single-line verbatim block without an end command.
525TEST_F(CommentLexerTest, VerbatimBlock5) {
526  const char *Sources[] = {
527    "/// Meow \\verbatim aaa \n//",
528    "/** Meow \\verbatim aaa */"
529  };
530
531  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
532    std::vector<Token> Toks;
533
534    lexString(Sources[i], Toks);
535
536    ASSERT_EQ(5U, Toks.size());
537
538    ASSERT_EQ(tok::text,                 Toks[0].getKind());
539    ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
540
541    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
542    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
543
544    ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
545    ASSERT_EQ(StringRef(" aaa "),        Toks[2].getVerbatimBlockText());
546
547    ASSERT_EQ(tok::newline,              Toks[3].getKind());
548    ASSERT_EQ(tok::newline,              Toks[4].getKind());
549  }
550}
551
552TEST_F(CommentLexerTest, VerbatimBlock6) {
553  const char *Source =
554    "// \\verbatim\n"
555    "// Aaa\n"
556    "//\n"
557    "// Bbb\n"
558    "// \\endverbatim\n";
559
560  std::vector<Token> Toks;
561
562  lexString(Source, Toks);
563
564  ASSERT_EQ(10U, Toks.size());
565
566  ASSERT_EQ(tok::text,                 Toks[0].getKind());
567  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
568
569  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
570  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
571
572  ASSERT_EQ(tok::newline,              Toks[2].getKind());
573
574  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
575  ASSERT_EQ(StringRef(" Aaa"),         Toks[3].getVerbatimBlockText());
576
577  ASSERT_EQ(tok::newline,              Toks[4].getKind());
578
579  ASSERT_EQ(tok::newline,              Toks[5].getKind());
580
581  ASSERT_EQ(tok::verbatim_block_line,  Toks[6].getKind());
582  ASSERT_EQ(StringRef(" Bbb"),         Toks[6].getVerbatimBlockText());
583
584  ASSERT_EQ(tok::newline,              Toks[7].getKind());
585
586  ASSERT_EQ(tok::verbatim_block_end,   Toks[8].getKind());
587  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[8]));
588
589  ASSERT_EQ(tok::newline,              Toks[9].getKind());
590}
591
592TEST_F(CommentLexerTest, VerbatimBlock7) {
593  const char *Source =
594    "/* \\verbatim\n"
595    " * Aaa\n"
596    " *\n"
597    " * Bbb\n"
598    " * \\endverbatim\n"
599    " */";
600
601  std::vector<Token> Toks;
602
603  lexString(Source, Toks);
604
605  ASSERT_EQ(10U, Toks.size());
606
607  ASSERT_EQ(tok::text,                 Toks[0].getKind());
608  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
609
610  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
611  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
612
613  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
614  ASSERT_EQ(StringRef(" Aaa"),         Toks[2].getVerbatimBlockText());
615
616  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
617  ASSERT_EQ(StringRef(""),             Toks[3].getVerbatimBlockText());
618
619  ASSERT_EQ(tok::verbatim_block_line,  Toks[4].getKind());
620  ASSERT_EQ(StringRef(" Bbb"),         Toks[4].getVerbatimBlockText());
621
622  ASSERT_EQ(tok::verbatim_block_end,   Toks[5].getKind());
623  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[5]));
624
625  ASSERT_EQ(tok::newline,              Toks[6].getKind());
626
627  ASSERT_EQ(tok::text,                 Toks[7].getKind());
628  ASSERT_EQ(StringRef(" "),            Toks[7].getText());
629
630  ASSERT_EQ(tok::newline,              Toks[8].getKind());
631  ASSERT_EQ(tok::newline,              Toks[9].getKind());
632}
633
634// Complex test for verbatim blocks.
635TEST_F(CommentLexerTest, VerbatimBlock8) {
636  const char *Source =
637    "/* Meow \\verbatim aaa\\$\\@\n"
638    "bbb \\endverbati\r"
639    "ccc\r\n"
640    "ddd \\endverbatim Blah \\verbatim eee\n"
641    "\\endverbatim BlahBlah*/";
642  std::vector<Token> Toks;
643
644  lexString(Source, Toks);
645
646  ASSERT_EQ(14U, Toks.size());
647
648  ASSERT_EQ(tok::text,                 Toks[0].getKind());
649  ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
650
651  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
652  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
653
654  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
655  ASSERT_EQ(StringRef(" aaa\\$\\@"),   Toks[2].getVerbatimBlockText());
656
657  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
658  ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText());
659
660  ASSERT_EQ(tok::verbatim_block_line,  Toks[4].getKind());
661  ASSERT_EQ(StringRef("ccc"),          Toks[4].getVerbatimBlockText());
662
663  ASSERT_EQ(tok::verbatim_block_line,  Toks[5].getKind());
664  ASSERT_EQ(StringRef("ddd "),         Toks[5].getVerbatimBlockText());
665
666  ASSERT_EQ(tok::verbatim_block_end,   Toks[6].getKind());
667  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[6]));
668
669  ASSERT_EQ(tok::text,                 Toks[7].getKind());
670  ASSERT_EQ(StringRef(" Blah "),       Toks[7].getText());
671
672  ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind());
673  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[8]));
674
675  ASSERT_EQ(tok::verbatim_block_line,  Toks[9].getKind());
676  ASSERT_EQ(StringRef(" eee"),         Toks[9].getVerbatimBlockText());
677
678  ASSERT_EQ(tok::verbatim_block_end,   Toks[10].getKind());
679  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[10]));
680
681  ASSERT_EQ(tok::text,                 Toks[11].getKind());
682  ASSERT_EQ(StringRef(" BlahBlah"),    Toks[11].getText());
683
684  ASSERT_EQ(tok::newline,              Toks[12].getKind());
685  ASSERT_EQ(tok::newline,              Toks[13].getKind());
686}
687
688// LaTeX verbatim blocks.
689TEST_F(CommentLexerTest, VerbatimBlock9) {
690  const char *Source =
691    "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}";
692  std::vector<Token> Toks;
693
694  lexString(Source, Toks);
695
696  ASSERT_EQ(13U, Toks.size());
697
698  ASSERT_EQ(tok::text,                 Toks[0].getKind());
699  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
700
701  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
702  ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[1]));
703
704  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
705  ASSERT_EQ(StringRef(" Aaa "),        Toks[2].getVerbatimBlockText());
706
707  ASSERT_EQ(tok::verbatim_block_end,   Toks[3].getKind());
708  ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[3]));
709
710  ASSERT_EQ(tok::text,                 Toks[4].getKind());
711  ASSERT_EQ(StringRef(" "),            Toks[4].getText());
712
713  ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind());
714  ASSERT_EQ(StringRef("f["),           getVerbatimBlockName(Toks[5]));
715
716  ASSERT_EQ(tok::verbatim_block_line,  Toks[6].getKind());
717  ASSERT_EQ(StringRef(" Bbb "),        Toks[6].getVerbatimBlockText());
718
719  ASSERT_EQ(tok::verbatim_block_end,   Toks[7].getKind());
720  ASSERT_EQ(StringRef("f]"),           getVerbatimBlockName(Toks[7]));
721
722  ASSERT_EQ(tok::text,                 Toks[8].getKind());
723  ASSERT_EQ(StringRef(" "),            Toks[8].getText());
724
725  ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind());
726  ASSERT_EQ(StringRef("f{"),           getVerbatimBlockName(Toks[9]));
727
728  ASSERT_EQ(tok::verbatim_block_line,  Toks[10].getKind());
729  ASSERT_EQ(StringRef(" Ccc "),        Toks[10].getVerbatimBlockText());
730
731  ASSERT_EQ(tok::verbatim_block_end,   Toks[11].getKind());
732  ASSERT_EQ(StringRef("f}"),           getVerbatimBlockName(Toks[11]));
733
734  ASSERT_EQ(tok::newline,              Toks[12].getKind());
735}
736
737// Empty verbatim line.
738TEST_F(CommentLexerTest, VerbatimLine1) {
739  const char *Sources[] = {
740    "/// \\fn\n//",
741    "/** \\fn*/"
742  };
743
744  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
745    std::vector<Token> Toks;
746
747    lexString(Sources[i], Toks);
748
749    ASSERT_EQ(4U, Toks.size());
750
751    ASSERT_EQ(tok::text,               Toks[0].getKind());
752    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
753
754    ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
755    ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
756
757    ASSERT_EQ(tok::newline,            Toks[2].getKind());
758    ASSERT_EQ(tok::newline,            Toks[3].getKind());
759  }
760}
761
762// Verbatim line with Doxygen escape sequences, which should not be expanded.
763TEST_F(CommentLexerTest, VerbatimLine2) {
764  const char *Sources[] = {
765    "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
766    "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
767  };
768
769  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
770    std::vector<Token> Toks;
771
772    lexString(Sources[i], Toks);
773
774    ASSERT_EQ(5U, Toks.size());
775
776    ASSERT_EQ(tok::text,               Toks[0].getKind());
777    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
778
779    ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
780    ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
781
782    ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
783    ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
784                                       Toks[2].getVerbatimLineText());
785
786    ASSERT_EQ(tok::newline,            Toks[3].getKind());
787    ASSERT_EQ(tok::newline,            Toks[4].getKind());
788  }
789}
790
791// Verbatim line should not eat anything from next source line.
792TEST_F(CommentLexerTest, VerbatimLine3) {
793  const char *Source =
794    "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
795    " * Meow\n"
796    " */";
797
798  std::vector<Token> Toks;
799
800  lexString(Source, Toks);
801
802  ASSERT_EQ(9U, Toks.size());
803
804  ASSERT_EQ(tok::text,               Toks[0].getKind());
805  ASSERT_EQ(StringRef(" "),          Toks[0].getText());
806
807  ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
808  ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
809
810  ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
811  ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
812                                     Toks[2].getVerbatimLineText());
813  ASSERT_EQ(tok::newline,            Toks[3].getKind());
814
815  ASSERT_EQ(tok::text,               Toks[4].getKind());
816  ASSERT_EQ(StringRef(" Meow"),      Toks[4].getText());
817  ASSERT_EQ(tok::newline,            Toks[5].getKind());
818
819  ASSERT_EQ(tok::text,               Toks[6].getKind());
820  ASSERT_EQ(StringRef(" "),          Toks[6].getText());
821
822  ASSERT_EQ(tok::newline,            Toks[7].getKind());
823  ASSERT_EQ(tok::newline,            Toks[8].getKind());
824}
825
826TEST_F(CommentLexerTest, HTML1) {
827  const char *Source =
828    "// <";
829
830  std::vector<Token> Toks;
831
832  lexString(Source, Toks);
833
834  ASSERT_EQ(3U, Toks.size());
835
836  ASSERT_EQ(tok::text,      Toks[0].getKind());
837  ASSERT_EQ(StringRef(" "), Toks[0].getText());
838
839  ASSERT_EQ(tok::text,      Toks[1].getKind());
840  ASSERT_EQ(StringRef("<"), Toks[1].getText());
841
842  ASSERT_EQ(tok::newline,   Toks[2].getKind());
843}
844
845TEST_F(CommentLexerTest, HTML2) {
846  const char *Source =
847    "// a<2";
848
849  std::vector<Token> Toks;
850
851  lexString(Source, Toks);
852
853  ASSERT_EQ(4U, Toks.size());
854
855  ASSERT_EQ(tok::text,       Toks[0].getKind());
856  ASSERT_EQ(StringRef(" a"), Toks[0].getText());
857
858  ASSERT_EQ(tok::text,       Toks[1].getKind());
859  ASSERT_EQ(StringRef("<"),  Toks[1].getText());
860
861  ASSERT_EQ(tok::text,       Toks[2].getKind());
862  ASSERT_EQ(StringRef("2"),  Toks[2].getText());
863
864  ASSERT_EQ(tok::newline,    Toks[3].getKind());
865}
866
867TEST_F(CommentLexerTest, HTML3) {
868  const char *Source =
869    "// < img";
870
871  std::vector<Token> Toks;
872
873  lexString(Source, Toks);
874
875  ASSERT_EQ(4U, Toks.size());
876
877  ASSERT_EQ(tok::text,         Toks[0].getKind());
878  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
879
880  ASSERT_EQ(tok::text,         Toks[1].getKind());
881  ASSERT_EQ(StringRef("<"),    Toks[1].getText());
882
883  ASSERT_EQ(tok::text,         Toks[2].getKind());
884  ASSERT_EQ(StringRef(" img"), Toks[2].getText());
885
886  ASSERT_EQ(tok::newline,      Toks[3].getKind());
887}
888
889TEST_F(CommentLexerTest, HTML4) {
890  const char *Sources[] = {
891    "// <img",
892    "// <img "
893  };
894
895  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
896    std::vector<Token> Toks;
897
898    lexString(Sources[i], Toks);
899
900    ASSERT_EQ(3U, Toks.size());
901
902    ASSERT_EQ(tok::text,           Toks[0].getKind());
903    ASSERT_EQ(StringRef(" "),      Toks[0].getText());
904
905    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
906    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
907
908    ASSERT_EQ(tok::newline,        Toks[2].getKind());
909  }
910}
911
912TEST_F(CommentLexerTest, HTML5) {
913  const char *Source =
914    "// <img 42";
915
916  std::vector<Token> Toks;
917
918  lexString(Source, Toks);
919
920  ASSERT_EQ(4U, Toks.size());
921
922  ASSERT_EQ(tok::text,           Toks[0].getKind());
923  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
924
925  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
926  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
927
928  ASSERT_EQ(tok::text,           Toks[2].getKind());
929  ASSERT_EQ(StringRef("42"),     Toks[2].getText());
930
931  ASSERT_EQ(tok::newline,        Toks[3].getKind());
932}
933
934TEST_F(CommentLexerTest, HTML6) {
935  const char *Source = "// <img> Meow";
936
937  std::vector<Token> Toks;
938
939  lexString(Source, Toks);
940
941  ASSERT_EQ(5U, Toks.size());
942
943  ASSERT_EQ(tok::text,           Toks[0].getKind());
944  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
945
946  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
947  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
948
949  ASSERT_EQ(tok::html_greater,   Toks[2].getKind());
950
951  ASSERT_EQ(tok::text,           Toks[3].getKind());
952  ASSERT_EQ(StringRef(" Meow"),  Toks[3].getText());
953
954  ASSERT_EQ(tok::newline,        Toks[4].getKind());
955}
956
957TEST_F(CommentLexerTest, HTML7) {
958  const char *Source = "// <img=";
959
960  std::vector<Token> Toks;
961
962  lexString(Source, Toks);
963
964  ASSERT_EQ(4U, Toks.size());
965
966  ASSERT_EQ(tok::text,           Toks[0].getKind());
967  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
968
969  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
970  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
971
972  ASSERT_EQ(tok::text,           Toks[2].getKind());
973  ASSERT_EQ(StringRef("="),      Toks[2].getText());
974
975  ASSERT_EQ(tok::newline,        Toks[3].getKind());
976}
977
978TEST_F(CommentLexerTest, HTML8) {
979  const char *Source = "// <img src=> Meow";
980
981  std::vector<Token> Toks;
982
983  lexString(Source, Toks);
984
985  ASSERT_EQ(7U, Toks.size());
986
987  ASSERT_EQ(tok::text,           Toks[0].getKind());
988  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
989
990  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
991  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
992
993  ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
994  ASSERT_EQ(StringRef("src"),   Toks[2].getHTMLIdent());
995
996  ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
997
998  ASSERT_EQ(tok::html_greater,   Toks[4].getKind());
999
1000  ASSERT_EQ(tok::text,           Toks[5].getKind());
1001  ASSERT_EQ(StringRef(" Meow"),  Toks[5].getText());
1002
1003  ASSERT_EQ(tok::newline,        Toks[6].getKind());
1004}
1005
1006TEST_F(CommentLexerTest, HTML9) {
1007  const char *Sources[] = {
1008    "// <img src",
1009    "// <img src "
1010  };
1011
1012  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1013    std::vector<Token> Toks;
1014
1015    lexString(Sources[i], Toks);
1016
1017    ASSERT_EQ(4U, Toks.size());
1018
1019    ASSERT_EQ(tok::text,           Toks[0].getKind());
1020    ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1021
1022    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1023    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1024
1025    ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1026    ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
1027
1028    ASSERT_EQ(tok::newline,        Toks[3].getKind());
1029  }
1030}
1031
1032TEST_F(CommentLexerTest, HTML10) {
1033  const char *Sources[] = {
1034    "// <img src=",
1035    "// <img src ="
1036  };
1037
1038  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1039    std::vector<Token> Toks;
1040
1041    lexString(Sources[i], Toks);
1042
1043    ASSERT_EQ(5U, Toks.size());
1044
1045    ASSERT_EQ(tok::text,           Toks[0].getKind());
1046    ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1047
1048    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1049    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1050
1051    ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1052    ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
1053
1054    ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
1055
1056    ASSERT_EQ(tok::newline,        Toks[4].getKind());
1057  }
1058}
1059
1060TEST_F(CommentLexerTest, HTML11) {
1061  const char *Sources[] = {
1062    "// <img src=\"",
1063    "// <img src = \"",
1064    "// <img src=\'",
1065    "// <img src = \'"
1066  };
1067
1068  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1069    std::vector<Token> Toks;
1070
1071    lexString(Sources[i], Toks);
1072
1073    ASSERT_EQ(6U, Toks.size());
1074
1075    ASSERT_EQ(tok::text,               Toks[0].getKind());
1076    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
1077
1078    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
1079    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
1080
1081    ASSERT_EQ(tok::html_ident,         Toks[2].getKind());
1082    ASSERT_EQ(StringRef("src"),        Toks[2].getHTMLIdent());
1083
1084    ASSERT_EQ(tok::html_equals,        Toks[3].getKind());
1085
1086    ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1087    ASSERT_EQ(StringRef(""),           Toks[4].getHTMLQuotedString());
1088
1089    ASSERT_EQ(tok::newline,            Toks[5].getKind());
1090  }
1091}
1092
1093TEST_F(CommentLexerTest, HTML12) {
1094  const char *Source = "// <img src=@";
1095
1096  std::vector<Token> Toks;
1097
1098  lexString(Source, Toks);
1099
1100  ASSERT_EQ(6U, Toks.size());
1101
1102  ASSERT_EQ(tok::text,           Toks[0].getKind());
1103  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1104
1105  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1106  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1107
1108  ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1109  ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
1110
1111  ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
1112
1113  ASSERT_EQ(tok::text,           Toks[4].getKind());
1114  ASSERT_EQ(StringRef("@"),      Toks[4].getText());
1115
1116  ASSERT_EQ(tok::newline,        Toks[5].getKind());
1117}
1118
1119TEST_F(CommentLexerTest, HTML13) {
1120  const char *Sources[] = {
1121    "// <img src=\"val\\\"\\'val",
1122    "// <img src=\"val\\\"\\'val\"",
1123    "// <img src=\'val\\\"\\'val",
1124    "// <img src=\'val\\\"\\'val\'"
1125  };
1126
1127  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1128    std::vector<Token> Toks;
1129
1130    lexString(Sources[i], Toks);
1131
1132    ASSERT_EQ(6U, Toks.size());
1133
1134    ASSERT_EQ(tok::text,                  Toks[0].getKind());
1135    ASSERT_EQ(StringRef(" "),             Toks[0].getText());
1136
1137    ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
1138    ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());
1139
1140    ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
1141    ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());
1142
1143    ASSERT_EQ(tok::html_equals,           Toks[3].getKind());
1144
1145    ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
1146    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1147
1148    ASSERT_EQ(tok::newline,               Toks[5].getKind());
1149  }
1150}
1151
1152TEST_F(CommentLexerTest, HTML14) {
1153  const char *Sources[] = {
1154    "// <img src=\"val\\\"\\'val\">",
1155    "// <img src=\'val\\\"\\'val\'>"
1156  };
1157
1158  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1159    std::vector<Token> Toks;
1160
1161    lexString(Sources[i], Toks);
1162
1163    ASSERT_EQ(7U, Toks.size());
1164
1165    ASSERT_EQ(tok::text,                  Toks[0].getKind());
1166    ASSERT_EQ(StringRef(" "),             Toks[0].getText());
1167
1168    ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
1169    ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());
1170
1171    ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
1172    ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());
1173
1174    ASSERT_EQ(tok::html_equals,           Toks[3].getKind());
1175
1176    ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
1177    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1178
1179    ASSERT_EQ(tok::html_greater,          Toks[5].getKind());
1180
1181    ASSERT_EQ(tok::newline,               Toks[6].getKind());
1182  }
1183}
1184
1185TEST_F(CommentLexerTest, HTML15) {
1186  const char *Sources[] = {
1187    "// <img/>",
1188    "// <img />"
1189  };
1190
1191  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1192    std::vector<Token> Toks;
1193
1194    lexString(Sources[i], Toks);
1195
1196    ASSERT_EQ(4U, Toks.size());
1197
1198    ASSERT_EQ(tok::text,               Toks[0].getKind());
1199    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
1200
1201    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
1202    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
1203
1204    ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind());
1205
1206    ASSERT_EQ(tok::newline,            Toks[3].getKind());
1207  }
1208}
1209
1210TEST_F(CommentLexerTest, HTML16) {
1211  const char *Sources[] = {
1212    "// <img/ Aaa",
1213    "// <img / Aaa"
1214  };
1215
1216  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1217    std::vector<Token> Toks;
1218
1219    lexString(Sources[i], Toks);
1220
1221    ASSERT_EQ(5U, Toks.size());
1222
1223    ASSERT_EQ(tok::text,               Toks[0].getKind());
1224    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
1225
1226    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
1227    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
1228
1229    ASSERT_EQ(tok::text,               Toks[2].getKind());
1230    ASSERT_EQ(StringRef("/"),          Toks[2].getText());
1231
1232    ASSERT_EQ(tok::text,               Toks[3].getKind());
1233    ASSERT_EQ(StringRef(" Aaa"),       Toks[3].getText());
1234
1235    ASSERT_EQ(tok::newline,            Toks[4].getKind());
1236  }
1237}
1238
1239TEST_F(CommentLexerTest, HTML17) {
1240  const char *Source = "// </";
1241
1242  std::vector<Token> Toks;
1243
1244  lexString(Source, Toks);
1245
1246  ASSERT_EQ(3U, Toks.size());
1247
1248  ASSERT_EQ(tok::text,       Toks[0].getKind());
1249  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
1250
1251  ASSERT_EQ(tok::text,       Toks[1].getKind());
1252  ASSERT_EQ(StringRef("</"), Toks[1].getText());
1253
1254  ASSERT_EQ(tok::newline,    Toks[2].getKind());
1255}
1256
1257TEST_F(CommentLexerTest, HTML18) {
1258  const char *Source = "// </@";
1259
1260  std::vector<Token> Toks;
1261
1262  lexString(Source, Toks);
1263
1264  ASSERT_EQ(4U, Toks.size());
1265
1266  ASSERT_EQ(tok::text,       Toks[0].getKind());
1267  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
1268
1269  ASSERT_EQ(tok::text,       Toks[1].getKind());
1270  ASSERT_EQ(StringRef("</"), Toks[1].getText());
1271
1272  ASSERT_EQ(tok::text,       Toks[2].getKind());
1273  ASSERT_EQ(StringRef("@"),  Toks[2].getText());
1274
1275  ASSERT_EQ(tok::newline,    Toks[3].getKind());
1276}
1277
1278TEST_F(CommentLexerTest, HTML19) {
1279  const char *Source = "// </img";
1280
1281  std::vector<Token> Toks;
1282
1283  lexString(Source, Toks);
1284
1285  ASSERT_EQ(3U, Toks.size());
1286
1287  ASSERT_EQ(tok::text,         Toks[0].getKind());
1288  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1289
1290  ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
1291  ASSERT_EQ(StringRef("img"),  Toks[1].getHTMLTagEndName());
1292
1293  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1294}
1295
1296TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
1297  const char *Source = "// <tag>";
1298
1299  std::vector<Token> Toks;
1300
1301  lexString(Source, Toks);
1302
1303  ASSERT_EQ(4U, Toks.size());
1304
1305  ASSERT_EQ(tok::text,         Toks[0].getKind());
1306  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1307
1308  ASSERT_EQ(tok::text,         Toks[1].getKind());
1309  ASSERT_EQ(StringRef("<tag"), Toks[1].getText());
1310
1311  ASSERT_EQ(tok::text,         Toks[2].getKind());
1312  ASSERT_EQ(StringRef(">"),    Toks[2].getText());
1313
1314  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1315}
1316
1317TEST_F(CommentLexerTest, NotAKnownHTMLTag2) {
1318  const char *Source = "// </tag>";
1319
1320  std::vector<Token> Toks;
1321
1322  lexString(Source, Toks);
1323
1324  ASSERT_EQ(4U, Toks.size());
1325
1326  ASSERT_EQ(tok::text,          Toks[0].getKind());
1327  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1328
1329  ASSERT_EQ(tok::text,          Toks[1].getKind());
1330  ASSERT_EQ(StringRef("</tag"), Toks[1].getText());
1331
1332  ASSERT_EQ(tok::text,          Toks[2].getKind());
1333  ASSERT_EQ(StringRef(">"),     Toks[2].getText());
1334
1335  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1336}
1337
1338TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
1339  const char *Source = "// &";
1340
1341  std::vector<Token> Toks;
1342
1343  lexString(Source, Toks);
1344
1345  ASSERT_EQ(3U, Toks.size());
1346
1347  ASSERT_EQ(tok::text,         Toks[0].getKind());
1348  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1349
1350  ASSERT_EQ(tok::text,         Toks[1].getKind());
1351  ASSERT_EQ(StringRef("&"),    Toks[1].getText());
1352
1353  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1354}
1355
1356TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
1357  const char *Source = "// &!";
1358
1359  std::vector<Token> Toks;
1360
1361  lexString(Source, Toks);
1362
1363  ASSERT_EQ(4U, Toks.size());
1364
1365  ASSERT_EQ(tok::text,         Toks[0].getKind());
1366  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1367
1368  ASSERT_EQ(tok::text,         Toks[1].getKind());
1369  ASSERT_EQ(StringRef("&"),    Toks[1].getText());
1370
1371  ASSERT_EQ(tok::text,         Toks[2].getKind());
1372  ASSERT_EQ(StringRef("!"),    Toks[2].getText());
1373
1374  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1375}
1376
1377TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
1378  const char *Source = "// &amp";
1379
1380  std::vector<Token> Toks;
1381
1382  lexString(Source, Toks);
1383
1384  ASSERT_EQ(3U, Toks.size());
1385
1386  ASSERT_EQ(tok::text,         Toks[0].getKind());
1387  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1388
1389  ASSERT_EQ(tok::text,         Toks[1].getKind());
1390  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1391
1392  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1393}
1394
1395TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
1396  const char *Source = "// &amp!";
1397
1398  std::vector<Token> Toks;
1399
1400  lexString(Source, Toks);
1401
1402  ASSERT_EQ(4U, Toks.size());
1403
1404  ASSERT_EQ(tok::text,         Toks[0].getKind());
1405  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1406
1407  ASSERT_EQ(tok::text,         Toks[1].getKind());
1408  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1409
1410  ASSERT_EQ(tok::text,         Toks[2].getKind());
1411  ASSERT_EQ(StringRef("!"),    Toks[2].getText());
1412
1413  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1414}
1415
1416TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
1417  const char *Source = "// &#";
1418
1419  std::vector<Token> Toks;
1420
1421  lexString(Source, Toks);
1422
1423  ASSERT_EQ(3U, Toks.size());
1424
1425  ASSERT_EQ(tok::text,         Toks[0].getKind());
1426  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1427
1428  ASSERT_EQ(tok::text,         Toks[1].getKind());
1429  ASSERT_EQ(StringRef("&#"),   Toks[1].getText());
1430
1431  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1432}
1433
1434TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
1435  const char *Source = "// &#a";
1436
1437  std::vector<Token> Toks;
1438
1439  lexString(Source, Toks);
1440
1441  ASSERT_EQ(4U, Toks.size());
1442
1443  ASSERT_EQ(tok::text,         Toks[0].getKind());
1444  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1445
1446  ASSERT_EQ(tok::text,         Toks[1].getKind());
1447  ASSERT_EQ(StringRef("&#"),   Toks[1].getText());
1448
1449  ASSERT_EQ(tok::text,         Toks[2].getKind());
1450  ASSERT_EQ(StringRef("a"),    Toks[2].getText());
1451
1452  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1453}
1454
1455TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
1456  const char *Source = "// &#42";
1457
1458  std::vector<Token> Toks;
1459
1460  lexString(Source, Toks);
1461
1462  ASSERT_EQ(3U, Toks.size());
1463
1464  ASSERT_EQ(tok::text,         Toks[0].getKind());
1465  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1466
1467  ASSERT_EQ(tok::text,         Toks[1].getKind());
1468  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1469
1470  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1471}
1472
1473TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
1474  const char *Source = "// &#42a";
1475
1476  std::vector<Token> Toks;
1477
1478  lexString(Source, Toks);
1479
1480  ASSERT_EQ(4U, Toks.size());
1481
1482  ASSERT_EQ(tok::text,         Toks[0].getKind());
1483  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1484
1485  ASSERT_EQ(tok::text,         Toks[1].getKind());
1486  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1487
1488  ASSERT_EQ(tok::text,         Toks[2].getKind());
1489  ASSERT_EQ(StringRef("a"),    Toks[2].getText());
1490
1491  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1492}
1493
1494TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
1495  const char *Source = "// &#x";
1496
1497  std::vector<Token> Toks;
1498
1499  lexString(Source, Toks);
1500
1501  ASSERT_EQ(3U, Toks.size());
1502
1503  ASSERT_EQ(tok::text,         Toks[0].getKind());
1504  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1505
1506  ASSERT_EQ(tok::text,         Toks[1].getKind());
1507  ASSERT_EQ(StringRef("&#x"),  Toks[1].getText());
1508
1509  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1510}
1511
1512TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
1513  const char *Source = "// &#xz";
1514
1515  std::vector<Token> Toks;
1516
1517  lexString(Source, Toks);
1518
1519  ASSERT_EQ(4U, Toks.size());
1520
1521  ASSERT_EQ(tok::text,         Toks[0].getKind());
1522  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1523
1524  ASSERT_EQ(tok::text,         Toks[1].getKind());
1525  ASSERT_EQ(StringRef("&#x"),  Toks[1].getText());
1526
1527  ASSERT_EQ(tok::text,         Toks[2].getKind());
1528  ASSERT_EQ(StringRef("z"),    Toks[2].getText());
1529
1530  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1531}
1532
1533TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
1534  const char *Source = "// &#xab";
1535
1536  std::vector<Token> Toks;
1537
1538  lexString(Source, Toks);
1539
1540  ASSERT_EQ(3U, Toks.size());
1541
1542  ASSERT_EQ(tok::text,          Toks[0].getKind());
1543  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1544
1545  ASSERT_EQ(tok::text,          Toks[1].getKind());
1546  ASSERT_EQ(StringRef("&#xab"), Toks[1].getText());
1547
1548  ASSERT_EQ(tok::newline,       Toks[2].getKind());
1549}
1550
1551TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
1552  const char *Source = "// &#xaBz";
1553
1554  std::vector<Token> Toks;
1555
1556  lexString(Source, Toks);
1557
1558  ASSERT_EQ(4U, Toks.size());
1559
1560  ASSERT_EQ(tok::text,          Toks[0].getKind());
1561  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1562
1563  ASSERT_EQ(tok::text,          Toks[1].getKind());
1564  ASSERT_EQ(StringRef("&#xaB"), Toks[1].getText());
1565
1566  ASSERT_EQ(tok::text,          Toks[2].getKind());
1567  ASSERT_EQ(StringRef("z"),     Toks[2].getText());
1568
1569  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1570}
1571
1572TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
1573  const char *Source = "// &amp;";
1574
1575  std::vector<Token> Toks;
1576
1577  lexString(Source, Toks);
1578
1579  ASSERT_EQ(3U, Toks.size());
1580
1581  ASSERT_EQ(tok::text,          Toks[0].getKind());
1582  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1583
1584  ASSERT_EQ(tok::text,          Toks[1].getKind());
1585  ASSERT_EQ(StringRef("&"),     Toks[1].getText());
1586
1587  ASSERT_EQ(tok::newline,       Toks[2].getKind());
1588}
1589
1590TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
1591  const char *Source = "// &amp;&lt;";
1592
1593  std::vector<Token> Toks;
1594
1595  lexString(Source, Toks);
1596
1597  ASSERT_EQ(4U, Toks.size());
1598
1599  ASSERT_EQ(tok::text,          Toks[0].getKind());
1600  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1601
1602  ASSERT_EQ(tok::text,          Toks[1].getKind());
1603  ASSERT_EQ(StringRef("&"),     Toks[1].getText());
1604
1605  ASSERT_EQ(tok::text,          Toks[2].getKind());
1606  ASSERT_EQ(StringRef("<"),     Toks[2].getText());
1607
1608  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1609}
1610
1611TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
1612  const char *Source = "// &amp; meow";
1613
1614  std::vector<Token> Toks;
1615
1616  lexString(Source, Toks);
1617
1618  ASSERT_EQ(4U, Toks.size());
1619
1620  ASSERT_EQ(tok::text,          Toks[0].getKind());
1621  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1622
1623  ASSERT_EQ(tok::text,          Toks[1].getKind());
1624  ASSERT_EQ(StringRef("&"),     Toks[1].getText());
1625
1626  ASSERT_EQ(tok::text,          Toks[2].getKind());
1627  ASSERT_EQ(StringRef(" meow"), Toks[2].getText());
1628
1629  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1630}
1631
1632TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
1633  const char *Sources[] = {
1634    "// &#61;",
1635    "// &#x3d;",
1636    "// &#X3d;"
1637  };
1638
1639  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1640    std::vector<Token> Toks;
1641
1642    lexString(Sources[i], Toks);
1643
1644    ASSERT_EQ(3U, Toks.size());
1645
1646    ASSERT_EQ(tok::text,          Toks[0].getKind());
1647    ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1648
1649    ASSERT_EQ(tok::text,          Toks[1].getKind());
1650    ASSERT_EQ(StringRef("="),     Toks[1].getText());
1651
1652    ASSERT_EQ(tok::newline,       Toks[2].getKind());
1653  }
1654}
1655
1656TEST_F(CommentLexerTest, MultipleComments) {
1657  const char *Source =
1658    "// Aaa\n"
1659    "/// Bbb\n"
1660    "/* Ccc\n"
1661    " * Ddd*/\n"
1662    "/** Eee*/";
1663
1664  std::vector<Token> Toks;
1665
1666  lexString(Source, Toks);
1667
1668  ASSERT_EQ(12U, Toks.size());
1669
1670  ASSERT_EQ(tok::text,           Toks[0].getKind());
1671  ASSERT_EQ(StringRef(" Aaa"),   Toks[0].getText());
1672  ASSERT_EQ(tok::newline,        Toks[1].getKind());
1673
1674  ASSERT_EQ(tok::text,           Toks[2].getKind());
1675  ASSERT_EQ(StringRef(" Bbb"),   Toks[2].getText());
1676  ASSERT_EQ(tok::newline,        Toks[3].getKind());
1677
1678  ASSERT_EQ(tok::text,           Toks[4].getKind());
1679  ASSERT_EQ(StringRef(" Ccc"),   Toks[4].getText());
1680  ASSERT_EQ(tok::newline,        Toks[5].getKind());
1681
1682  ASSERT_EQ(tok::text,           Toks[6].getKind());
1683  ASSERT_EQ(StringRef(" Ddd"),   Toks[6].getText());
1684  ASSERT_EQ(tok::newline,        Toks[7].getKind());
1685  ASSERT_EQ(tok::newline,        Toks[8].getKind());
1686
1687  ASSERT_EQ(tok::text,           Toks[9].getKind());
1688  ASSERT_EQ(StringRef(" Eee"),   Toks[9].getText());
1689
1690  ASSERT_EQ(tok::newline,        Toks[10].getKind());
1691  ASSERT_EQ(tok::newline,        Toks[11].getKind());
1692}
1693
1694} // end namespace comments
1695} // end namespace clang
1696
1697