CommentLexer.cpp revision 44a3ddbf7d2f46a002b5e85b240359c435509b4e
1//===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "clang/AST/CommentLexer.h"
11#include "clang/AST/CommentCommandTraits.h"
12#include "clang/Basic/Diagnostic.h"
13#include "clang/Basic/DiagnosticOptions.h"
14#include "clang/Basic/FileManager.h"
15#include "clang/Basic/SourceManager.h"
16#include "llvm/ADT/STLExtras.h"
17#include "gtest/gtest.h"
18#include <vector>
19
20using namespace llvm;
21using namespace clang;
22
23namespace clang {
24namespace comments {
25
26namespace {
27class CommentLexerTest : public ::testing::Test {
28protected:
29  CommentLexerTest()
30    : FileMgr(FileMgrOpts),
31      DiagID(new DiagnosticIDs()),
32      Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
33      SourceMgr(Diags, FileMgr),
34      Traits(Allocator) {
35  }
36
37  FileSystemOptions FileMgrOpts;
38  FileManager FileMgr;
39  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
40  DiagnosticsEngine Diags;
41  SourceManager SourceMgr;
42  llvm::BumpPtrAllocator Allocator;
43  CommandTraits Traits;
44
45  void lexString(const char *Source, std::vector<Token> &Toks);
46
47  StringRef getCommandName(const Token &Tok) {
48    return Traits.getCommandInfo(Tok.getCommandID())->Name;
49  }
50
51  StringRef getVerbatimBlockName(const Token &Tok) {
52    return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name;
53  }
54
55  StringRef getVerbatimLineName(const Token &Tok) {
56    return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name;
57  }
58};
59
60void CommentLexerTest::lexString(const char *Source,
61                                 std::vector<Token> &Toks) {
62  MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source);
63  FileID File = SourceMgr.createFileIDForMemBuffer(Buf);
64  SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
65
66  Lexer L(Allocator, Traits, Begin, Source, Source + strlen(Source));
67
68  while (1) {
69    Token Tok;
70    L.lex(Tok);
71    if (Tok.is(tok::eof))
72      break;
73    Toks.push_back(Tok);
74  }
75}
76
77} // unnamed namespace
78
79// Empty source range should be handled.
80TEST_F(CommentLexerTest, Basic1) {
81  const char *Source = "";
82  std::vector<Token> Toks;
83
84  lexString(Source, Toks);
85
86  ASSERT_EQ(0U, Toks.size());
87}
88
89// Empty comments should be handled.
90TEST_F(CommentLexerTest, Basic2) {
91  const char *Sources[] = {
92    "//", "///", "//!", "///<", "//!<"
93  };
94  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
95    std::vector<Token> Toks;
96
97    lexString(Sources[i], Toks);
98
99    ASSERT_EQ(1U, Toks.size());
100
101    ASSERT_EQ(tok::newline, Toks[0].getKind());
102  }
103}
104
105// Empty comments should be handled.
106TEST_F(CommentLexerTest, Basic3) {
107  const char *Sources[] = {
108    "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
109  };
110  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
111    std::vector<Token> Toks;
112
113    lexString(Sources[i], Toks);
114
115    ASSERT_EQ(2U, Toks.size());
116
117    ASSERT_EQ(tok::newline, Toks[0].getKind());
118    ASSERT_EQ(tok::newline, Toks[1].getKind());
119  }
120}
121
122// Single comment with plain text.
123TEST_F(CommentLexerTest, Basic4) {
124  const char *Sources[] = {
125    "// Meow",   "/// Meow",    "//! Meow",
126    "// Meow\n", "// Meow\r\n", "//! Meow\r",
127  };
128
129  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
130    std::vector<Token> Toks;
131
132    lexString(Sources[i], Toks);
133
134    ASSERT_EQ(2U, Toks.size());
135
136    ASSERT_EQ(tok::text,          Toks[0].getKind());
137    ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
138
139    ASSERT_EQ(tok::newline,       Toks[1].getKind());
140  }
141}
142
143// Single comment with plain text.
144TEST_F(CommentLexerTest, Basic5) {
145  const char *Sources[] = {
146    "/* Meow*/", "/** Meow*/",  "/*! Meow*/"
147  };
148
149  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
150    std::vector<Token> Toks;
151
152    lexString(Sources[i], Toks);
153
154    ASSERT_EQ(3U, Toks.size());
155
156    ASSERT_EQ(tok::text,          Toks[0].getKind());
157    ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
158
159    ASSERT_EQ(tok::newline,       Toks[1].getKind());
160    ASSERT_EQ(tok::newline,       Toks[2].getKind());
161  }
162}
163
164// Test newline escaping.
165TEST_F(CommentLexerTest, Basic6) {
166  const char *Sources[] = {
167    "// Aaa\\\n"   " Bbb\\ \n"   " Ccc?" "?/\n",
168    "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
169    "// Aaa\\\r"   " Bbb\\ \r"   " Ccc?" "?/\r"
170  };
171
172  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
173    std::vector<Token> Toks;
174
175    lexString(Sources[i], Toks);
176
177    ASSERT_EQ(10U, Toks.size());
178
179    ASSERT_EQ(tok::text,         Toks[0].getKind());
180    ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
181    ASSERT_EQ(tok::text,         Toks[1].getKind());
182    ASSERT_EQ(StringRef("\\"),   Toks[1].getText());
183    ASSERT_EQ(tok::newline,      Toks[2].getKind());
184
185    ASSERT_EQ(tok::text,         Toks[3].getKind());
186    ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText());
187    ASSERT_EQ(tok::text,         Toks[4].getKind());
188    ASSERT_EQ(StringRef("\\"),   Toks[4].getText());
189    ASSERT_EQ(tok::text,         Toks[5].getKind());
190    ASSERT_EQ(StringRef(" "),    Toks[5].getText());
191    ASSERT_EQ(tok::newline,      Toks[6].getKind());
192
193    ASSERT_EQ(tok::text,         Toks[7].getKind());
194    ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText());
195    ASSERT_EQ(tok::newline,      Toks[8].getKind());
196
197    ASSERT_EQ(tok::newline,      Toks[9].getKind());
198  }
199}
200
201// Check that we skip C-style aligned stars correctly.
202TEST_F(CommentLexerTest, Basic7) {
203  const char *Source =
204    "/* Aaa\n"
205    " * Bbb\r\n"
206    "\t* Ccc\n"
207    "  ! Ddd\n"
208    "  * Eee\n"
209    "  ** Fff\n"
210    " */";
211  std::vector<Token> Toks;
212
213  lexString(Source, Toks);
214
215  ASSERT_EQ(15U, Toks.size());
216
217  ASSERT_EQ(tok::text,         Toks[0].getKind());
218  ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
219  ASSERT_EQ(tok::newline,      Toks[1].getKind());
220
221  ASSERT_EQ(tok::text,         Toks[2].getKind());
222  ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
223  ASSERT_EQ(tok::newline,      Toks[3].getKind());
224
225  ASSERT_EQ(tok::text,         Toks[4].getKind());
226  ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
227  ASSERT_EQ(tok::newline,      Toks[5].getKind());
228
229  ASSERT_EQ(tok::text,            Toks[6].getKind());
230  ASSERT_EQ(StringRef("  ! Ddd"), Toks[6].getText());
231  ASSERT_EQ(tok::newline,         Toks[7].getKind());
232
233  ASSERT_EQ(tok::text,         Toks[8].getKind());
234  ASSERT_EQ(StringRef(" Eee"), Toks[8].getText());
235  ASSERT_EQ(tok::newline,      Toks[9].getKind());
236
237  ASSERT_EQ(tok::text,          Toks[10].getKind());
238  ASSERT_EQ(StringRef("* Fff"), Toks[10].getText());
239  ASSERT_EQ(tok::newline,       Toks[11].getKind());
240
241  ASSERT_EQ(tok::text,         Toks[12].getKind());
242  ASSERT_EQ(StringRef(" "),    Toks[12].getText());
243
244  ASSERT_EQ(tok::newline,      Toks[13].getKind());
245  ASSERT_EQ(tok::newline,      Toks[14].getKind());
246}
247
248// A command marker followed by comment end.
249TEST_F(CommentLexerTest, DoxygenCommand1) {
250  const char *Sources[] = { "//@", "///@", "//!@" };
251  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
252    std::vector<Token> Toks;
253
254    lexString(Sources[i], Toks);
255
256    ASSERT_EQ(2U, Toks.size());
257
258    ASSERT_EQ(tok::text,          Toks[0].getKind());
259    ASSERT_EQ(StringRef("@"),     Toks[0].getText());
260
261    ASSERT_EQ(tok::newline,       Toks[1].getKind());
262  }
263}
264
265// A command marker followed by comment end.
266TEST_F(CommentLexerTest, DoxygenCommand2) {
267  const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
268  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
269    std::vector<Token> Toks;
270
271    lexString(Sources[i], Toks);
272
273    ASSERT_EQ(3U, Toks.size());
274
275    ASSERT_EQ(tok::text,          Toks[0].getKind());
276    ASSERT_EQ(StringRef("@"),     Toks[0].getText());
277
278    ASSERT_EQ(tok::newline,       Toks[1].getKind());
279    ASSERT_EQ(tok::newline,       Toks[2].getKind());
280  }
281}
282
283// A command marker followed by comment end.
284TEST_F(CommentLexerTest, DoxygenCommand3) {
285  const char *Sources[] = { "/*\\*/", "/**\\*/" };
286  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
287    std::vector<Token> Toks;
288
289    lexString(Sources[i], Toks);
290
291    ASSERT_EQ(3U, Toks.size());
292
293    ASSERT_EQ(tok::text,           Toks[0].getKind());
294    ASSERT_EQ(StringRef("\\"),     Toks[0].getText());
295
296    ASSERT_EQ(tok::newline,        Toks[1].getKind());
297    ASSERT_EQ(tok::newline,        Toks[2].getKind());
298  }
299}
300
301// Doxygen escape sequences.
302TEST_F(CommentLexerTest, DoxygenCommand4) {
303  const char *Source =
304    "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::";
305  const char *Text[] = {
306    " ",
307    "\\", " ", "@", " ", "&", " ", "$",  " ", "#", " ",
308    "<",  " ", ">", " ", "%", " ", "\"", " ", ".", " ",
309    "::", ""
310  };
311
312  std::vector<Token> Toks;
313
314  lexString(Source, Toks);
315
316  ASSERT_EQ(array_lengthof(Text), Toks.size());
317
318  for (size_t i = 0, e = Toks.size(); i != e; i++) {
319    if(Toks[i].is(tok::text))
320      ASSERT_EQ(StringRef(Text[i]), Toks[i].getText())
321        << "index " << i;
322  }
323}
324
325// A command marker followed by a non-letter that is not a part of an escape
326// sequence.
327TEST_F(CommentLexerTest, DoxygenCommand5) {
328  const char *Source = "/// \\^ \\0";
329  std::vector<Token> Toks;
330
331  lexString(Source, Toks);
332
333  ASSERT_EQ(6U, Toks.size());
334
335  ASSERT_EQ(tok::text,       Toks[0].getKind());
336  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
337
338  ASSERT_EQ(tok::text,       Toks[1].getKind());
339  ASSERT_EQ(StringRef("\\"), Toks[1].getText());
340
341  ASSERT_EQ(tok::text,       Toks[2].getKind());
342  ASSERT_EQ(StringRef("^ "), Toks[2].getText());
343
344  ASSERT_EQ(tok::text,       Toks[3].getKind());
345  ASSERT_EQ(StringRef("\\"), Toks[3].getText());
346
347  ASSERT_EQ(tok::text,       Toks[4].getKind());
348  ASSERT_EQ(StringRef("0"),  Toks[4].getText());
349
350  ASSERT_EQ(tok::newline,    Toks[5].getKind());
351}
352
353TEST_F(CommentLexerTest, DoxygenCommand6) {
354  const char *Source = "/// \\brief Aaa.";
355  std::vector<Token> Toks;
356
357  lexString(Source, Toks);
358
359  ASSERT_EQ(4U, Toks.size());
360
361  ASSERT_EQ(tok::text,          Toks[0].getKind());
362  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
363
364  ASSERT_EQ(tok::command,       Toks[1].getKind());
365  ASSERT_EQ(StringRef("brief"), getCommandName(Toks[1]));
366
367  ASSERT_EQ(tok::text,          Toks[2].getKind());
368  ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
369
370  ASSERT_EQ(tok::newline,       Toks[3].getKind());
371}
372
373TEST_F(CommentLexerTest, DoxygenCommand7) {
374  const char *Source = "/// \\em\\em \\em\t\\em\n";
375  std::vector<Token> Toks;
376
377  lexString(Source, Toks);
378
379  ASSERT_EQ(8U, Toks.size());
380
381  ASSERT_EQ(tok::text,       Toks[0].getKind());
382  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
383
384  ASSERT_EQ(tok::command,    Toks[1].getKind());
385  ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
386
387  ASSERT_EQ(tok::command,    Toks[2].getKind());
388  ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
389
390  ASSERT_EQ(tok::text,       Toks[3].getKind());
391  ASSERT_EQ(StringRef(" "),  Toks[3].getText());
392
393  ASSERT_EQ(tok::command,    Toks[4].getKind());
394  ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
395
396  ASSERT_EQ(tok::text,       Toks[5].getKind());
397  ASSERT_EQ(StringRef("\t"), Toks[5].getText());
398
399  ASSERT_EQ(tok::command,    Toks[6].getKind());
400  ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
401
402  ASSERT_EQ(tok::newline,    Toks[7].getKind());
403}
404
405TEST_F(CommentLexerTest, DoxygenCommand8) {
406  const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
407  std::vector<Token> Toks;
408
409  lexString(Source, Toks);
410
411  ASSERT_EQ(8U, Toks.size());
412
413  ASSERT_EQ(tok::text,        Toks[0].getKind());
414  ASSERT_EQ(StringRef(" "),   Toks[0].getText());
415
416  ASSERT_EQ(tok::unknown_command, Toks[1].getKind());
417  ASSERT_EQ(StringRef("aaa"), Toks[1].getUnknownCommandName());
418
419  ASSERT_EQ(tok::unknown_command, Toks[2].getKind());
420  ASSERT_EQ(StringRef("bbb"), Toks[2].getUnknownCommandName());
421
422  ASSERT_EQ(tok::text,        Toks[3].getKind());
423  ASSERT_EQ(StringRef(" "),   Toks[3].getText());
424
425  ASSERT_EQ(tok::unknown_command, Toks[4].getKind());
426  ASSERT_EQ(StringRef("ccc"), Toks[4].getUnknownCommandName());
427
428  ASSERT_EQ(tok::text,        Toks[5].getKind());
429  ASSERT_EQ(StringRef("\t"),  Toks[5].getText());
430
431  ASSERT_EQ(tok::unknown_command, Toks[6].getKind());
432  ASSERT_EQ(StringRef("ddd"), Toks[6].getUnknownCommandName());
433
434  ASSERT_EQ(tok::newline,     Toks[7].getKind());
435}
436
437TEST_F(CommentLexerTest, DoxygenCommand9) {
438  const char *Source = "// \\c\n";
439  std::vector<Token> Toks;
440
441  lexString(Source, Toks);
442
443  ASSERT_EQ(3U, Toks.size());
444
445  ASSERT_EQ(tok::text,      Toks[0].getKind());
446  ASSERT_EQ(StringRef(" "), Toks[0].getText());
447
448  ASSERT_EQ(tok::command,   Toks[1].getKind());
449  ASSERT_EQ(StringRef("c"), getCommandName(Toks[1]));
450
451  ASSERT_EQ(tok::newline,   Toks[2].getKind());
452}
453
454// Empty verbatim block.
455TEST_F(CommentLexerTest, VerbatimBlock1) {
456  const char *Sources[] = {
457    "/// \\verbatim\\endverbatim\n//",
458    "/** \\verbatim\\endverbatim*/"
459  };
460
461  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
462    std::vector<Token> Toks;
463
464    lexString(Sources[i], Toks);
465
466    ASSERT_EQ(5U, Toks.size());
467
468    ASSERT_EQ(tok::text,                 Toks[0].getKind());
469    ASSERT_EQ(StringRef(" "),            Toks[0].getText());
470
471    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
472    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
473
474    ASSERT_EQ(tok::verbatim_block_end,   Toks[2].getKind());
475    ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[2]));
476
477    ASSERT_EQ(tok::newline,              Toks[3].getKind());
478    ASSERT_EQ(tok::newline,              Toks[4].getKind());
479  }
480}
481
482// Empty verbatim block without an end command.
483TEST_F(CommentLexerTest, VerbatimBlock2) {
484  const char *Source = "/// \\verbatim";
485
486  std::vector<Token> Toks;
487
488  lexString(Source, Toks);
489
490  ASSERT_EQ(3U, Toks.size());
491
492  ASSERT_EQ(tok::text,                 Toks[0].getKind());
493  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
494
495  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
496  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
497
498  ASSERT_EQ(tok::newline,              Toks[2].getKind());
499}
500
501// Empty verbatim block without an end command.
502TEST_F(CommentLexerTest, VerbatimBlock3) {
503  const char *Source = "/** \\verbatim*/";
504
505  std::vector<Token> Toks;
506
507  lexString(Source, Toks);
508
509  ASSERT_EQ(4U, Toks.size());
510
511  ASSERT_EQ(tok::text,                 Toks[0].getKind());
512  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
513
514  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
515  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
516
517  ASSERT_EQ(tok::newline,              Toks[2].getKind());
518  ASSERT_EQ(tok::newline,              Toks[3].getKind());
519}
520
521// Single-line verbatim block.
522TEST_F(CommentLexerTest, VerbatimBlock4) {
523  const char *Sources[] = {
524    "/// Meow \\verbatim aaa \\endverbatim\n//",
525    "/** Meow \\verbatim aaa \\endverbatim*/"
526  };
527
528  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
529    std::vector<Token> Toks;
530
531    lexString(Sources[i], Toks);
532
533    ASSERT_EQ(6U, Toks.size());
534
535    ASSERT_EQ(tok::text,                 Toks[0].getKind());
536    ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
537
538    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
539    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
540
541    ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
542    ASSERT_EQ(StringRef(" aaa "),        Toks[2].getVerbatimBlockText());
543
544    ASSERT_EQ(tok::verbatim_block_end,   Toks[3].getKind());
545    ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[3]));
546
547    ASSERT_EQ(tok::newline,              Toks[4].getKind());
548    ASSERT_EQ(tok::newline,              Toks[5].getKind());
549  }
550}
551
552// Single-line verbatim block without an end command.
553TEST_F(CommentLexerTest, VerbatimBlock5) {
554  const char *Sources[] = {
555    "/// Meow \\verbatim aaa \n//",
556    "/** Meow \\verbatim aaa */"
557  };
558
559  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
560    std::vector<Token> Toks;
561
562    lexString(Sources[i], Toks);
563
564    ASSERT_EQ(5U, Toks.size());
565
566    ASSERT_EQ(tok::text,                 Toks[0].getKind());
567    ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
568
569    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
570    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
571
572    ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
573    ASSERT_EQ(StringRef(" aaa "),        Toks[2].getVerbatimBlockText());
574
575    ASSERT_EQ(tok::newline,              Toks[3].getKind());
576    ASSERT_EQ(tok::newline,              Toks[4].getKind());
577  }
578}
579
580TEST_F(CommentLexerTest, VerbatimBlock6) {
581  const char *Source =
582    "// \\verbatim\n"
583    "// Aaa\n"
584    "//\n"
585    "// Bbb\n"
586    "// \\endverbatim\n";
587
588  std::vector<Token> Toks;
589
590  lexString(Source, Toks);
591
592  ASSERT_EQ(10U, Toks.size());
593
594  ASSERT_EQ(tok::text,                 Toks[0].getKind());
595  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
596
597  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
598  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
599
600  ASSERT_EQ(tok::newline,              Toks[2].getKind());
601
602  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
603  ASSERT_EQ(StringRef(" Aaa"),         Toks[3].getVerbatimBlockText());
604
605  ASSERT_EQ(tok::newline,              Toks[4].getKind());
606
607  ASSERT_EQ(tok::newline,              Toks[5].getKind());
608
609  ASSERT_EQ(tok::verbatim_block_line,  Toks[6].getKind());
610  ASSERT_EQ(StringRef(" Bbb"),         Toks[6].getVerbatimBlockText());
611
612  ASSERT_EQ(tok::newline,              Toks[7].getKind());
613
614  ASSERT_EQ(tok::verbatim_block_end,   Toks[8].getKind());
615  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[8]));
616
617  ASSERT_EQ(tok::newline,              Toks[9].getKind());
618}
619
620TEST_F(CommentLexerTest, VerbatimBlock7) {
621  const char *Source =
622    "/* \\verbatim\n"
623    " * Aaa\n"
624    " *\n"
625    " * Bbb\n"
626    " * \\endverbatim\n"
627    " */";
628
629  std::vector<Token> Toks;
630
631  lexString(Source, Toks);
632
633  ASSERT_EQ(10U, Toks.size());
634
635  ASSERT_EQ(tok::text,                 Toks[0].getKind());
636  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
637
638  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
639  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
640
641  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
642  ASSERT_EQ(StringRef(" Aaa"),         Toks[2].getVerbatimBlockText());
643
644  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
645  ASSERT_EQ(StringRef(""),             Toks[3].getVerbatimBlockText());
646
647  ASSERT_EQ(tok::verbatim_block_line,  Toks[4].getKind());
648  ASSERT_EQ(StringRef(" Bbb"),         Toks[4].getVerbatimBlockText());
649
650  ASSERT_EQ(tok::verbatim_block_end,   Toks[5].getKind());
651  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[5]));
652
653  ASSERT_EQ(tok::newline,              Toks[6].getKind());
654
655  ASSERT_EQ(tok::text,                 Toks[7].getKind());
656  ASSERT_EQ(StringRef(" "),            Toks[7].getText());
657
658  ASSERT_EQ(tok::newline,              Toks[8].getKind());
659  ASSERT_EQ(tok::newline,              Toks[9].getKind());
660}
661
662// Complex test for verbatim blocks.
663TEST_F(CommentLexerTest, VerbatimBlock8) {
664  const char *Source =
665    "/* Meow \\verbatim aaa\\$\\@\n"
666    "bbb \\endverbati\r"
667    "ccc\r\n"
668    "ddd \\endverbatim Blah \\verbatim eee\n"
669    "\\endverbatim BlahBlah*/";
670  std::vector<Token> Toks;
671
672  lexString(Source, Toks);
673
674  ASSERT_EQ(14U, Toks.size());
675
676  ASSERT_EQ(tok::text,                 Toks[0].getKind());
677  ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
678
679  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
680  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
681
682  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
683  ASSERT_EQ(StringRef(" aaa\\$\\@"),   Toks[2].getVerbatimBlockText());
684
685  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
686  ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText());
687
688  ASSERT_EQ(tok::verbatim_block_line,  Toks[4].getKind());
689  ASSERT_EQ(StringRef("ccc"),          Toks[4].getVerbatimBlockText());
690
691  ASSERT_EQ(tok::verbatim_block_line,  Toks[5].getKind());
692  ASSERT_EQ(StringRef("ddd "),         Toks[5].getVerbatimBlockText());
693
694  ASSERT_EQ(tok::verbatim_block_end,   Toks[6].getKind());
695  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[6]));
696
697  ASSERT_EQ(tok::text,                 Toks[7].getKind());
698  ASSERT_EQ(StringRef(" Blah "),       Toks[7].getText());
699
700  ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind());
701  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[8]));
702
703  ASSERT_EQ(tok::verbatim_block_line,  Toks[9].getKind());
704  ASSERT_EQ(StringRef(" eee"),         Toks[9].getVerbatimBlockText());
705
706  ASSERT_EQ(tok::verbatim_block_end,   Toks[10].getKind());
707  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[10]));
708
709  ASSERT_EQ(tok::text,                 Toks[11].getKind());
710  ASSERT_EQ(StringRef(" BlahBlah"),    Toks[11].getText());
711
712  ASSERT_EQ(tok::newline,              Toks[12].getKind());
713  ASSERT_EQ(tok::newline,              Toks[13].getKind());
714}
715
716// LaTeX verbatim blocks.
717TEST_F(CommentLexerTest, VerbatimBlock9) {
718  const char *Source =
719    "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}";
720  std::vector<Token> Toks;
721
722  lexString(Source, Toks);
723
724  ASSERT_EQ(13U, Toks.size());
725
726  ASSERT_EQ(tok::text,                 Toks[0].getKind());
727  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
728
729  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
730  ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[1]));
731
732  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
733  ASSERT_EQ(StringRef(" Aaa "),        Toks[2].getVerbatimBlockText());
734
735  ASSERT_EQ(tok::verbatim_block_end,   Toks[3].getKind());
736  ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[3]));
737
738  ASSERT_EQ(tok::text,                 Toks[4].getKind());
739  ASSERT_EQ(StringRef(" "),            Toks[4].getText());
740
741  ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind());
742  ASSERT_EQ(StringRef("f["),           getVerbatimBlockName(Toks[5]));
743
744  ASSERT_EQ(tok::verbatim_block_line,  Toks[6].getKind());
745  ASSERT_EQ(StringRef(" Bbb "),        Toks[6].getVerbatimBlockText());
746
747  ASSERT_EQ(tok::verbatim_block_end,   Toks[7].getKind());
748  ASSERT_EQ(StringRef("f]"),           getVerbatimBlockName(Toks[7]));
749
750  ASSERT_EQ(tok::text,                 Toks[8].getKind());
751  ASSERT_EQ(StringRef(" "),            Toks[8].getText());
752
753  ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind());
754  ASSERT_EQ(StringRef("f{"),           getVerbatimBlockName(Toks[9]));
755
756  ASSERT_EQ(tok::verbatim_block_line,  Toks[10].getKind());
757  ASSERT_EQ(StringRef(" Ccc "),        Toks[10].getVerbatimBlockText());
758
759  ASSERT_EQ(tok::verbatim_block_end,   Toks[11].getKind());
760  ASSERT_EQ(StringRef("f}"),           getVerbatimBlockName(Toks[11]));
761
762  ASSERT_EQ(tok::newline,              Toks[12].getKind());
763}
764
765// Empty verbatim line.
766TEST_F(CommentLexerTest, VerbatimLine1) {
767  const char *Sources[] = {
768    "/// \\fn\n//",
769    "/** \\fn*/"
770  };
771
772  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
773    std::vector<Token> Toks;
774
775    lexString(Sources[i], Toks);
776
777    ASSERT_EQ(4U, Toks.size());
778
779    ASSERT_EQ(tok::text,               Toks[0].getKind());
780    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
781
782    ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
783    ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
784
785    ASSERT_EQ(tok::newline,            Toks[2].getKind());
786    ASSERT_EQ(tok::newline,            Toks[3].getKind());
787  }
788}
789
790// Verbatim line with Doxygen escape sequences, which should not be expanded.
791TEST_F(CommentLexerTest, VerbatimLine2) {
792  const char *Sources[] = {
793    "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
794    "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
795  };
796
797  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
798    std::vector<Token> Toks;
799
800    lexString(Sources[i], Toks);
801
802    ASSERT_EQ(5U, Toks.size());
803
804    ASSERT_EQ(tok::text,               Toks[0].getKind());
805    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
806
807    ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
808    ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
809
810    ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
811    ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
812                                       Toks[2].getVerbatimLineText());
813
814    ASSERT_EQ(tok::newline,            Toks[3].getKind());
815    ASSERT_EQ(tok::newline,            Toks[4].getKind());
816  }
817}
818
819// Verbatim line should not eat anything from next source line.
820TEST_F(CommentLexerTest, VerbatimLine3) {
821  const char *Source =
822    "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
823    " * Meow\n"
824    " */";
825
826  std::vector<Token> Toks;
827
828  lexString(Source, Toks);
829
830  ASSERT_EQ(9U, Toks.size());
831
832  ASSERT_EQ(tok::text,               Toks[0].getKind());
833  ASSERT_EQ(StringRef(" "),          Toks[0].getText());
834
835  ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
836  ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
837
838  ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
839  ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
840                                     Toks[2].getVerbatimLineText());
841  ASSERT_EQ(tok::newline,            Toks[3].getKind());
842
843  ASSERT_EQ(tok::text,               Toks[4].getKind());
844  ASSERT_EQ(StringRef(" Meow"),      Toks[4].getText());
845  ASSERT_EQ(tok::newline,            Toks[5].getKind());
846
847  ASSERT_EQ(tok::text,               Toks[6].getKind());
848  ASSERT_EQ(StringRef(" "),          Toks[6].getText());
849
850  ASSERT_EQ(tok::newline,            Toks[7].getKind());
851  ASSERT_EQ(tok::newline,            Toks[8].getKind());
852}
853
854TEST_F(CommentLexerTest, HTML1) {
855  const char *Source =
856    "// <";
857
858  std::vector<Token> Toks;
859
860  lexString(Source, Toks);
861
862  ASSERT_EQ(3U, Toks.size());
863
864  ASSERT_EQ(tok::text,      Toks[0].getKind());
865  ASSERT_EQ(StringRef(" "), Toks[0].getText());
866
867  ASSERT_EQ(tok::text,      Toks[1].getKind());
868  ASSERT_EQ(StringRef("<"), Toks[1].getText());
869
870  ASSERT_EQ(tok::newline,   Toks[2].getKind());
871}
872
873TEST_F(CommentLexerTest, HTML2) {
874  const char *Source =
875    "// a<2";
876
877  std::vector<Token> Toks;
878
879  lexString(Source, Toks);
880
881  ASSERT_EQ(4U, Toks.size());
882
883  ASSERT_EQ(tok::text,       Toks[0].getKind());
884  ASSERT_EQ(StringRef(" a"), Toks[0].getText());
885
886  ASSERT_EQ(tok::text,       Toks[1].getKind());
887  ASSERT_EQ(StringRef("<"),  Toks[1].getText());
888
889  ASSERT_EQ(tok::text,       Toks[2].getKind());
890  ASSERT_EQ(StringRef("2"),  Toks[2].getText());
891
892  ASSERT_EQ(tok::newline,    Toks[3].getKind());
893}
894
895TEST_F(CommentLexerTest, HTML3) {
896  const char *Source =
897    "// < img";
898
899  std::vector<Token> Toks;
900
901  lexString(Source, Toks);
902
903  ASSERT_EQ(4U, Toks.size());
904
905  ASSERT_EQ(tok::text,         Toks[0].getKind());
906  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
907
908  ASSERT_EQ(tok::text,         Toks[1].getKind());
909  ASSERT_EQ(StringRef("<"),    Toks[1].getText());
910
911  ASSERT_EQ(tok::text,         Toks[2].getKind());
912  ASSERT_EQ(StringRef(" img"), Toks[2].getText());
913
914  ASSERT_EQ(tok::newline,      Toks[3].getKind());
915}
916
917TEST_F(CommentLexerTest, HTML4) {
918  const char *Sources[] = {
919    "// <img",
920    "// <img "
921  };
922
923  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
924    std::vector<Token> Toks;
925
926    lexString(Sources[i], Toks);
927
928    ASSERT_EQ(3U, Toks.size());
929
930    ASSERT_EQ(tok::text,           Toks[0].getKind());
931    ASSERT_EQ(StringRef(" "),      Toks[0].getText());
932
933    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
934    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
935
936    ASSERT_EQ(tok::newline,        Toks[2].getKind());
937  }
938}
939
940TEST_F(CommentLexerTest, HTML5) {
941  const char *Source =
942    "// <img 42";
943
944  std::vector<Token> Toks;
945
946  lexString(Source, Toks);
947
948  ASSERT_EQ(4U, Toks.size());
949
950  ASSERT_EQ(tok::text,           Toks[0].getKind());
951  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
952
953  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
954  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
955
956  ASSERT_EQ(tok::text,           Toks[2].getKind());
957  ASSERT_EQ(StringRef("42"),     Toks[2].getText());
958
959  ASSERT_EQ(tok::newline,        Toks[3].getKind());
960}
961
962TEST_F(CommentLexerTest, HTML6) {
963  const char *Source = "// <img> Meow";
964
965  std::vector<Token> Toks;
966
967  lexString(Source, Toks);
968
969  ASSERT_EQ(5U, Toks.size());
970
971  ASSERT_EQ(tok::text,           Toks[0].getKind());
972  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
973
974  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
975  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
976
977  ASSERT_EQ(tok::html_greater,   Toks[2].getKind());
978
979  ASSERT_EQ(tok::text,           Toks[3].getKind());
980  ASSERT_EQ(StringRef(" Meow"),  Toks[3].getText());
981
982  ASSERT_EQ(tok::newline,        Toks[4].getKind());
983}
984
985TEST_F(CommentLexerTest, HTML7) {
986  const char *Source = "// <img=";
987
988  std::vector<Token> Toks;
989
990  lexString(Source, Toks);
991
992  ASSERT_EQ(4U, Toks.size());
993
994  ASSERT_EQ(tok::text,           Toks[0].getKind());
995  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
996
997  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
998  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
999
1000  ASSERT_EQ(tok::text,           Toks[2].getKind());
1001  ASSERT_EQ(StringRef("="),      Toks[2].getText());
1002
1003  ASSERT_EQ(tok::newline,        Toks[3].getKind());
1004}
1005
1006TEST_F(CommentLexerTest, HTML8) {
1007  const char *Source = "// <img src=> Meow";
1008
1009  std::vector<Token> Toks;
1010
1011  lexString(Source, Toks);
1012
1013  ASSERT_EQ(7U, Toks.size());
1014
1015  ASSERT_EQ(tok::text,           Toks[0].getKind());
1016  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1017
1018  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1019  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1020
1021  ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1022  ASSERT_EQ(StringRef("src"),   Toks[2].getHTMLIdent());
1023
1024  ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
1025
1026  ASSERT_EQ(tok::html_greater,   Toks[4].getKind());
1027
1028  ASSERT_EQ(tok::text,           Toks[5].getKind());
1029  ASSERT_EQ(StringRef(" Meow"),  Toks[5].getText());
1030
1031  ASSERT_EQ(tok::newline,        Toks[6].getKind());
1032}
1033
1034TEST_F(CommentLexerTest, HTML9) {
1035  const char *Sources[] = {
1036    "// <img src",
1037    "// <img src "
1038  };
1039
1040  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1041    std::vector<Token> Toks;
1042
1043    lexString(Sources[i], Toks);
1044
1045    ASSERT_EQ(4U, Toks.size());
1046
1047    ASSERT_EQ(tok::text,           Toks[0].getKind());
1048    ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1049
1050    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1051    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1052
1053    ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1054    ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
1055
1056    ASSERT_EQ(tok::newline,        Toks[3].getKind());
1057  }
1058}
1059
1060TEST_F(CommentLexerTest, HTML10) {
1061  const char *Sources[] = {
1062    "// <img src=",
1063    "// <img src ="
1064  };
1065
1066  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1067    std::vector<Token> Toks;
1068
1069    lexString(Sources[i], Toks);
1070
1071    ASSERT_EQ(5U, Toks.size());
1072
1073    ASSERT_EQ(tok::text,           Toks[0].getKind());
1074    ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1075
1076    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1077    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1078
1079    ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1080    ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
1081
1082    ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
1083
1084    ASSERT_EQ(tok::newline,        Toks[4].getKind());
1085  }
1086}
1087
1088TEST_F(CommentLexerTest, HTML11) {
1089  const char *Sources[] = {
1090    "// <img src=\"",
1091    "// <img src = \"",
1092    "// <img src=\'",
1093    "// <img src = \'"
1094  };
1095
1096  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1097    std::vector<Token> Toks;
1098
1099    lexString(Sources[i], Toks);
1100
1101    ASSERT_EQ(6U, Toks.size());
1102
1103    ASSERT_EQ(tok::text,               Toks[0].getKind());
1104    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
1105
1106    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
1107    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
1108
1109    ASSERT_EQ(tok::html_ident,         Toks[2].getKind());
1110    ASSERT_EQ(StringRef("src"),        Toks[2].getHTMLIdent());
1111
1112    ASSERT_EQ(tok::html_equals,        Toks[3].getKind());
1113
1114    ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1115    ASSERT_EQ(StringRef(""),           Toks[4].getHTMLQuotedString());
1116
1117    ASSERT_EQ(tok::newline,            Toks[5].getKind());
1118  }
1119}
1120
1121TEST_F(CommentLexerTest, HTML12) {
1122  const char *Source = "// <img src=@";
1123
1124  std::vector<Token> Toks;
1125
1126  lexString(Source, Toks);
1127
1128  ASSERT_EQ(6U, Toks.size());
1129
1130  ASSERT_EQ(tok::text,           Toks[0].getKind());
1131  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1132
1133  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1134  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1135
1136  ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1137  ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
1138
1139  ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
1140
1141  ASSERT_EQ(tok::text,           Toks[4].getKind());
1142  ASSERT_EQ(StringRef("@"),      Toks[4].getText());
1143
1144  ASSERT_EQ(tok::newline,        Toks[5].getKind());
1145}
1146
1147TEST_F(CommentLexerTest, HTML13) {
1148  const char *Sources[] = {
1149    "// <img src=\"val\\\"\\'val",
1150    "// <img src=\"val\\\"\\'val\"",
1151    "// <img src=\'val\\\"\\'val",
1152    "// <img src=\'val\\\"\\'val\'"
1153  };
1154
1155  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1156    std::vector<Token> Toks;
1157
1158    lexString(Sources[i], Toks);
1159
1160    ASSERT_EQ(6U, Toks.size());
1161
1162    ASSERT_EQ(tok::text,                  Toks[0].getKind());
1163    ASSERT_EQ(StringRef(" "),             Toks[0].getText());
1164
1165    ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
1166    ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());
1167
1168    ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
1169    ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());
1170
1171    ASSERT_EQ(tok::html_equals,           Toks[3].getKind());
1172
1173    ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
1174    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1175
1176    ASSERT_EQ(tok::newline,               Toks[5].getKind());
1177  }
1178}
1179
1180TEST_F(CommentLexerTest, HTML14) {
1181  const char *Sources[] = {
1182    "// <img src=\"val\\\"\\'val\">",
1183    "// <img src=\'val\\\"\\'val\'>"
1184  };
1185
1186  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1187    std::vector<Token> Toks;
1188
1189    lexString(Sources[i], Toks);
1190
1191    ASSERT_EQ(7U, Toks.size());
1192
1193    ASSERT_EQ(tok::text,                  Toks[0].getKind());
1194    ASSERT_EQ(StringRef(" "),             Toks[0].getText());
1195
1196    ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
1197    ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());
1198
1199    ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
1200    ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());
1201
1202    ASSERT_EQ(tok::html_equals,           Toks[3].getKind());
1203
1204    ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
1205    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1206
1207    ASSERT_EQ(tok::html_greater,          Toks[5].getKind());
1208
1209    ASSERT_EQ(tok::newline,               Toks[6].getKind());
1210  }
1211}
1212
1213TEST_F(CommentLexerTest, HTML15) {
1214  const char *Sources[] = {
1215    "// <img/>",
1216    "// <img />"
1217  };
1218
1219  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1220    std::vector<Token> Toks;
1221
1222    lexString(Sources[i], Toks);
1223
1224    ASSERT_EQ(4U, Toks.size());
1225
1226    ASSERT_EQ(tok::text,               Toks[0].getKind());
1227    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
1228
1229    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
1230    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
1231
1232    ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind());
1233
1234    ASSERT_EQ(tok::newline,            Toks[3].getKind());
1235  }
1236}
1237
1238TEST_F(CommentLexerTest, HTML16) {
1239  const char *Sources[] = {
1240    "// <img/ Aaa",
1241    "// <img / Aaa"
1242  };
1243
1244  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1245    std::vector<Token> Toks;
1246
1247    lexString(Sources[i], Toks);
1248
1249    ASSERT_EQ(5U, Toks.size());
1250
1251    ASSERT_EQ(tok::text,               Toks[0].getKind());
1252    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
1253
1254    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
1255    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
1256
1257    ASSERT_EQ(tok::text,               Toks[2].getKind());
1258    ASSERT_EQ(StringRef("/"),          Toks[2].getText());
1259
1260    ASSERT_EQ(tok::text,               Toks[3].getKind());
1261    ASSERT_EQ(StringRef(" Aaa"),       Toks[3].getText());
1262
1263    ASSERT_EQ(tok::newline,            Toks[4].getKind());
1264  }
1265}
1266
1267TEST_F(CommentLexerTest, HTML17) {
1268  const char *Source = "// </";
1269
1270  std::vector<Token> Toks;
1271
1272  lexString(Source, Toks);
1273
1274  ASSERT_EQ(3U, Toks.size());
1275
1276  ASSERT_EQ(tok::text,       Toks[0].getKind());
1277  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
1278
1279  ASSERT_EQ(tok::text,       Toks[1].getKind());
1280  ASSERT_EQ(StringRef("</"), Toks[1].getText());
1281
1282  ASSERT_EQ(tok::newline,    Toks[2].getKind());
1283}
1284
1285TEST_F(CommentLexerTest, HTML18) {
1286  const char *Source = "// </@";
1287
1288  std::vector<Token> Toks;
1289
1290  lexString(Source, Toks);
1291
1292  ASSERT_EQ(4U, Toks.size());
1293
1294  ASSERT_EQ(tok::text,       Toks[0].getKind());
1295  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
1296
1297  ASSERT_EQ(tok::text,       Toks[1].getKind());
1298  ASSERT_EQ(StringRef("</"), Toks[1].getText());
1299
1300  ASSERT_EQ(tok::text,       Toks[2].getKind());
1301  ASSERT_EQ(StringRef("@"),  Toks[2].getText());
1302
1303  ASSERT_EQ(tok::newline,    Toks[3].getKind());
1304}
1305
1306TEST_F(CommentLexerTest, HTML19) {
1307  const char *Source = "// </img";
1308
1309  std::vector<Token> Toks;
1310
1311  lexString(Source, Toks);
1312
1313  ASSERT_EQ(3U, Toks.size());
1314
1315  ASSERT_EQ(tok::text,         Toks[0].getKind());
1316  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1317
1318  ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
1319  ASSERT_EQ(StringRef("img"),  Toks[1].getHTMLTagEndName());
1320
1321  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1322}
1323
1324TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
1325  const char *Source = "// <tag>";
1326
1327  std::vector<Token> Toks;
1328
1329  lexString(Source, Toks);
1330
1331  ASSERT_EQ(4U, Toks.size());
1332
1333  ASSERT_EQ(tok::text,         Toks[0].getKind());
1334  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1335
1336  ASSERT_EQ(tok::text,         Toks[1].getKind());
1337  ASSERT_EQ(StringRef("<tag"), Toks[1].getText());
1338
1339  ASSERT_EQ(tok::text,         Toks[2].getKind());
1340  ASSERT_EQ(StringRef(">"),    Toks[2].getText());
1341
1342  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1343}
1344
1345TEST_F(CommentLexerTest, NotAKnownHTMLTag2) {
1346  const char *Source = "// </tag>";
1347
1348  std::vector<Token> Toks;
1349
1350  lexString(Source, Toks);
1351
1352  ASSERT_EQ(4U, Toks.size());
1353
1354  ASSERT_EQ(tok::text,          Toks[0].getKind());
1355  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1356
1357  ASSERT_EQ(tok::text,          Toks[1].getKind());
1358  ASSERT_EQ(StringRef("</tag"), Toks[1].getText());
1359
1360  ASSERT_EQ(tok::text,          Toks[2].getKind());
1361  ASSERT_EQ(StringRef(">"),     Toks[2].getText());
1362
1363  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1364}
1365
1366TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
1367  const char *Source = "// &";
1368
1369  std::vector<Token> Toks;
1370
1371  lexString(Source, Toks);
1372
1373  ASSERT_EQ(3U, Toks.size());
1374
1375  ASSERT_EQ(tok::text,         Toks[0].getKind());
1376  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1377
1378  ASSERT_EQ(tok::text,         Toks[1].getKind());
1379  ASSERT_EQ(StringRef("&"),    Toks[1].getText());
1380
1381  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1382}
1383
1384TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
1385  const char *Source = "// &!";
1386
1387  std::vector<Token> Toks;
1388
1389  lexString(Source, Toks);
1390
1391  ASSERT_EQ(4U, Toks.size());
1392
1393  ASSERT_EQ(tok::text,         Toks[0].getKind());
1394  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1395
1396  ASSERT_EQ(tok::text,         Toks[1].getKind());
1397  ASSERT_EQ(StringRef("&"),    Toks[1].getText());
1398
1399  ASSERT_EQ(tok::text,         Toks[2].getKind());
1400  ASSERT_EQ(StringRef("!"),    Toks[2].getText());
1401
1402  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1403}
1404
1405TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
1406  const char *Source = "// &amp";
1407
1408  std::vector<Token> Toks;
1409
1410  lexString(Source, Toks);
1411
1412  ASSERT_EQ(3U, Toks.size());
1413
1414  ASSERT_EQ(tok::text,         Toks[0].getKind());
1415  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1416
1417  ASSERT_EQ(tok::text,         Toks[1].getKind());
1418  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1419
1420  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1421}
1422
1423TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
1424  const char *Source = "// &amp!";
1425
1426  std::vector<Token> Toks;
1427
1428  lexString(Source, Toks);
1429
1430  ASSERT_EQ(4U, Toks.size());
1431
1432  ASSERT_EQ(tok::text,         Toks[0].getKind());
1433  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1434
1435  ASSERT_EQ(tok::text,         Toks[1].getKind());
1436  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1437
1438  ASSERT_EQ(tok::text,         Toks[2].getKind());
1439  ASSERT_EQ(StringRef("!"),    Toks[2].getText());
1440
1441  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1442}
1443
1444TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
1445  const char *Source = "// &#";
1446
1447  std::vector<Token> Toks;
1448
1449  lexString(Source, Toks);
1450
1451  ASSERT_EQ(3U, Toks.size());
1452
1453  ASSERT_EQ(tok::text,         Toks[0].getKind());
1454  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1455
1456  ASSERT_EQ(tok::text,         Toks[1].getKind());
1457  ASSERT_EQ(StringRef("&#"),   Toks[1].getText());
1458
1459  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1460}
1461
1462TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
1463  const char *Source = "// &#a";
1464
1465  std::vector<Token> Toks;
1466
1467  lexString(Source, Toks);
1468
1469  ASSERT_EQ(4U, Toks.size());
1470
1471  ASSERT_EQ(tok::text,         Toks[0].getKind());
1472  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1473
1474  ASSERT_EQ(tok::text,         Toks[1].getKind());
1475  ASSERT_EQ(StringRef("&#"),   Toks[1].getText());
1476
1477  ASSERT_EQ(tok::text,         Toks[2].getKind());
1478  ASSERT_EQ(StringRef("a"),    Toks[2].getText());
1479
1480  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1481}
1482
1483TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
1484  const char *Source = "// &#42";
1485
1486  std::vector<Token> Toks;
1487
1488  lexString(Source, Toks);
1489
1490  ASSERT_EQ(3U, Toks.size());
1491
1492  ASSERT_EQ(tok::text,         Toks[0].getKind());
1493  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1494
1495  ASSERT_EQ(tok::text,         Toks[1].getKind());
1496  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1497
1498  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1499}
1500
1501TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
1502  const char *Source = "// &#42a";
1503
1504  std::vector<Token> Toks;
1505
1506  lexString(Source, Toks);
1507
1508  ASSERT_EQ(4U, Toks.size());
1509
1510  ASSERT_EQ(tok::text,         Toks[0].getKind());
1511  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1512
1513  ASSERT_EQ(tok::text,         Toks[1].getKind());
1514  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1515
1516  ASSERT_EQ(tok::text,         Toks[2].getKind());
1517  ASSERT_EQ(StringRef("a"),    Toks[2].getText());
1518
1519  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1520}
1521
1522TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
1523  const char *Source = "// &#x";
1524
1525  std::vector<Token> Toks;
1526
1527  lexString(Source, Toks);
1528
1529  ASSERT_EQ(3U, Toks.size());
1530
1531  ASSERT_EQ(tok::text,         Toks[0].getKind());
1532  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1533
1534  ASSERT_EQ(tok::text,         Toks[1].getKind());
1535  ASSERT_EQ(StringRef("&#x"),  Toks[1].getText());
1536
1537  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1538}
1539
1540TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
1541  const char *Source = "// &#xz";
1542
1543  std::vector<Token> Toks;
1544
1545  lexString(Source, Toks);
1546
1547  ASSERT_EQ(4U, Toks.size());
1548
1549  ASSERT_EQ(tok::text,         Toks[0].getKind());
1550  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1551
1552  ASSERT_EQ(tok::text,         Toks[1].getKind());
1553  ASSERT_EQ(StringRef("&#x"),  Toks[1].getText());
1554
1555  ASSERT_EQ(tok::text,         Toks[2].getKind());
1556  ASSERT_EQ(StringRef("z"),    Toks[2].getText());
1557
1558  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1559}
1560
1561TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
1562  const char *Source = "// &#xab";
1563
1564  std::vector<Token> Toks;
1565
1566  lexString(Source, Toks);
1567
1568  ASSERT_EQ(3U, Toks.size());
1569
1570  ASSERT_EQ(tok::text,          Toks[0].getKind());
1571  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1572
1573  ASSERT_EQ(tok::text,          Toks[1].getKind());
1574  ASSERT_EQ(StringRef("&#xab"), Toks[1].getText());
1575
1576  ASSERT_EQ(tok::newline,       Toks[2].getKind());
1577}
1578
1579TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
1580  const char *Source = "// &#xaBz";
1581
1582  std::vector<Token> Toks;
1583
1584  lexString(Source, Toks);
1585
1586  ASSERT_EQ(4U, Toks.size());
1587
1588  ASSERT_EQ(tok::text,          Toks[0].getKind());
1589  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1590
1591  ASSERT_EQ(tok::text,          Toks[1].getKind());
1592  ASSERT_EQ(StringRef("&#xaB"), Toks[1].getText());
1593
1594  ASSERT_EQ(tok::text,          Toks[2].getKind());
1595  ASSERT_EQ(StringRef("z"),     Toks[2].getText());
1596
1597  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1598}
1599
1600TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
1601  const char *Source = "// &amp;";
1602
1603  std::vector<Token> Toks;
1604
1605  lexString(Source, Toks);
1606
1607  ASSERT_EQ(3U, Toks.size());
1608
1609  ASSERT_EQ(tok::text,          Toks[0].getKind());
1610  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1611
1612  ASSERT_EQ(tok::text,          Toks[1].getKind());
1613  ASSERT_EQ(StringRef("&"),     Toks[1].getText());
1614
1615  ASSERT_EQ(tok::newline,       Toks[2].getKind());
1616}
1617
1618TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
1619  const char *Source = "// &amp;&lt;";
1620
1621  std::vector<Token> Toks;
1622
1623  lexString(Source, Toks);
1624
1625  ASSERT_EQ(4U, Toks.size());
1626
1627  ASSERT_EQ(tok::text,          Toks[0].getKind());
1628  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1629
1630  ASSERT_EQ(tok::text,          Toks[1].getKind());
1631  ASSERT_EQ(StringRef("&"),     Toks[1].getText());
1632
1633  ASSERT_EQ(tok::text,          Toks[2].getKind());
1634  ASSERT_EQ(StringRef("<"),     Toks[2].getText());
1635
1636  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1637}
1638
1639TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
1640  const char *Source = "// &amp; meow";
1641
1642  std::vector<Token> Toks;
1643
1644  lexString(Source, Toks);
1645
1646  ASSERT_EQ(4U, Toks.size());
1647
1648  ASSERT_EQ(tok::text,          Toks[0].getKind());
1649  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1650
1651  ASSERT_EQ(tok::text,          Toks[1].getKind());
1652  ASSERT_EQ(StringRef("&"),     Toks[1].getText());
1653
1654  ASSERT_EQ(tok::text,          Toks[2].getKind());
1655  ASSERT_EQ(StringRef(" meow"), Toks[2].getText());
1656
1657  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1658}
1659
1660TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
1661  const char *Sources[] = {
1662    "// &#61;",
1663    "// &#x3d;",
1664    "// &#X3d;",
1665    "// &#X3D;"
1666  };
1667
1668  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1669    std::vector<Token> Toks;
1670
1671    lexString(Sources[i], Toks);
1672
1673    ASSERT_EQ(3U, Toks.size());
1674
1675    ASSERT_EQ(tok::text,          Toks[0].getKind());
1676    ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1677
1678    ASSERT_EQ(tok::text,          Toks[1].getKind());
1679    ASSERT_EQ(StringRef("="),     Toks[1].getText());
1680
1681    ASSERT_EQ(tok::newline,       Toks[2].getKind());
1682  }
1683}
1684
1685TEST_F(CommentLexerTest, MultipleComments) {
1686  const char *Source =
1687    "// Aaa\n"
1688    "/// Bbb\n"
1689    "/* Ccc\n"
1690    " * Ddd*/\n"
1691    "/** Eee*/";
1692
1693  std::vector<Token> Toks;
1694
1695  lexString(Source, Toks);
1696
1697  ASSERT_EQ(12U, Toks.size());
1698
1699  ASSERT_EQ(tok::text,           Toks[0].getKind());
1700  ASSERT_EQ(StringRef(" Aaa"),   Toks[0].getText());
1701  ASSERT_EQ(tok::newline,        Toks[1].getKind());
1702
1703  ASSERT_EQ(tok::text,           Toks[2].getKind());
1704  ASSERT_EQ(StringRef(" Bbb"),   Toks[2].getText());
1705  ASSERT_EQ(tok::newline,        Toks[3].getKind());
1706
1707  ASSERT_EQ(tok::text,           Toks[4].getKind());
1708  ASSERT_EQ(StringRef(" Ccc"),   Toks[4].getText());
1709  ASSERT_EQ(tok::newline,        Toks[5].getKind());
1710
1711  ASSERT_EQ(tok::text,           Toks[6].getKind());
1712  ASSERT_EQ(StringRef(" Ddd"),   Toks[6].getText());
1713  ASSERT_EQ(tok::newline,        Toks[7].getKind());
1714  ASSERT_EQ(tok::newline,        Toks[8].getKind());
1715
1716  ASSERT_EQ(tok::text,           Toks[9].getKind());
1717  ASSERT_EQ(StringRef(" Eee"),   Toks[9].getText());
1718
1719  ASSERT_EQ(tok::newline,        Toks[10].getKind());
1720  ASSERT_EQ(tok::newline,        Toks[11].getKind());
1721}
1722
1723} // end namespace comments
1724} // end namespace clang
1725
1726