1//===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "clang/AST/CommentLexer.h"
11#include "clang/AST/CommentCommandTraits.h"
12#include "clang/Basic/CommentOptions.h"
13#include "clang/Basic/Diagnostic.h"
14#include "clang/Basic/DiagnosticOptions.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/SourceManager.h"
17#include "llvm/ADT/STLExtras.h"
18#include "gtest/gtest.h"
19#include <vector>
20
21using namespace llvm;
22using namespace clang;
23
24namespace clang {
25namespace comments {
26
27namespace {
28class CommentLexerTest : public ::testing::Test {
29protected:
30  CommentLexerTest()
31    : FileMgr(FileMgrOpts),
32      DiagID(new DiagnosticIDs()),
33      Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
34      SourceMgr(Diags, FileMgr),
35      Traits(Allocator, CommentOptions()) {
36  }
37
38  FileSystemOptions FileMgrOpts;
39  FileManager FileMgr;
40  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
41  DiagnosticsEngine Diags;
42  SourceManager SourceMgr;
43  llvm::BumpPtrAllocator Allocator;
44  CommandTraits Traits;
45
46  void lexString(const char *Source, std::vector<Token> &Toks);
47
48  StringRef getCommandName(const Token &Tok) {
49    return Traits.getCommandInfo(Tok.getCommandID())->Name;
50  }
51
52  StringRef getVerbatimBlockName(const Token &Tok) {
53    return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name;
54  }
55
56  StringRef getVerbatimLineName(const Token &Tok) {
57    return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name;
58  }
59};
60
61void CommentLexerTest::lexString(const char *Source,
62                                 std::vector<Token> &Toks) {
63  MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source);
64  FileID File = SourceMgr.createFileIDForMemBuffer(Buf);
65  SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
66
67  Lexer L(Allocator, Diags, Traits, Begin, Source, Source + strlen(Source));
68
69  while (1) {
70    Token Tok;
71    L.lex(Tok);
72    if (Tok.is(tok::eof))
73      break;
74    Toks.push_back(Tok);
75  }
76}
77
78} // unnamed namespace
79
80// Empty source range should be handled.
81TEST_F(CommentLexerTest, Basic1) {
82  const char *Source = "";
83  std::vector<Token> Toks;
84
85  lexString(Source, Toks);
86
87  ASSERT_EQ(0U, Toks.size());
88}
89
90// Empty comments should be handled.
91TEST_F(CommentLexerTest, Basic2) {
92  const char *Sources[] = {
93    "//", "///", "//!", "///<", "//!<"
94  };
95  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
96    std::vector<Token> Toks;
97
98    lexString(Sources[i], Toks);
99
100    ASSERT_EQ(1U, Toks.size());
101
102    ASSERT_EQ(tok::newline, Toks[0].getKind());
103  }
104}
105
106// Empty comments should be handled.
107TEST_F(CommentLexerTest, Basic3) {
108  const char *Sources[] = {
109    "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
110  };
111  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
112    std::vector<Token> Toks;
113
114    lexString(Sources[i], Toks);
115
116    ASSERT_EQ(2U, Toks.size());
117
118    ASSERT_EQ(tok::newline, Toks[0].getKind());
119    ASSERT_EQ(tok::newline, Toks[1].getKind());
120  }
121}
122
123// Single comment with plain text.
124TEST_F(CommentLexerTest, Basic4) {
125  const char *Sources[] = {
126    "// Meow",   "/// Meow",    "//! Meow",
127    "// Meow\n", "// Meow\r\n", "//! Meow\r",
128  };
129
130  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
131    std::vector<Token> Toks;
132
133    lexString(Sources[i], Toks);
134
135    ASSERT_EQ(2U, Toks.size());
136
137    ASSERT_EQ(tok::text,          Toks[0].getKind());
138    ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
139
140    ASSERT_EQ(tok::newline,       Toks[1].getKind());
141  }
142}
143
144// Single comment with plain text.
145TEST_F(CommentLexerTest, Basic5) {
146  const char *Sources[] = {
147    "/* Meow*/", "/** Meow*/",  "/*! Meow*/"
148  };
149
150  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
151    std::vector<Token> Toks;
152
153    lexString(Sources[i], Toks);
154
155    ASSERT_EQ(3U, Toks.size());
156
157    ASSERT_EQ(tok::text,          Toks[0].getKind());
158    ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
159
160    ASSERT_EQ(tok::newline,       Toks[1].getKind());
161    ASSERT_EQ(tok::newline,       Toks[2].getKind());
162  }
163}
164
165// Test newline escaping.
166TEST_F(CommentLexerTest, Basic6) {
167  const char *Sources[] = {
168    "// Aaa\\\n"   " Bbb\\ \n"   " Ccc?" "?/\n",
169    "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
170    "// Aaa\\\r"   " Bbb\\ \r"   " Ccc?" "?/\r"
171  };
172
173  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
174    std::vector<Token> Toks;
175
176    lexString(Sources[i], Toks);
177
178    ASSERT_EQ(10U, Toks.size());
179
180    ASSERT_EQ(tok::text,         Toks[0].getKind());
181    ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
182    ASSERT_EQ(tok::text,         Toks[1].getKind());
183    ASSERT_EQ(StringRef("\\"),   Toks[1].getText());
184    ASSERT_EQ(tok::newline,      Toks[2].getKind());
185
186    ASSERT_EQ(tok::text,         Toks[3].getKind());
187    ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText());
188    ASSERT_EQ(tok::text,         Toks[4].getKind());
189    ASSERT_EQ(StringRef("\\"),   Toks[4].getText());
190    ASSERT_EQ(tok::text,         Toks[5].getKind());
191    ASSERT_EQ(StringRef(" "),    Toks[5].getText());
192    ASSERT_EQ(tok::newline,      Toks[6].getKind());
193
194    ASSERT_EQ(tok::text,         Toks[7].getKind());
195    ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText());
196    ASSERT_EQ(tok::newline,      Toks[8].getKind());
197
198    ASSERT_EQ(tok::newline,      Toks[9].getKind());
199  }
200}
201
202// Check that we skip C-style aligned stars correctly.
203TEST_F(CommentLexerTest, Basic7) {
204  const char *Source =
205    "/* Aaa\n"
206    " * Bbb\r\n"
207    "\t* Ccc\n"
208    "  ! Ddd\n"
209    "  * Eee\n"
210    "  ** Fff\n"
211    " */";
212  std::vector<Token> Toks;
213
214  lexString(Source, Toks);
215
216  ASSERT_EQ(15U, Toks.size());
217
218  ASSERT_EQ(tok::text,         Toks[0].getKind());
219  ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
220  ASSERT_EQ(tok::newline,      Toks[1].getKind());
221
222  ASSERT_EQ(tok::text,         Toks[2].getKind());
223  ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
224  ASSERT_EQ(tok::newline,      Toks[3].getKind());
225
226  ASSERT_EQ(tok::text,         Toks[4].getKind());
227  ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
228  ASSERT_EQ(tok::newline,      Toks[5].getKind());
229
230  ASSERT_EQ(tok::text,            Toks[6].getKind());
231  ASSERT_EQ(StringRef("  ! Ddd"), Toks[6].getText());
232  ASSERT_EQ(tok::newline,         Toks[7].getKind());
233
234  ASSERT_EQ(tok::text,         Toks[8].getKind());
235  ASSERT_EQ(StringRef(" Eee"), Toks[8].getText());
236  ASSERT_EQ(tok::newline,      Toks[9].getKind());
237
238  ASSERT_EQ(tok::text,          Toks[10].getKind());
239  ASSERT_EQ(StringRef("* Fff"), Toks[10].getText());
240  ASSERT_EQ(tok::newline,       Toks[11].getKind());
241
242  ASSERT_EQ(tok::text,         Toks[12].getKind());
243  ASSERT_EQ(StringRef(" "),    Toks[12].getText());
244
245  ASSERT_EQ(tok::newline,      Toks[13].getKind());
246  ASSERT_EQ(tok::newline,      Toks[14].getKind());
247}
248
249// A command marker followed by comment end.
250TEST_F(CommentLexerTest, DoxygenCommand1) {
251  const char *Sources[] = { "//@", "///@", "//!@" };
252  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
253    std::vector<Token> Toks;
254
255    lexString(Sources[i], Toks);
256
257    ASSERT_EQ(2U, Toks.size());
258
259    ASSERT_EQ(tok::text,          Toks[0].getKind());
260    ASSERT_EQ(StringRef("@"),     Toks[0].getText());
261
262    ASSERT_EQ(tok::newline,       Toks[1].getKind());
263  }
264}
265
266// A command marker followed by comment end.
267TEST_F(CommentLexerTest, DoxygenCommand2) {
268  const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
269  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
270    std::vector<Token> Toks;
271
272    lexString(Sources[i], Toks);
273
274    ASSERT_EQ(3U, Toks.size());
275
276    ASSERT_EQ(tok::text,          Toks[0].getKind());
277    ASSERT_EQ(StringRef("@"),     Toks[0].getText());
278
279    ASSERT_EQ(tok::newline,       Toks[1].getKind());
280    ASSERT_EQ(tok::newline,       Toks[2].getKind());
281  }
282}
283
284// A command marker followed by comment end.
285TEST_F(CommentLexerTest, DoxygenCommand3) {
286  const char *Sources[] = { "/*\\*/", "/**\\*/" };
287  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
288    std::vector<Token> Toks;
289
290    lexString(Sources[i], Toks);
291
292    ASSERT_EQ(3U, Toks.size());
293
294    ASSERT_EQ(tok::text,           Toks[0].getKind());
295    ASSERT_EQ(StringRef("\\"),     Toks[0].getText());
296
297    ASSERT_EQ(tok::newline,        Toks[1].getKind());
298    ASSERT_EQ(tok::newline,        Toks[2].getKind());
299  }
300}
301
302// Doxygen escape sequences.
303TEST_F(CommentLexerTest, DoxygenCommand4) {
304  const char *Sources[] = {
305    "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
306    "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
307  };
308  const char *Text[] = {
309    " ",
310    "\\", " ", "@", " ", "&", " ", "$",  " ", "#", " ",
311    "<",  " ", ">", " ", "%", " ", "\"", " ", ".", " ",
312    "::", ""
313  };
314
315  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
316    std::vector<Token> Toks;
317
318    lexString(Sources[i], Toks);
319
320    ASSERT_EQ(array_lengthof(Text), Toks.size());
321
322    for (size_t j = 0, e = Toks.size(); j != e; j++) {
323      if(Toks[j].is(tok::text))
324        ASSERT_EQ(StringRef(Text[j]), Toks[j].getText())
325          << "index " << i;
326    }
327  }
328}
329
330// A command marker followed by a non-letter that is not a part of an escape
331// sequence.
332TEST_F(CommentLexerTest, DoxygenCommand5) {
333  const char *Source = "/// \\^ \\0";
334  std::vector<Token> Toks;
335
336  lexString(Source, Toks);
337
338  ASSERT_EQ(6U, Toks.size());
339
340  ASSERT_EQ(tok::text,       Toks[0].getKind());
341  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
342
343  ASSERT_EQ(tok::text,       Toks[1].getKind());
344  ASSERT_EQ(StringRef("\\"), Toks[1].getText());
345
346  ASSERT_EQ(tok::text,       Toks[2].getKind());
347  ASSERT_EQ(StringRef("^ "), Toks[2].getText());
348
349  ASSERT_EQ(tok::text,       Toks[3].getKind());
350  ASSERT_EQ(StringRef("\\"), Toks[3].getText());
351
352  ASSERT_EQ(tok::text,       Toks[4].getKind());
353  ASSERT_EQ(StringRef("0"),  Toks[4].getText());
354
355  ASSERT_EQ(tok::newline,    Toks[5].getKind());
356}
357
358TEST_F(CommentLexerTest, DoxygenCommand6) {
359  const char *Source = "/// \\brief Aaa.";
360  std::vector<Token> Toks;
361
362  lexString(Source, Toks);
363
364  ASSERT_EQ(4U, Toks.size());
365
366  ASSERT_EQ(tok::text,          Toks[0].getKind());
367  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
368
369  ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
370  ASSERT_EQ(StringRef("brief"), getCommandName(Toks[1]));
371
372  ASSERT_EQ(tok::text,          Toks[2].getKind());
373  ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
374
375  ASSERT_EQ(tok::newline,       Toks[3].getKind());
376}
377
378TEST_F(CommentLexerTest, DoxygenCommand7) {
379  const char *Source = "/// \\em\\em \\em\t\\em\n";
380  std::vector<Token> Toks;
381
382  lexString(Source, Toks);
383
384  ASSERT_EQ(8U, Toks.size());
385
386  ASSERT_EQ(tok::text,       Toks[0].getKind());
387  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
388
389  ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
390  ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
391
392  ASSERT_EQ(tok::backslash_command, Toks[2].getKind());
393  ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
394
395  ASSERT_EQ(tok::text,       Toks[3].getKind());
396  ASSERT_EQ(StringRef(" "),  Toks[3].getText());
397
398  ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
399  ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
400
401  ASSERT_EQ(tok::text,       Toks[5].getKind());
402  ASSERT_EQ(StringRef("\t"), Toks[5].getText());
403
404  ASSERT_EQ(tok::backslash_command, Toks[6].getKind());
405  ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
406
407  ASSERT_EQ(tok::newline,    Toks[7].getKind());
408}
409
410TEST_F(CommentLexerTest, DoxygenCommand8) {
411  const char *Source = "/// @em@em @em\t@em\n";
412  std::vector<Token> Toks;
413
414  lexString(Source, Toks);
415
416  ASSERT_EQ(8U, Toks.size());
417
418  ASSERT_EQ(tok::text,       Toks[0].getKind());
419  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
420
421  ASSERT_EQ(tok::at_command, Toks[1].getKind());
422  ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
423
424  ASSERT_EQ(tok::at_command, Toks[2].getKind());
425  ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
426
427  ASSERT_EQ(tok::text,       Toks[3].getKind());
428  ASSERT_EQ(StringRef(" "),  Toks[3].getText());
429
430  ASSERT_EQ(tok::at_command, Toks[4].getKind());
431  ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
432
433  ASSERT_EQ(tok::text,       Toks[5].getKind());
434  ASSERT_EQ(StringRef("\t"), Toks[5].getText());
435
436  ASSERT_EQ(tok::at_command, Toks[6].getKind());
437  ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
438
439  ASSERT_EQ(tok::newline,    Toks[7].getKind());
440}
441
442TEST_F(CommentLexerTest, DoxygenCommand9) {
443  const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
444  std::vector<Token> Toks;
445
446  lexString(Source, Toks);
447
448  ASSERT_EQ(8U, Toks.size());
449
450  ASSERT_EQ(tok::text,        Toks[0].getKind());
451  ASSERT_EQ(StringRef(" "),   Toks[0].getText());
452
453  ASSERT_EQ(tok::unknown_command, Toks[1].getKind());
454  ASSERT_EQ(StringRef("aaa"), Toks[1].getUnknownCommandName());
455
456  ASSERT_EQ(tok::unknown_command, Toks[2].getKind());
457  ASSERT_EQ(StringRef("bbb"), Toks[2].getUnknownCommandName());
458
459  ASSERT_EQ(tok::text,        Toks[3].getKind());
460  ASSERT_EQ(StringRef(" "),   Toks[3].getText());
461
462  ASSERT_EQ(tok::unknown_command, Toks[4].getKind());
463  ASSERT_EQ(StringRef("ccc"), Toks[4].getUnknownCommandName());
464
465  ASSERT_EQ(tok::text,        Toks[5].getKind());
466  ASSERT_EQ(StringRef("\t"),  Toks[5].getText());
467
468  ASSERT_EQ(tok::unknown_command, Toks[6].getKind());
469  ASSERT_EQ(StringRef("ddd"), Toks[6].getUnknownCommandName());
470
471  ASSERT_EQ(tok::newline,     Toks[7].getKind());
472}
473
474TEST_F(CommentLexerTest, DoxygenCommand10) {
475  const char *Source = "// \\c\n";
476  std::vector<Token> Toks;
477
478  lexString(Source, Toks);
479
480  ASSERT_EQ(3U, Toks.size());
481
482  ASSERT_EQ(tok::text,      Toks[0].getKind());
483  ASSERT_EQ(StringRef(" "), Toks[0].getText());
484
485  ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
486  ASSERT_EQ(StringRef("c"), getCommandName(Toks[1]));
487
488  ASSERT_EQ(tok::newline,   Toks[2].getKind());
489}
490
491TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
492  const char *Source =
493    "/// \\NewBlockCommand Aaa.\n"
494    "/// @NewBlockCommand Aaa.\n";
495
496  Traits.registerBlockCommand(StringRef("NewBlockCommand"));
497
498  std::vector<Token> Toks;
499
500  lexString(Source, Toks);
501
502  ASSERT_EQ(8U, Toks.size());
503
504  ASSERT_EQ(tok::text,          Toks[0].getKind());
505  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
506
507  ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
508  ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[1]));
509
510  ASSERT_EQ(tok::text,          Toks[2].getKind());
511  ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
512
513  ASSERT_EQ(tok::newline,       Toks[3].getKind());
514
515  ASSERT_EQ(tok::text,          Toks[4].getKind());
516  ASSERT_EQ(StringRef(" "),     Toks[4].getText());
517
518  ASSERT_EQ(tok::at_command,    Toks[5].getKind());
519  ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[5]));
520
521  ASSERT_EQ(tok::text,          Toks[6].getKind());
522  ASSERT_EQ(StringRef(" Aaa."), Toks[6].getText());
523
524  ASSERT_EQ(tok::newline,       Toks[7].getKind());
525}
526
527TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {
528  const char *Source =
529    "/// \\Foo\n"
530    "/// \\Bar Baz\n"
531    "/// \\Blech quux=corge\n";
532
533  Traits.registerBlockCommand(StringRef("Foo"));
534  Traits.registerBlockCommand(StringRef("Bar"));
535  Traits.registerBlockCommand(StringRef("Blech"));
536
537  std::vector<Token> Toks;
538
539  lexString(Source, Toks);
540
541  ASSERT_EQ(11U, Toks.size());
542
543  ASSERT_EQ(tok::text,      Toks[0].getKind());
544  ASSERT_EQ(StringRef(" "), Toks[0].getText());
545
546  ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
547  ASSERT_EQ(StringRef("Foo"), getCommandName(Toks[1]));
548
549  ASSERT_EQ(tok::newline,     Toks[2].getKind());
550
551  ASSERT_EQ(tok::text,      Toks[3].getKind());
552  ASSERT_EQ(StringRef(" "), Toks[3].getText());
553
554  ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
555  ASSERT_EQ(StringRef("Bar"), getCommandName(Toks[4]));
556
557  ASSERT_EQ(tok::text,         Toks[5].getKind());
558  ASSERT_EQ(StringRef(" Baz"), Toks[5].getText());
559
560  ASSERT_EQ(tok::newline,     Toks[6].getKind());
561
562  ASSERT_EQ(tok::text,      Toks[7].getKind());
563  ASSERT_EQ(StringRef(" "), Toks[7].getText());
564
565  ASSERT_EQ(tok::backslash_command, Toks[8].getKind());
566  ASSERT_EQ(StringRef("Blech"), getCommandName(Toks[8]));
567
568  ASSERT_EQ(tok::text,                Toks[9].getKind());
569  ASSERT_EQ(StringRef(" quux=corge"), Toks[9].getText());
570
571  ASSERT_EQ(tok::newline,     Toks[10].getKind());
572}
573
574// Empty verbatim block.
575TEST_F(CommentLexerTest, VerbatimBlock1) {
576  const char *Sources[] = {
577    "/// \\verbatim\\endverbatim\n//",
578    "/** \\verbatim\\endverbatim*/"
579  };
580
581  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
582    std::vector<Token> Toks;
583
584    lexString(Sources[i], Toks);
585
586    ASSERT_EQ(5U, Toks.size());
587
588    ASSERT_EQ(tok::text,                 Toks[0].getKind());
589    ASSERT_EQ(StringRef(" "),            Toks[0].getText());
590
591    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
592    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
593
594    ASSERT_EQ(tok::verbatim_block_end,   Toks[2].getKind());
595    ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[2]));
596
597    ASSERT_EQ(tok::newline,              Toks[3].getKind());
598    ASSERT_EQ(tok::newline,              Toks[4].getKind());
599  }
600}
601
602// Empty verbatim block without an end command.
603TEST_F(CommentLexerTest, VerbatimBlock2) {
604  const char *Source = "/// \\verbatim";
605
606  std::vector<Token> Toks;
607
608  lexString(Source, Toks);
609
610  ASSERT_EQ(3U, Toks.size());
611
612  ASSERT_EQ(tok::text,                 Toks[0].getKind());
613  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
614
615  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
616  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
617
618  ASSERT_EQ(tok::newline,              Toks[2].getKind());
619}
620
621// Empty verbatim block without an end command.
622TEST_F(CommentLexerTest, VerbatimBlock3) {
623  const char *Source = "/** \\verbatim*/";
624
625  std::vector<Token> Toks;
626
627  lexString(Source, Toks);
628
629  ASSERT_EQ(4U, Toks.size());
630
631  ASSERT_EQ(tok::text,                 Toks[0].getKind());
632  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
633
634  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
635  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
636
637  ASSERT_EQ(tok::newline,              Toks[2].getKind());
638  ASSERT_EQ(tok::newline,              Toks[3].getKind());
639}
640
641// Single-line verbatim block.
642TEST_F(CommentLexerTest, VerbatimBlock4) {
643  const char *Sources[] = {
644    "/// Meow \\verbatim aaa \\endverbatim\n//",
645    "/** Meow \\verbatim aaa \\endverbatim*/"
646  };
647
648  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
649    std::vector<Token> Toks;
650
651    lexString(Sources[i], Toks);
652
653    ASSERT_EQ(6U, Toks.size());
654
655    ASSERT_EQ(tok::text,                 Toks[0].getKind());
656    ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
657
658    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
659    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
660
661    ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
662    ASSERT_EQ(StringRef(" aaa "),        Toks[2].getVerbatimBlockText());
663
664    ASSERT_EQ(tok::verbatim_block_end,   Toks[3].getKind());
665    ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[3]));
666
667    ASSERT_EQ(tok::newline,              Toks[4].getKind());
668    ASSERT_EQ(tok::newline,              Toks[5].getKind());
669  }
670}
671
672// Single-line verbatim block without an end command.
673TEST_F(CommentLexerTest, VerbatimBlock5) {
674  const char *Sources[] = {
675    "/// Meow \\verbatim aaa \n//",
676    "/** Meow \\verbatim aaa */"
677  };
678
679  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
680    std::vector<Token> Toks;
681
682    lexString(Sources[i], Toks);
683
684    ASSERT_EQ(5U, Toks.size());
685
686    ASSERT_EQ(tok::text,                 Toks[0].getKind());
687    ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
688
689    ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
690    ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
691
692    ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
693    ASSERT_EQ(StringRef(" aaa "),        Toks[2].getVerbatimBlockText());
694
695    ASSERT_EQ(tok::newline,              Toks[3].getKind());
696    ASSERT_EQ(tok::newline,              Toks[4].getKind());
697  }
698}
699
700TEST_F(CommentLexerTest, VerbatimBlock6) {
701  const char *Source =
702    "// \\verbatim\n"
703    "// Aaa\n"
704    "//\n"
705    "// Bbb\n"
706    "// \\endverbatim\n";
707
708  std::vector<Token> Toks;
709
710  lexString(Source, Toks);
711
712  ASSERT_EQ(10U, Toks.size());
713
714  ASSERT_EQ(tok::text,                 Toks[0].getKind());
715  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
716
717  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
718  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
719
720  ASSERT_EQ(tok::newline,              Toks[2].getKind());
721
722  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
723  ASSERT_EQ(StringRef(" Aaa"),         Toks[3].getVerbatimBlockText());
724
725  ASSERT_EQ(tok::newline,              Toks[4].getKind());
726
727  ASSERT_EQ(tok::newline,              Toks[5].getKind());
728
729  ASSERT_EQ(tok::verbatim_block_line,  Toks[6].getKind());
730  ASSERT_EQ(StringRef(" Bbb"),         Toks[6].getVerbatimBlockText());
731
732  ASSERT_EQ(tok::newline,              Toks[7].getKind());
733
734  ASSERT_EQ(tok::verbatim_block_end,   Toks[8].getKind());
735  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[8]));
736
737  ASSERT_EQ(tok::newline,              Toks[9].getKind());
738}
739
740TEST_F(CommentLexerTest, VerbatimBlock7) {
741  const char *Source =
742    "/* \\verbatim\n"
743    " * Aaa\n"
744    " *\n"
745    " * Bbb\n"
746    " * \\endverbatim\n"
747    " */";
748
749  std::vector<Token> Toks;
750
751  lexString(Source, Toks);
752
753  ASSERT_EQ(10U, Toks.size());
754
755  ASSERT_EQ(tok::text,                 Toks[0].getKind());
756  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
757
758  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
759  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
760
761  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
762  ASSERT_EQ(StringRef(" Aaa"),         Toks[2].getVerbatimBlockText());
763
764  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
765  ASSERT_EQ(StringRef(""),             Toks[3].getVerbatimBlockText());
766
767  ASSERT_EQ(tok::verbatim_block_line,  Toks[4].getKind());
768  ASSERT_EQ(StringRef(" Bbb"),         Toks[4].getVerbatimBlockText());
769
770  ASSERT_EQ(tok::verbatim_block_end,   Toks[5].getKind());
771  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[5]));
772
773  ASSERT_EQ(tok::newline,              Toks[6].getKind());
774
775  ASSERT_EQ(tok::text,                 Toks[7].getKind());
776  ASSERT_EQ(StringRef(" "),            Toks[7].getText());
777
778  ASSERT_EQ(tok::newline,              Toks[8].getKind());
779  ASSERT_EQ(tok::newline,              Toks[9].getKind());
780}
781
782// Complex test for verbatim blocks.
783TEST_F(CommentLexerTest, VerbatimBlock8) {
784  const char *Source =
785    "/* Meow \\verbatim aaa\\$\\@\n"
786    "bbb \\endverbati\r"
787    "ccc\r\n"
788    "ddd \\endverbatim Blah \\verbatim eee\n"
789    "\\endverbatim BlahBlah*/";
790  std::vector<Token> Toks;
791
792  lexString(Source, Toks);
793
794  ASSERT_EQ(14U, Toks.size());
795
796  ASSERT_EQ(tok::text,                 Toks[0].getKind());
797  ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
798
799  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
800  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
801
802  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
803  ASSERT_EQ(StringRef(" aaa\\$\\@"),   Toks[2].getVerbatimBlockText());
804
805  ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
806  ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText());
807
808  ASSERT_EQ(tok::verbatim_block_line,  Toks[4].getKind());
809  ASSERT_EQ(StringRef("ccc"),          Toks[4].getVerbatimBlockText());
810
811  ASSERT_EQ(tok::verbatim_block_line,  Toks[5].getKind());
812  ASSERT_EQ(StringRef("ddd "),         Toks[5].getVerbatimBlockText());
813
814  ASSERT_EQ(tok::verbatim_block_end,   Toks[6].getKind());
815  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[6]));
816
817  ASSERT_EQ(tok::text,                 Toks[7].getKind());
818  ASSERT_EQ(StringRef(" Blah "),       Toks[7].getText());
819
820  ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind());
821  ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[8]));
822
823  ASSERT_EQ(tok::verbatim_block_line,  Toks[9].getKind());
824  ASSERT_EQ(StringRef(" eee"),         Toks[9].getVerbatimBlockText());
825
826  ASSERT_EQ(tok::verbatim_block_end,   Toks[10].getKind());
827  ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[10]));
828
829  ASSERT_EQ(tok::text,                 Toks[11].getKind());
830  ASSERT_EQ(StringRef(" BlahBlah"),    Toks[11].getText());
831
832  ASSERT_EQ(tok::newline,              Toks[12].getKind());
833  ASSERT_EQ(tok::newline,              Toks[13].getKind());
834}
835
836// LaTeX verbatim blocks.
837TEST_F(CommentLexerTest, VerbatimBlock9) {
838  const char *Source =
839    "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}";
840  std::vector<Token> Toks;
841
842  lexString(Source, Toks);
843
844  ASSERT_EQ(13U, Toks.size());
845
846  ASSERT_EQ(tok::text,                 Toks[0].getKind());
847  ASSERT_EQ(StringRef(" "),            Toks[0].getText());
848
849  ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
850  ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[1]));
851
852  ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
853  ASSERT_EQ(StringRef(" Aaa "),        Toks[2].getVerbatimBlockText());
854
855  ASSERT_EQ(tok::verbatim_block_end,   Toks[3].getKind());
856  ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[3]));
857
858  ASSERT_EQ(tok::text,                 Toks[4].getKind());
859  ASSERT_EQ(StringRef(" "),            Toks[4].getText());
860
861  ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind());
862  ASSERT_EQ(StringRef("f["),           getVerbatimBlockName(Toks[5]));
863
864  ASSERT_EQ(tok::verbatim_block_line,  Toks[6].getKind());
865  ASSERT_EQ(StringRef(" Bbb "),        Toks[6].getVerbatimBlockText());
866
867  ASSERT_EQ(tok::verbatim_block_end,   Toks[7].getKind());
868  ASSERT_EQ(StringRef("f]"),           getVerbatimBlockName(Toks[7]));
869
870  ASSERT_EQ(tok::text,                 Toks[8].getKind());
871  ASSERT_EQ(StringRef(" "),            Toks[8].getText());
872
873  ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind());
874  ASSERT_EQ(StringRef("f{"),           getVerbatimBlockName(Toks[9]));
875
876  ASSERT_EQ(tok::verbatim_block_line,  Toks[10].getKind());
877  ASSERT_EQ(StringRef(" Ccc "),        Toks[10].getVerbatimBlockText());
878
879  ASSERT_EQ(tok::verbatim_block_end,   Toks[11].getKind());
880  ASSERT_EQ(StringRef("f}"),           getVerbatimBlockName(Toks[11]));
881
882  ASSERT_EQ(tok::newline,              Toks[12].getKind());
883}
884
885// Empty verbatim line.
886TEST_F(CommentLexerTest, VerbatimLine1) {
887  const char *Sources[] = {
888    "/// \\fn\n//",
889    "/** \\fn*/"
890  };
891
892  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
893    std::vector<Token> Toks;
894
895    lexString(Sources[i], Toks);
896
897    ASSERT_EQ(4U, Toks.size());
898
899    ASSERT_EQ(tok::text,               Toks[0].getKind());
900    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
901
902    ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
903    ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
904
905    ASSERT_EQ(tok::newline,            Toks[2].getKind());
906    ASSERT_EQ(tok::newline,            Toks[3].getKind());
907  }
908}
909
910// Verbatim line with Doxygen escape sequences, which should not be expanded.
911TEST_F(CommentLexerTest, VerbatimLine2) {
912  const char *Sources[] = {
913    "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
914    "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
915  };
916
917  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
918    std::vector<Token> Toks;
919
920    lexString(Sources[i], Toks);
921
922    ASSERT_EQ(5U, Toks.size());
923
924    ASSERT_EQ(tok::text,               Toks[0].getKind());
925    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
926
927    ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
928    ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
929
930    ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
931    ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
932                                       Toks[2].getVerbatimLineText());
933
934    ASSERT_EQ(tok::newline,            Toks[3].getKind());
935    ASSERT_EQ(tok::newline,            Toks[4].getKind());
936  }
937}
938
939// Verbatim line should not eat anything from next source line.
940TEST_F(CommentLexerTest, VerbatimLine3) {
941  const char *Source =
942    "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
943    " * Meow\n"
944    " */";
945
946  std::vector<Token> Toks;
947
948  lexString(Source, Toks);
949
950  ASSERT_EQ(9U, Toks.size());
951
952  ASSERT_EQ(tok::text,               Toks[0].getKind());
953  ASSERT_EQ(StringRef(" "),          Toks[0].getText());
954
955  ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
956  ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
957
958  ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
959  ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
960                                     Toks[2].getVerbatimLineText());
961  ASSERT_EQ(tok::newline,            Toks[3].getKind());
962
963  ASSERT_EQ(tok::text,               Toks[4].getKind());
964  ASSERT_EQ(StringRef(" Meow"),      Toks[4].getText());
965  ASSERT_EQ(tok::newline,            Toks[5].getKind());
966
967  ASSERT_EQ(tok::text,               Toks[6].getKind());
968  ASSERT_EQ(StringRef(" "),          Toks[6].getText());
969
970  ASSERT_EQ(tok::newline,            Toks[7].getKind());
971  ASSERT_EQ(tok::newline,            Toks[8].getKind());
972}
973
974TEST_F(CommentLexerTest, HTML1) {
975  const char *Source =
976    "// <";
977
978  std::vector<Token> Toks;
979
980  lexString(Source, Toks);
981
982  ASSERT_EQ(3U, Toks.size());
983
984  ASSERT_EQ(tok::text,      Toks[0].getKind());
985  ASSERT_EQ(StringRef(" "), Toks[0].getText());
986
987  ASSERT_EQ(tok::text,      Toks[1].getKind());
988  ASSERT_EQ(StringRef("<"), Toks[1].getText());
989
990  ASSERT_EQ(tok::newline,   Toks[2].getKind());
991}
992
993TEST_F(CommentLexerTest, HTML2) {
994  const char *Source =
995    "// a<2";
996
997  std::vector<Token> Toks;
998
999  lexString(Source, Toks);
1000
1001  ASSERT_EQ(4U, Toks.size());
1002
1003  ASSERT_EQ(tok::text,       Toks[0].getKind());
1004  ASSERT_EQ(StringRef(" a"), Toks[0].getText());
1005
1006  ASSERT_EQ(tok::text,       Toks[1].getKind());
1007  ASSERT_EQ(StringRef("<"),  Toks[1].getText());
1008
1009  ASSERT_EQ(tok::text,       Toks[2].getKind());
1010  ASSERT_EQ(StringRef("2"),  Toks[2].getText());
1011
1012  ASSERT_EQ(tok::newline,    Toks[3].getKind());
1013}
1014
1015TEST_F(CommentLexerTest, HTML3) {
1016  const char *Source =
1017    "// < img";
1018
1019  std::vector<Token> Toks;
1020
1021  lexString(Source, Toks);
1022
1023  ASSERT_EQ(4U, Toks.size());
1024
1025  ASSERT_EQ(tok::text,         Toks[0].getKind());
1026  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1027
1028  ASSERT_EQ(tok::text,         Toks[1].getKind());
1029  ASSERT_EQ(StringRef("<"),    Toks[1].getText());
1030
1031  ASSERT_EQ(tok::text,         Toks[2].getKind());
1032  ASSERT_EQ(StringRef(" img"), Toks[2].getText());
1033
1034  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1035}
1036
1037TEST_F(CommentLexerTest, HTML4) {
1038  const char *Sources[] = {
1039    "// <img",
1040    "// <img "
1041  };
1042
1043  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1044    std::vector<Token> Toks;
1045
1046    lexString(Sources[i], Toks);
1047
1048    ASSERT_EQ(3U, Toks.size());
1049
1050    ASSERT_EQ(tok::text,           Toks[0].getKind());
1051    ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1052
1053    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1054    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1055
1056    ASSERT_EQ(tok::newline,        Toks[2].getKind());
1057  }
1058}
1059
1060TEST_F(CommentLexerTest, HTML5) {
1061  const char *Source =
1062    "// <img 42";
1063
1064  std::vector<Token> Toks;
1065
1066  lexString(Source, Toks);
1067
1068  ASSERT_EQ(4U, Toks.size());
1069
1070  ASSERT_EQ(tok::text,           Toks[0].getKind());
1071  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1072
1073  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1074  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1075
1076  ASSERT_EQ(tok::text,           Toks[2].getKind());
1077  ASSERT_EQ(StringRef("42"),     Toks[2].getText());
1078
1079  ASSERT_EQ(tok::newline,        Toks[3].getKind());
1080}
1081
1082TEST_F(CommentLexerTest, HTML6) {
1083  const char *Source = "// <img> Meow";
1084
1085  std::vector<Token> Toks;
1086
1087  lexString(Source, Toks);
1088
1089  ASSERT_EQ(5U, Toks.size());
1090
1091  ASSERT_EQ(tok::text,           Toks[0].getKind());
1092  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1093
1094  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1095  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1096
1097  ASSERT_EQ(tok::html_greater,   Toks[2].getKind());
1098
1099  ASSERT_EQ(tok::text,           Toks[3].getKind());
1100  ASSERT_EQ(StringRef(" Meow"),  Toks[3].getText());
1101
1102  ASSERT_EQ(tok::newline,        Toks[4].getKind());
1103}
1104
1105TEST_F(CommentLexerTest, HTML7) {
1106  const char *Source = "// <img=";
1107
1108  std::vector<Token> Toks;
1109
1110  lexString(Source, Toks);
1111
1112  ASSERT_EQ(4U, Toks.size());
1113
1114  ASSERT_EQ(tok::text,           Toks[0].getKind());
1115  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1116
1117  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1118  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1119
1120  ASSERT_EQ(tok::text,           Toks[2].getKind());
1121  ASSERT_EQ(StringRef("="),      Toks[2].getText());
1122
1123  ASSERT_EQ(tok::newline,        Toks[3].getKind());
1124}
1125
1126TEST_F(CommentLexerTest, HTML8) {
1127  const char *Source = "// <img src=> Meow";
1128
1129  std::vector<Token> Toks;
1130
1131  lexString(Source, Toks);
1132
1133  ASSERT_EQ(7U, Toks.size());
1134
1135  ASSERT_EQ(tok::text,           Toks[0].getKind());
1136  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1137
1138  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1139  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1140
1141  ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1142  ASSERT_EQ(StringRef("src"),   Toks[2].getHTMLIdent());
1143
1144  ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
1145
1146  ASSERT_EQ(tok::html_greater,   Toks[4].getKind());
1147
1148  ASSERT_EQ(tok::text,           Toks[5].getKind());
1149  ASSERT_EQ(StringRef(" Meow"),  Toks[5].getText());
1150
1151  ASSERT_EQ(tok::newline,        Toks[6].getKind());
1152}
1153
1154TEST_F(CommentLexerTest, HTML9) {
1155  const char *Sources[] = {
1156    "// <img src",
1157    "// <img src "
1158  };
1159
1160  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1161    std::vector<Token> Toks;
1162
1163    lexString(Sources[i], Toks);
1164
1165    ASSERT_EQ(4U, Toks.size());
1166
1167    ASSERT_EQ(tok::text,           Toks[0].getKind());
1168    ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1169
1170    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1171    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1172
1173    ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1174    ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
1175
1176    ASSERT_EQ(tok::newline,        Toks[3].getKind());
1177  }
1178}
1179
1180TEST_F(CommentLexerTest, HTML10) {
1181  const char *Sources[] = {
1182    "// <img src=",
1183    "// <img src ="
1184  };
1185
1186  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1187    std::vector<Token> Toks;
1188
1189    lexString(Sources[i], Toks);
1190
1191    ASSERT_EQ(5U, Toks.size());
1192
1193    ASSERT_EQ(tok::text,           Toks[0].getKind());
1194    ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1195
1196    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1197    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1198
1199    ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1200    ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
1201
1202    ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
1203
1204    ASSERT_EQ(tok::newline,        Toks[4].getKind());
1205  }
1206}
1207
1208TEST_F(CommentLexerTest, HTML11) {
1209  const char *Sources[] = {
1210    "// <img src=\"",
1211    "// <img src = \"",
1212    "// <img src=\'",
1213    "// <img src = \'"
1214  };
1215
1216  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1217    std::vector<Token> Toks;
1218
1219    lexString(Sources[i], Toks);
1220
1221    ASSERT_EQ(6U, Toks.size());
1222
1223    ASSERT_EQ(tok::text,               Toks[0].getKind());
1224    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
1225
1226    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
1227    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
1228
1229    ASSERT_EQ(tok::html_ident,         Toks[2].getKind());
1230    ASSERT_EQ(StringRef("src"),        Toks[2].getHTMLIdent());
1231
1232    ASSERT_EQ(tok::html_equals,        Toks[3].getKind());
1233
1234    ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1235    ASSERT_EQ(StringRef(""),           Toks[4].getHTMLQuotedString());
1236
1237    ASSERT_EQ(tok::newline,            Toks[5].getKind());
1238  }
1239}
1240
1241TEST_F(CommentLexerTest, HTML12) {
1242  const char *Source = "// <img src=@";
1243
1244  std::vector<Token> Toks;
1245
1246  lexString(Source, Toks);
1247
1248  ASSERT_EQ(6U, Toks.size());
1249
1250  ASSERT_EQ(tok::text,           Toks[0].getKind());
1251  ASSERT_EQ(StringRef(" "),      Toks[0].getText());
1252
1253  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1254  ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
1255
1256  ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
1257  ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
1258
1259  ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
1260
1261  ASSERT_EQ(tok::text,           Toks[4].getKind());
1262  ASSERT_EQ(StringRef("@"),      Toks[4].getText());
1263
1264  ASSERT_EQ(tok::newline,        Toks[5].getKind());
1265}
1266
1267TEST_F(CommentLexerTest, HTML13) {
1268  const char *Sources[] = {
1269    "// <img src=\"val\\\"\\'val",
1270    "// <img src=\"val\\\"\\'val\"",
1271    "// <img src=\'val\\\"\\'val",
1272    "// <img src=\'val\\\"\\'val\'"
1273  };
1274
1275  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1276    std::vector<Token> Toks;
1277
1278    lexString(Sources[i], Toks);
1279
1280    ASSERT_EQ(6U, Toks.size());
1281
1282    ASSERT_EQ(tok::text,                  Toks[0].getKind());
1283    ASSERT_EQ(StringRef(" "),             Toks[0].getText());
1284
1285    ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
1286    ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());
1287
1288    ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
1289    ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());
1290
1291    ASSERT_EQ(tok::html_equals,           Toks[3].getKind());
1292
1293    ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
1294    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1295
1296    ASSERT_EQ(tok::newline,               Toks[5].getKind());
1297  }
1298}
1299
1300TEST_F(CommentLexerTest, HTML14) {
1301  const char *Sources[] = {
1302    "// <img src=\"val\\\"\\'val\">",
1303    "// <img src=\'val\\\"\\'val\'>"
1304  };
1305
1306  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1307    std::vector<Token> Toks;
1308
1309    lexString(Sources[i], Toks);
1310
1311    ASSERT_EQ(7U, Toks.size());
1312
1313    ASSERT_EQ(tok::text,                  Toks[0].getKind());
1314    ASSERT_EQ(StringRef(" "),             Toks[0].getText());
1315
1316    ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
1317    ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());
1318
1319    ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
1320    ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());
1321
1322    ASSERT_EQ(tok::html_equals,           Toks[3].getKind());
1323
1324    ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
1325    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1326
1327    ASSERT_EQ(tok::html_greater,          Toks[5].getKind());
1328
1329    ASSERT_EQ(tok::newline,               Toks[6].getKind());
1330  }
1331}
1332
1333TEST_F(CommentLexerTest, HTML15) {
1334  const char *Sources[] = {
1335    "// <img/>",
1336    "// <img />"
1337  };
1338
1339  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1340    std::vector<Token> Toks;
1341
1342    lexString(Sources[i], Toks);
1343
1344    ASSERT_EQ(4U, Toks.size());
1345
1346    ASSERT_EQ(tok::text,               Toks[0].getKind());
1347    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
1348
1349    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
1350    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
1351
1352    ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind());
1353
1354    ASSERT_EQ(tok::newline,            Toks[3].getKind());
1355  }
1356}
1357
1358TEST_F(CommentLexerTest, HTML16) {
1359  const char *Sources[] = {
1360    "// <img/ Aaa",
1361    "// <img / Aaa"
1362  };
1363
1364  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1365    std::vector<Token> Toks;
1366
1367    lexString(Sources[i], Toks);
1368
1369    ASSERT_EQ(5U, Toks.size());
1370
1371    ASSERT_EQ(tok::text,               Toks[0].getKind());
1372    ASSERT_EQ(StringRef(" "),          Toks[0].getText());
1373
1374    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
1375    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
1376
1377    ASSERT_EQ(tok::text,               Toks[2].getKind());
1378    ASSERT_EQ(StringRef("/"),          Toks[2].getText());
1379
1380    ASSERT_EQ(tok::text,               Toks[3].getKind());
1381    ASSERT_EQ(StringRef(" Aaa"),       Toks[3].getText());
1382
1383    ASSERT_EQ(tok::newline,            Toks[4].getKind());
1384  }
1385}
1386
1387TEST_F(CommentLexerTest, HTML17) {
1388  const char *Source = "// </";
1389
1390  std::vector<Token> Toks;
1391
1392  lexString(Source, Toks);
1393
1394  ASSERT_EQ(3U, Toks.size());
1395
1396  ASSERT_EQ(tok::text,       Toks[0].getKind());
1397  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
1398
1399  ASSERT_EQ(tok::text,       Toks[1].getKind());
1400  ASSERT_EQ(StringRef("</"), Toks[1].getText());
1401
1402  ASSERT_EQ(tok::newline,    Toks[2].getKind());
1403}
1404
1405TEST_F(CommentLexerTest, HTML18) {
1406  const char *Source = "// </@";
1407
1408  std::vector<Token> Toks;
1409
1410  lexString(Source, Toks);
1411
1412  ASSERT_EQ(4U, Toks.size());
1413
1414  ASSERT_EQ(tok::text,       Toks[0].getKind());
1415  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
1416
1417  ASSERT_EQ(tok::text,       Toks[1].getKind());
1418  ASSERT_EQ(StringRef("</"), Toks[1].getText());
1419
1420  ASSERT_EQ(tok::text,       Toks[2].getKind());
1421  ASSERT_EQ(StringRef("@"),  Toks[2].getText());
1422
1423  ASSERT_EQ(tok::newline,    Toks[3].getKind());
1424}
1425
1426TEST_F(CommentLexerTest, HTML19) {
1427  const char *Source = "// </img";
1428
1429  std::vector<Token> Toks;
1430
1431  lexString(Source, Toks);
1432
1433  ASSERT_EQ(3U, Toks.size());
1434
1435  ASSERT_EQ(tok::text,         Toks[0].getKind());
1436  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1437
1438  ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
1439  ASSERT_EQ(StringRef("img"),  Toks[1].getHTMLTagEndName());
1440
1441  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1442}
1443
1444TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
1445  const char *Source = "// <tag>";
1446
1447  std::vector<Token> Toks;
1448
1449  lexString(Source, Toks);
1450
1451  ASSERT_EQ(4U, Toks.size());
1452
1453  ASSERT_EQ(tok::text,         Toks[0].getKind());
1454  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1455
1456  ASSERT_EQ(tok::text,         Toks[1].getKind());
1457  ASSERT_EQ(StringRef("<tag"), Toks[1].getText());
1458
1459  ASSERT_EQ(tok::text,         Toks[2].getKind());
1460  ASSERT_EQ(StringRef(">"),    Toks[2].getText());
1461
1462  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1463}
1464
1465TEST_F(CommentLexerTest, NotAKnownHTMLTag2) {
1466  const char *Source = "// </tag>";
1467
1468  std::vector<Token> Toks;
1469
1470  lexString(Source, Toks);
1471
1472  ASSERT_EQ(4U, Toks.size());
1473
1474  ASSERT_EQ(tok::text,          Toks[0].getKind());
1475  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1476
1477  ASSERT_EQ(tok::text,          Toks[1].getKind());
1478  ASSERT_EQ(StringRef("</tag"), Toks[1].getText());
1479
1480  ASSERT_EQ(tok::text,          Toks[2].getKind());
1481  ASSERT_EQ(StringRef(">"),     Toks[2].getText());
1482
1483  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1484}
1485
1486TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
1487  const char *Source = "// &";
1488
1489  std::vector<Token> Toks;
1490
1491  lexString(Source, Toks);
1492
1493  ASSERT_EQ(3U, Toks.size());
1494
1495  ASSERT_EQ(tok::text,         Toks[0].getKind());
1496  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1497
1498  ASSERT_EQ(tok::text,         Toks[1].getKind());
1499  ASSERT_EQ(StringRef("&"),    Toks[1].getText());
1500
1501  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1502}
1503
1504TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
1505  const char *Source = "// &!";
1506
1507  std::vector<Token> Toks;
1508
1509  lexString(Source, Toks);
1510
1511  ASSERT_EQ(4U, Toks.size());
1512
1513  ASSERT_EQ(tok::text,         Toks[0].getKind());
1514  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1515
1516  ASSERT_EQ(tok::text,         Toks[1].getKind());
1517  ASSERT_EQ(StringRef("&"),    Toks[1].getText());
1518
1519  ASSERT_EQ(tok::text,         Toks[2].getKind());
1520  ASSERT_EQ(StringRef("!"),    Toks[2].getText());
1521
1522  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1523}
1524
1525TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
1526  const char *Source = "// &amp";
1527
1528  std::vector<Token> Toks;
1529
1530  lexString(Source, Toks);
1531
1532  ASSERT_EQ(3U, Toks.size());
1533
1534  ASSERT_EQ(tok::text,         Toks[0].getKind());
1535  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1536
1537  ASSERT_EQ(tok::text,         Toks[1].getKind());
1538  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1539
1540  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1541}
1542
1543TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
1544  const char *Source = "// &amp!";
1545
1546  std::vector<Token> Toks;
1547
1548  lexString(Source, Toks);
1549
1550  ASSERT_EQ(4U, Toks.size());
1551
1552  ASSERT_EQ(tok::text,         Toks[0].getKind());
1553  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1554
1555  ASSERT_EQ(tok::text,         Toks[1].getKind());
1556  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1557
1558  ASSERT_EQ(tok::text,         Toks[2].getKind());
1559  ASSERT_EQ(StringRef("!"),    Toks[2].getText());
1560
1561  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1562}
1563
1564TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
1565  const char *Source = "// &#";
1566
1567  std::vector<Token> Toks;
1568
1569  lexString(Source, Toks);
1570
1571  ASSERT_EQ(3U, Toks.size());
1572
1573  ASSERT_EQ(tok::text,         Toks[0].getKind());
1574  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1575
1576  ASSERT_EQ(tok::text,         Toks[1].getKind());
1577  ASSERT_EQ(StringRef("&#"),   Toks[1].getText());
1578
1579  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1580}
1581
1582TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
1583  const char *Source = "// &#a";
1584
1585  std::vector<Token> Toks;
1586
1587  lexString(Source, Toks);
1588
1589  ASSERT_EQ(4U, Toks.size());
1590
1591  ASSERT_EQ(tok::text,         Toks[0].getKind());
1592  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1593
1594  ASSERT_EQ(tok::text,         Toks[1].getKind());
1595  ASSERT_EQ(StringRef("&#"),   Toks[1].getText());
1596
1597  ASSERT_EQ(tok::text,         Toks[2].getKind());
1598  ASSERT_EQ(StringRef("a"),    Toks[2].getText());
1599
1600  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1601}
1602
1603TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
1604  const char *Source = "// &#42";
1605
1606  std::vector<Token> Toks;
1607
1608  lexString(Source, Toks);
1609
1610  ASSERT_EQ(3U, Toks.size());
1611
1612  ASSERT_EQ(tok::text,         Toks[0].getKind());
1613  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1614
1615  ASSERT_EQ(tok::text,         Toks[1].getKind());
1616  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1617
1618  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1619}
1620
1621TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
1622  const char *Source = "// &#42a";
1623
1624  std::vector<Token> Toks;
1625
1626  lexString(Source, Toks);
1627
1628  ASSERT_EQ(4U, Toks.size());
1629
1630  ASSERT_EQ(tok::text,         Toks[0].getKind());
1631  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1632
1633  ASSERT_EQ(tok::text,         Toks[1].getKind());
1634  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1635
1636  ASSERT_EQ(tok::text,         Toks[2].getKind());
1637  ASSERT_EQ(StringRef("a"),    Toks[2].getText());
1638
1639  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1640}
1641
1642TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
1643  const char *Source = "// &#x";
1644
1645  std::vector<Token> Toks;
1646
1647  lexString(Source, Toks);
1648
1649  ASSERT_EQ(3U, Toks.size());
1650
1651  ASSERT_EQ(tok::text,         Toks[0].getKind());
1652  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1653
1654  ASSERT_EQ(tok::text,         Toks[1].getKind());
1655  ASSERT_EQ(StringRef("&#x"),  Toks[1].getText());
1656
1657  ASSERT_EQ(tok::newline,      Toks[2].getKind());
1658}
1659
1660TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
1661  const char *Source = "// &#xz";
1662
1663  std::vector<Token> Toks;
1664
1665  lexString(Source, Toks);
1666
1667  ASSERT_EQ(4U, Toks.size());
1668
1669  ASSERT_EQ(tok::text,         Toks[0].getKind());
1670  ASSERT_EQ(StringRef(" "),    Toks[0].getText());
1671
1672  ASSERT_EQ(tok::text,         Toks[1].getKind());
1673  ASSERT_EQ(StringRef("&#x"),  Toks[1].getText());
1674
1675  ASSERT_EQ(tok::text,         Toks[2].getKind());
1676  ASSERT_EQ(StringRef("z"),    Toks[2].getText());
1677
1678  ASSERT_EQ(tok::newline,      Toks[3].getKind());
1679}
1680
1681TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
1682  const char *Source = "// &#xab";
1683
1684  std::vector<Token> Toks;
1685
1686  lexString(Source, Toks);
1687
1688  ASSERT_EQ(3U, Toks.size());
1689
1690  ASSERT_EQ(tok::text,          Toks[0].getKind());
1691  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1692
1693  ASSERT_EQ(tok::text,          Toks[1].getKind());
1694  ASSERT_EQ(StringRef("&#xab"), Toks[1].getText());
1695
1696  ASSERT_EQ(tok::newline,       Toks[2].getKind());
1697}
1698
1699TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
1700  const char *Source = "// &#xaBz";
1701
1702  std::vector<Token> Toks;
1703
1704  lexString(Source, Toks);
1705
1706  ASSERT_EQ(4U, Toks.size());
1707
1708  ASSERT_EQ(tok::text,          Toks[0].getKind());
1709  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1710
1711  ASSERT_EQ(tok::text,          Toks[1].getKind());
1712  ASSERT_EQ(StringRef("&#xaB"), Toks[1].getText());
1713
1714  ASSERT_EQ(tok::text,          Toks[2].getKind());
1715  ASSERT_EQ(StringRef("z"),     Toks[2].getText());
1716
1717  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1718}
1719
1720TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
1721  const char *Source = "// &amp;";
1722
1723  std::vector<Token> Toks;
1724
1725  lexString(Source, Toks);
1726
1727  ASSERT_EQ(3U, Toks.size());
1728
1729  ASSERT_EQ(tok::text,          Toks[0].getKind());
1730  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1731
1732  ASSERT_EQ(tok::text,          Toks[1].getKind());
1733  ASSERT_EQ(StringRef("&"),     Toks[1].getText());
1734
1735  ASSERT_EQ(tok::newline,       Toks[2].getKind());
1736}
1737
1738TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
1739  const char *Source = "// &amp;&lt;";
1740
1741  std::vector<Token> Toks;
1742
1743  lexString(Source, Toks);
1744
1745  ASSERT_EQ(4U, Toks.size());
1746
1747  ASSERT_EQ(tok::text,          Toks[0].getKind());
1748  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1749
1750  ASSERT_EQ(tok::text,          Toks[1].getKind());
1751  ASSERT_EQ(StringRef("&"),     Toks[1].getText());
1752
1753  ASSERT_EQ(tok::text,          Toks[2].getKind());
1754  ASSERT_EQ(StringRef("<"),     Toks[2].getText());
1755
1756  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1757}
1758
1759TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
1760  const char *Source = "// &amp; meow";
1761
1762  std::vector<Token> Toks;
1763
1764  lexString(Source, Toks);
1765
1766  ASSERT_EQ(4U, Toks.size());
1767
1768  ASSERT_EQ(tok::text,          Toks[0].getKind());
1769  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1770
1771  ASSERT_EQ(tok::text,          Toks[1].getKind());
1772  ASSERT_EQ(StringRef("&"),     Toks[1].getText());
1773
1774  ASSERT_EQ(tok::text,          Toks[2].getKind());
1775  ASSERT_EQ(StringRef(" meow"), Toks[2].getText());
1776
1777  ASSERT_EQ(tok::newline,       Toks[3].getKind());
1778}
1779
1780TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
1781  const char *Sources[] = {
1782    "// &#61;",
1783    "// &#x3d;",
1784    "// &#X3d;",
1785    "// &#X3D;"
1786  };
1787
1788  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
1789    std::vector<Token> Toks;
1790
1791    lexString(Sources[i], Toks);
1792
1793    ASSERT_EQ(3U, Toks.size());
1794
1795    ASSERT_EQ(tok::text,          Toks[0].getKind());
1796    ASSERT_EQ(StringRef(" "),     Toks[0].getText());
1797
1798    ASSERT_EQ(tok::text,          Toks[1].getKind());
1799    ASSERT_EQ(StringRef("="),     Toks[1].getText());
1800
1801    ASSERT_EQ(tok::newline,       Toks[2].getKind());
1802  }
1803}
1804
1805TEST_F(CommentLexerTest, MultipleComments) {
1806  const char *Source =
1807    "// Aaa\n"
1808    "/// Bbb\n"
1809    "/* Ccc\n"
1810    " * Ddd*/\n"
1811    "/** Eee*/";
1812
1813  std::vector<Token> Toks;
1814
1815  lexString(Source, Toks);
1816
1817  ASSERT_EQ(12U, Toks.size());
1818
1819  ASSERT_EQ(tok::text,           Toks[0].getKind());
1820  ASSERT_EQ(StringRef(" Aaa"),   Toks[0].getText());
1821  ASSERT_EQ(tok::newline,        Toks[1].getKind());
1822
1823  ASSERT_EQ(tok::text,           Toks[2].getKind());
1824  ASSERT_EQ(StringRef(" Bbb"),   Toks[2].getText());
1825  ASSERT_EQ(tok::newline,        Toks[3].getKind());
1826
1827  ASSERT_EQ(tok::text,           Toks[4].getKind());
1828  ASSERT_EQ(StringRef(" Ccc"),   Toks[4].getText());
1829  ASSERT_EQ(tok::newline,        Toks[5].getKind());
1830
1831  ASSERT_EQ(tok::text,           Toks[6].getKind());
1832  ASSERT_EQ(StringRef(" Ddd"),   Toks[6].getText());
1833  ASSERT_EQ(tok::newline,        Toks[7].getKind());
1834  ASSERT_EQ(tok::newline,        Toks[8].getKind());
1835
1836  ASSERT_EQ(tok::text,           Toks[9].getKind());
1837  ASSERT_EQ(StringRef(" Eee"),   Toks[9].getText());
1838
1839  ASSERT_EQ(tok::newline,        Toks[10].getKind());
1840  ASSERT_EQ(tok::newline,        Toks[11].getKind());
1841}
1842
1843} // end namespace comments
1844} // end namespace clang
1845
1846