// CommentParser.cpp revision a5ef44ff5d93a3be6ca67782828157a71894cf0c
1//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "clang/AST/CommentParser.h"
11#include "clang/AST/CommentSema.h"
12#include "clang/AST/CommentDiagnostic.h"
13#include "clang/Basic/SourceManager.h"
14#include "llvm/Support/ErrorHandling.h"
15
16namespace clang {
17namespace comments {
18
19Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
20               const SourceManager &SourceMgr, DiagnosticsEngine &Diags):
21    L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) {
22  consumeToken();
23}
24
25ParamCommandComment *Parser::parseParamCommandArgs(
26    ParamCommandComment *PC,
27    TextTokenRetokenizer &Retokenizer) {
28  Token Arg;
29  // Check if argument looks like direction specification: [dir]
30  // e.g., [in], [out], [in,out]
31  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
32    PC = S.actOnParamCommandDirectionArg(PC,
33                                         Arg.getLocation(),
34                                         Arg.getEndLocation(),
35                                         Arg.getText());
36
37  if (Retokenizer.lexWord(Arg))
38    PC = S.actOnParamCommandParamNameArg(PC,
39                                         Arg.getLocation(),
40                                         Arg.getEndLocation(),
41                                         Arg.getText());
42
43  return PC;
44}
45
46BlockCommandComment *Parser::parseBlockCommandArgs(
47    BlockCommandComment *BC,
48    TextTokenRetokenizer &Retokenizer,
49    unsigned NumArgs) {
50  typedef BlockCommandComment::Argument Argument;
51  Argument *Args =
52      new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
53  unsigned ParsedArgs = 0;
54  Token Arg;
55  while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
56    Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
57                                            Arg.getEndLocation()),
58                                Arg.getText());
59    ParsedArgs++;
60  }
61
62  return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
63}
64
65BlockCommandComment *Parser::parseBlockCommand() {
66  assert(Tok.is(tok::command));
67
68  ParamCommandComment *PC;
69  BlockCommandComment *BC;
70  bool IsParam = false;
71  unsigned NumArgs = 0;
72  if (S.isParamCommand(Tok.getCommandName())) {
73    IsParam = true;
74    PC = S.actOnParamCommandStart(Tok.getLocation(),
75                                  Tok.getEndLocation(),
76                                  Tok.getCommandName());
77  } else {
78    NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
79    BC = S.actOnBlockCommandStart(Tok.getLocation(),
80                                  Tok.getEndLocation(),
81                                  Tok.getCommandName());
82  }
83  consumeToken();
84
85  if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
86    // Block command ahead.  We can't nest block commands, so pretend that this
87    // command has an empty argument.
88    ParagraphComment *PC = S.actOnParagraphComment(
89                                ArrayRef<InlineContentComment *>());
90    return S.actOnBlockCommandFinish(BC, PC);
91  }
92
93  if (IsParam || NumArgs > 0) {
94    // In order to parse command arguments we need to retokenize a few
95    // following text tokens.
96    TextTokenRetokenizer Retokenizer(Allocator);
97    while (Tok.is(tok::text)) {
98      if (Retokenizer.addToken(Tok))
99        consumeToken();
100    }
101
102    if (IsParam)
103      PC = parseParamCommandArgs(PC, Retokenizer);
104    else
105      BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
106
107    // Put back tokens we didn't use.
108    Token Text;
109    while (Retokenizer.lexText(Text))
110      putBack(Text);
111  }
112
113  BlockContentComment *Block = parseParagraphOrBlockCommand();
114  // Since we have checked for a block command, we should have parsed a
115  // paragraph.
116  if (IsParam)
117    return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
118  else
119    return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
120}
121
122InlineCommandComment *Parser::parseInlineCommand() {
123  assert(Tok.is(tok::command));
124
125  const Token CommandTok = Tok;
126  consumeToken();
127
128  TextTokenRetokenizer Retokenizer(Allocator);
129  while (Tok.is(tok::text)) {
130    if (Retokenizer.addToken(Tok))
131      consumeToken();
132  }
133
134  Token ArgTok;
135  bool ArgTokValid = Retokenizer.lexWord(ArgTok);
136
137  InlineCommandComment *IC;
138  if (ArgTokValid) {
139    IC = S.actOnInlineCommand(CommandTok.getLocation(),
140                              CommandTok.getEndLocation(),
141                              CommandTok.getCommandName(),
142                              ArgTok.getLocation(),
143                              ArgTok.getEndLocation(),
144                              ArgTok.getText());
145  } else {
146    IC = S.actOnInlineCommand(CommandTok.getLocation(),
147                              CommandTok.getEndLocation(),
148                              CommandTok.getCommandName());
149  }
150
151  Token Text;
152  while (Retokenizer.lexText(Text))
153    putBack(Text);
154
155  return IC;
156}
157
158HTMLOpenTagComment *Parser::parseHTMLOpenTag() {
159  assert(Tok.is(tok::html_tag_open));
160  HTMLOpenTagComment *HOT =
161      S.actOnHTMLOpenTagStart(Tok.getLocation(),
162                              Tok.getHTMLTagOpenName());
163  consumeToken();
164
165  SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs;
166  while (true) {
167    switch (Tok.getKind()) {
168    case tok::html_ident: {
169      Token Ident = Tok;
170      consumeToken();
171      if (Tok.isNot(tok::html_equals)) {
172        Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
173                                                      Ident.getHTMLIdent()));
174        continue;
175      }
176      Token Equals = Tok;
177      consumeToken();
178      if (Tok.isNot(tok::html_quoted_string)) {
179        Diag(Tok.getLocation(),
180             diag::warn_doc_html_open_tag_expected_quoted_string)
181          << SourceRange(Equals.getLocation());
182        Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
183                                                      Ident.getHTMLIdent()));
184        while (Tok.is(tok::html_equals) ||
185               Tok.is(tok::html_quoted_string))
186          consumeToken();
187        continue;
188      }
189      Attrs.push_back(HTMLOpenTagComment::Attribute(
190                              Ident.getLocation(),
191                              Ident.getHTMLIdent(),
192                              Equals.getLocation(),
193                              SourceRange(Tok.getLocation(),
194                                          Tok.getEndLocation()),
195                              Tok.getHTMLQuotedString()));
196      consumeToken();
197      continue;
198    }
199
200    case tok::html_greater:
201      HOT = S.actOnHTMLOpenTagFinish(HOT,
202                                     copyArray(llvm::makeArrayRef(Attrs)),
203                                     Tok.getLocation(),
204                                     /* IsSelfClosing = */ false);
205      consumeToken();
206      return HOT;
207
208    case tok::html_slash_greater:
209      HOT = S.actOnHTMLOpenTagFinish(HOT,
210                                     copyArray(llvm::makeArrayRef(Attrs)),
211                                     Tok.getLocation(),
212                                     /* IsSelfClosing = */ true);
213      consumeToken();
214      return HOT;
215
216    case tok::html_equals:
217    case tok::html_quoted_string:
218      Diag(Tok.getLocation(),
219           diag::warn_doc_html_open_tag_expected_ident_or_greater);
220      while (Tok.is(tok::html_equals) ||
221             Tok.is(tok::html_quoted_string))
222        consumeToken();
223      if (Tok.is(tok::html_ident) ||
224          Tok.is(tok::html_greater) ||
225          Tok.is(tok::html_slash_greater))
226        continue;
227
228      return S.actOnHTMLOpenTagFinish(HOT,
229                                      copyArray(llvm::makeArrayRef(Attrs)),
230                                      SourceLocation(),
231                                      /* IsSelfClosing = */ false);
232
233    default:
234      // Not a token from an HTML open tag.  Thus HTML tag prematurely ended.
235      HOT = S.actOnHTMLOpenTagFinish(HOT,
236                                     copyArray(llvm::makeArrayRef(Attrs)),
237                                     SourceLocation(),
238                                     /* IsSelfClosing = */ false);
239      bool StartLineInvalid;
240      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
241                                                  HOT->getLocation(),
242                                                  &StartLineInvalid);
243      bool EndLineInvalid;
244      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
245                                                  Tok.getLocation(),
246                                                  &EndLineInvalid);
247      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
248        Diag(Tok.getLocation(),
249             diag::warn_doc_html_open_tag_expected_ident_or_greater)
250          << HOT->getSourceRange();
251      else {
252        Diag(Tok.getLocation(),
253             diag::warn_doc_html_open_tag_expected_ident_or_greater);
254        Diag(HOT->getLocation(), diag::note_doc_html_tag_started_here)
255          << HOT->getSourceRange();
256      }
257      return HOT;
258    }
259  }
260}
261
262HTMLCloseTagComment *Parser::parseHTMLCloseTag() {
263  assert(Tok.is(tok::html_tag_close));
264  Token TokTagOpen = Tok;
265  consumeToken();
266  SourceLocation Loc;
267  if (Tok.is(tok::html_greater)) {
268    Loc = Tok.getLocation();
269    consumeToken();
270  }
271
272  return S.actOnHTMLCloseTag(TokTagOpen.getLocation(),
273                             Loc,
274                             TokTagOpen.getHTMLTagCloseName());
275}
276
277BlockContentComment *Parser::parseParagraphOrBlockCommand() {
278  SmallVector<InlineContentComment *, 8> Content;
279
280  while (true) {
281    switch (Tok.getKind()) {
282    case tok::verbatim_block_begin:
283    case tok::verbatim_line_name:
284    case tok::eof:
285      assert(Content.size() != 0);
286      break; // Block content or EOF ahead, finish this parapgaph.
287
288    case tok::command:
289      if (S.isBlockCommand(Tok.getCommandName())) {
290        if (Content.size() == 0)
291          return parseBlockCommand();
292        break; // Block command ahead, finish this parapgaph.
293      }
294      if (S.isInlineCommand(Tok.getCommandName())) {
295        Content.push_back(parseInlineCommand());
296        continue;
297      }
298
299      // Not a block command, not an inline command ==> an unknown command.
300      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
301                                              Tok.getEndLocation(),
302                                              Tok.getCommandName()));
303      consumeToken();
304      continue;
305
306    case tok::newline: {
307      consumeToken();
308      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
309        consumeToken();
310        break; // Two newlines -- end of paragraph.
311      }
312      if (Content.size() > 0)
313        Content.back()->addTrailingNewline();
314      continue;
315    }
316
317    // Don't deal with HTML tag soup now.
318    case tok::html_tag_open:
319      Content.push_back(parseHTMLOpenTag());
320      continue;
321
322    case tok::html_tag_close:
323      Content.push_back(parseHTMLCloseTag());
324      continue;
325
326    case tok::text:
327      Content.push_back(S.actOnText(Tok.getLocation(),
328                                    Tok.getEndLocation(),
329                                    Tok.getText()));
330      consumeToken();
331      continue;
332
333    case tok::verbatim_block_line:
334    case tok::verbatim_block_end:
335    case tok::verbatim_line_text:
336    case tok::html_ident:
337    case tok::html_equals:
338    case tok::html_quoted_string:
339    case tok::html_greater:
340    case tok::html_slash_greater:
341      llvm_unreachable("should not see this token");
342    }
343    break;
344  }
345
346  return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
347}
348
349VerbatimBlockComment *Parser::parseVerbatimBlock() {
350  assert(Tok.is(tok::verbatim_block_begin));
351
352  VerbatimBlockComment *VB =
353      S.actOnVerbatimBlockStart(Tok.getLocation(),
354                                Tok.getVerbatimBlockName());
355  consumeToken();
356
357  // Don't create an empty line if verbatim opening command is followed
358  // by a newline.
359  if (Tok.is(tok::newline))
360    consumeToken();
361
362  SmallVector<VerbatimBlockLineComment *, 8> Lines;
363  while (Tok.is(tok::verbatim_block_line) ||
364         Tok.is(tok::newline)) {
365    VerbatimBlockLineComment *Line;
366    if (Tok.is(tok::verbatim_block_line)) {
367      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
368                                      Tok.getVerbatimBlockText());
369      consumeToken();
370      if (Tok.is(tok::newline)) {
371        consumeToken();
372      }
373    } else {
374      // Empty line, just a tok::newline.
375      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
376                                      "");
377      consumeToken();
378    }
379    Lines.push_back(Line);
380  }
381
382  assert(Tok.is(tok::verbatim_block_end));
383  VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
384                                  Tok.getVerbatimBlockName(),
385                                  copyArray(llvm::makeArrayRef(Lines)));
386  consumeToken();
387
388  return VB;
389}
390
391VerbatimLineComment *Parser::parseVerbatimLine() {
392  assert(Tok.is(tok::verbatim_line_name));
393
394  Token NameTok = Tok;
395  consumeToken();
396
397  SourceLocation TextBegin;
398  StringRef Text;
399  // Next token might not be a tok::verbatim_line_text if verbatim line
400  // starting command comes just before a newline or comment end.
401  if (Tok.is(tok::verbatim_line_text)) {
402    TextBegin = Tok.getLocation();
403    Text = Tok.getVerbatimLineText();
404  } else {
405    TextBegin = NameTok.getEndLocation();
406    Text = "";
407  }
408
409  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
410                                                NameTok.getVerbatimLineName(),
411                                                TextBegin,
412                                                Text);
413  consumeToken();
414  return VL;
415}
416
417BlockContentComment *Parser::parseBlockContent() {
418  switch (Tok.getKind()) {
419  case tok::text:
420  case tok::command:
421  case tok::html_tag_open:
422  case tok::html_tag_close:
423    return parseParagraphOrBlockCommand();
424
425  case tok::verbatim_block_begin:
426    return parseVerbatimBlock();
427
428  case tok::verbatim_line_name:
429    return parseVerbatimLine();
430
431  case tok::eof:
432  case tok::newline:
433  case tok::verbatim_block_line:
434  case tok::verbatim_block_end:
435  case tok::verbatim_line_text:
436  case tok::html_ident:
437  case tok::html_equals:
438  case tok::html_quoted_string:
439  case tok::html_greater:
440  case tok::html_slash_greater:
441    llvm_unreachable("should not see this token");
442  }
443  llvm_unreachable("bogus token kind");
444}
445
446FullComment *Parser::parseFullComment() {
447  // Skip newlines at the beginning of the comment.
448  while (Tok.is(tok::newline))
449    consumeToken();
450
451  SmallVector<BlockContentComment *, 8> Blocks;
452  while (Tok.isNot(tok::eof)) {
453    Blocks.push_back(parseBlockContent());
454
455    // Skip extra newlines after paragraph end.
456    while (Tok.is(tok::newline))
457      consumeToken();
458  }
459  return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
460}
461
462} // end namespace comments
463} // end namespace clang
464