CommentParser.cpp revision 8d3ba23f2d9e6c87794d059412a0808c9cbacb25
1//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "clang/AST/CommentParser.h"
11#include "clang/AST/CommentSema.h"
12#include "llvm/Support/ErrorHandling.h"
13
14namespace clang {
15namespace comments {
16
17Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator):
18    L(L), S(S), Allocator(Allocator) {
19  consumeToken();
20}
21
22ParamCommandComment *Parser::parseParamCommandArgs(
23    ParamCommandComment *PC,
24    TextTokenRetokenizer &Retokenizer) {
25  Token Arg;
26  // Check if argument looks like direction specification: [dir]
27  // e.g., [in], [out], [in,out]
28  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
29    PC = S.actOnParamCommandArg(PC,
30                                Arg.getLocation(),
31                                Arg.getEndLocation(),
32                                Arg.getText(),
33                                /* IsDirection = */ true);
34
35  if (Retokenizer.lexWord(Arg))
36    StringRef ArgText = Arg.getText();
37    PC = S.actOnParamCommandArg(PC,
38                                Arg.getLocation(),
39                                Arg.getEndLocation(),
40                                Arg.getText(),
41                                /* IsDirection = */ false);
42
43  return PC;
44}
45
46BlockCommandComment *Parser::parseBlockCommandArgs(
47    BlockCommandComment *BC,
48    TextTokenRetokenizer &Retokenizer,
49    unsigned NumArgs) {
50  typedef BlockCommandComment::Argument Argument;
51  Argument *Args = new (Allocator) Argument[NumArgs];
52  unsigned ParsedArgs = 0;
53  Token Arg;
54  while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
55    Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
56                                            Arg.getEndLocation()),
57                                Arg.getText());
58    ParsedArgs++;
59  }
60
61  return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
62}
63
64BlockCommandComment *Parser::parseBlockCommand() {
65  assert(Tok.is(tok::command));
66
67  ParamCommandComment *PC;
68  BlockCommandComment *BC;
69  bool IsParam = false;
70  unsigned NumArgs = 0;
71  if (S.isParamCommand(Tok.getCommandName())) {
72    IsParam = true;
73    PC = S.actOnParamCommandStart(Tok.getLocation(),
74                                  Tok.getEndLocation(),
75                                  Tok.getCommandName());
76  } else {
77    NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
78    BC = S.actOnBlockCommandStart(Tok.getLocation(),
79                                  Tok.getEndLocation(),
80                                  Tok.getCommandName());
81  }
82  consumeToken();
83
84  if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
85    // Block command ahead.  We can't nest block commands, so pretend that this
86    // command has an empty argument.
87    // TODO: Diag() Warn empty arg to block command
88    ParagraphComment *PC = S.actOnParagraphComment(
89                                ArrayRef<InlineContentComment *>());
90    return S.actOnBlockCommandFinish(BC, PC);
91  }
92
93  if (IsParam || NumArgs > 0) {
94    // In order to parse command arguments we need to retokenize a few
95    // following text tokens.
96    TextTokenRetokenizer Retokenizer(Allocator);
97    while (Tok.is(tok::text)) {
98      if (Retokenizer.addToken(Tok))
99        consumeToken();
100    }
101
102    if (IsParam)
103      PC = parseParamCommandArgs(PC, Retokenizer);
104    else
105      BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
106
107    // Put back tokens we didn't use.
108    Token Text;
109    while (Retokenizer.lexText(Text))
110      putBack(Text);
111  }
112
113  BlockContentComment *Block = parseParagraphOrBlockCommand();
114  // Since we have checked for a block command, we should have parsed a
115  // paragraph.
116  if (IsParam)
117    return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
118  else
119    return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
120}
121
122InlineCommandComment *Parser::parseInlineCommand() {
123  assert(Tok.is(tok::command));
124
125  const Token CommandTok = Tok;
126  consumeToken();
127
128  TextTokenRetokenizer Retokenizer(Allocator);
129  while (Tok.is(tok::text)) {
130    if (Retokenizer.addToken(Tok))
131      consumeToken();
132  }
133
134  Token ArgTok;
135  bool ArgTokValid = Retokenizer.lexWord(ArgTok);
136
137  InlineCommandComment *IC;
138  if (ArgTokValid) {
139    IC = S.actOnInlineCommand(CommandTok.getLocation(),
140                              CommandTok.getEndLocation(),
141                              CommandTok.getCommandName(),
142                              ArgTok.getLocation(),
143                              ArgTok.getEndLocation(),
144                              ArgTok.getText());
145  } else {
146    IC = S.actOnInlineCommand(CommandTok.getLocation(),
147                              CommandTok.getEndLocation(),
148                              CommandTok.getCommandName());
149  }
150
151  Token Text;
152  while (Retokenizer.lexText(Text))
153    putBack(Text);
154
155  return IC;
156}
157
158HTMLOpenTagComment *Parser::parseHTMLOpenTag() {
159  assert(Tok.is(tok::html_tag_open));
160  HTMLOpenTagComment *HOT =
161      S.actOnHTMLOpenTagStart(Tok.getLocation(),
162                              Tok.getHTMLTagOpenName());
163  consumeToken();
164
165  SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs;
166  while (true) {
167    if (Tok.is(tok::html_ident)) {
168      Token Ident = Tok;
169      consumeToken();
170      if (Tok.isNot(tok::html_equals)) {
171        Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
172                                                      Ident.getHTMLIdent()));
173        continue;
174      }
175      Token Equals = Tok;
176      consumeToken();
177      if (Tok.isNot(tok::html_quoted_string)) {
178        // TODO: Diag() expected quoted string
179        Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
180                                                      Ident.getHTMLIdent()));
181        continue;
182      }
183      Attrs.push_back(HTMLOpenTagComment::Attribute(
184                              Ident.getLocation(),
185                              Ident.getHTMLIdent(),
186                              Equals.getLocation(),
187                              SourceRange(Tok.getLocation(),
188                                          Tok.getEndLocation()),
189                              Tok.getHTMLQuotedString()));
190      consumeToken();
191      continue;
192    } else if (Tok.is(tok::html_greater)) {
193      HOT = S.actOnHTMLOpenTagFinish(HOT,
194                                     copyArray(llvm::makeArrayRef(Attrs)),
195                                     Tok.getLocation());
196      consumeToken();
197      return HOT;
198    } else if (Tok.is(tok::html_equals) ||
199               Tok.is(tok::html_quoted_string)) {
200      // TODO: Diag() Err expected ident
201      while (Tok.is(tok::html_equals) ||
202             Tok.is(tok::html_quoted_string))
203        consumeToken();
204    } else {
205      // Not a token from HTML open tag.  Thus HTML tag prematurely ended.
206      // TODO: Diag() Err HTML tag prematurely ended
207      return S.actOnHTMLOpenTagFinish(HOT,
208                                      copyArray(llvm::makeArrayRef(Attrs)),
209                                      SourceLocation());
210    }
211  }
212}
213
214HTMLCloseTagComment *Parser::parseHTMLCloseTag() {
215  assert(Tok.is(tok::html_tag_close));
216  Token TokTagOpen = Tok;
217  consumeToken();
218  SourceLocation Loc;
219  if (Tok.is(tok::html_greater)) {
220    Loc = Tok.getLocation();
221    consumeToken();
222  }
223
224  return S.actOnHTMLCloseTag(TokTagOpen.getLocation(),
225                             Loc,
226                             TokTagOpen.getHTMLTagCloseName());
227}
228
229BlockContentComment *Parser::parseParagraphOrBlockCommand() {
230  SmallVector<InlineContentComment *, 8> Content;
231
232  while (true) {
233    switch (Tok.getKind()) {
234    case tok::verbatim_block_begin:
235    case tok::verbatim_line_name:
236    case tok::eof:
237      assert(Content.size() != 0);
238      break; // Block content or EOF ahead, finish this parapgaph.
239
240    case tok::command:
241      if (S.isBlockCommand(Tok.getCommandName())) {
242        if (Content.size() == 0)
243          return parseBlockCommand();
244        break; // Block command ahead, finish this parapgaph.
245      }
246      if (S.isInlineCommand(Tok.getCommandName())) {
247        Content.push_back(parseInlineCommand());
248        continue;
249      }
250
251      // Not a block command, not an inline command ==> an unknown command.
252      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
253                                              Tok.getEndLocation(),
254                                              Tok.getCommandName()));
255      consumeToken();
256      continue;
257
258    case tok::newline: {
259      consumeToken();
260      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
261        consumeToken();
262        break; // Two newlines -- end of paragraph.
263      }
264      if (Content.size() > 0)
265        Content.back()->addTrailingNewline();
266      continue;
267    }
268
269    // Don't deal with HTML tag soup now.
270    case tok::html_tag_open:
271      Content.push_back(parseHTMLOpenTag());
272      continue;
273
274    case tok::html_tag_close:
275      Content.push_back(parseHTMLCloseTag());
276      continue;
277
278    case tok::text:
279      Content.push_back(S.actOnText(Tok.getLocation(),
280                                    Tok.getEndLocation(),
281                                    Tok.getText()));
282      consumeToken();
283      continue;
284
285    case tok::verbatim_block_line:
286    case tok::verbatim_block_end:
287    case tok::verbatim_line_text:
288    case tok::html_ident:
289    case tok::html_equals:
290    case tok::html_quoted_string:
291    case tok::html_greater:
292      llvm_unreachable("should not see this token");
293    }
294    break;
295  }
296
297  return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
298}
299
300VerbatimBlockComment *Parser::parseVerbatimBlock() {
301  assert(Tok.is(tok::verbatim_block_begin));
302
303  VerbatimBlockComment *VB =
304      S.actOnVerbatimBlockStart(Tok.getLocation(),
305                                Tok.getVerbatimBlockName());
306  consumeToken();
307
308  // Don't create an empty line if verbatim opening command is followed
309  // by a newline.
310  if (Tok.is(tok::newline))
311    consumeToken();
312
313  SmallVector<VerbatimBlockLineComment *, 8> Lines;
314  while (Tok.is(tok::verbatim_block_line) ||
315         Tok.is(tok::newline)) {
316    VerbatimBlockLineComment *Line;
317    if (Tok.is(tok::verbatim_block_line)) {
318      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
319                                      Tok.getVerbatimBlockText());
320      consumeToken();
321      if (Tok.is(tok::newline)) {
322        consumeToken();
323      }
324    } else {
325      // Empty line, just a tok::newline.
326      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
327                                      "");
328      consumeToken();
329    }
330    Lines.push_back(Line);
331  }
332
333  assert(Tok.is(tok::verbatim_block_end));
334  VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
335                                  Tok.getVerbatimBlockName(),
336                                  copyArray(llvm::makeArrayRef(Lines)));
337  consumeToken();
338
339  return VB;
340}
341
342VerbatimLineComment *Parser::parseVerbatimLine() {
343  assert(Tok.is(tok::verbatim_line_name));
344
345  Token NameTok = Tok;
346  consumeToken();
347
348  SourceLocation TextBegin;
349  StringRef Text;
350  // Next token might not be a tok::verbatim_line_text if verbatim line
351  // starting command comes just before a newline or comment end.
352  if (Tok.is(tok::verbatim_line_text)) {
353    TextBegin = Tok.getLocation();
354    Text = Tok.getVerbatimLineText();
355  } else {
356    TextBegin = NameTok.getEndLocation();
357    Text = "";
358  }
359
360  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
361                                                NameTok.getVerbatimLineName(),
362                                                TextBegin,
363                                                Text);
364  consumeToken();
365  return VL;
366}
367
368BlockContentComment *Parser::parseBlockContent() {
369  switch (Tok.getKind()) {
370  case tok::text:
371  case tok::command:
372  case tok::html_tag_open:
373  case tok::html_tag_close:
374    return parseParagraphOrBlockCommand();
375
376  case tok::verbatim_block_begin:
377    return parseVerbatimBlock();
378
379  case tok::verbatim_line_name:
380    return parseVerbatimLine();
381
382  case tok::eof:
383  case tok::newline:
384  case tok::verbatim_block_line:
385  case tok::verbatim_block_end:
386  case tok::verbatim_line_text:
387  case tok::html_ident:
388  case tok::html_equals:
389  case tok::html_quoted_string:
390  case tok::html_greater:
391    llvm_unreachable("should not see this token");
392  }
393}
394
395FullComment *Parser::parseFullComment() {
396  // Skip newlines at the beginning of the comment.
397  while (Tok.is(tok::newline))
398    consumeToken();
399
400  SmallVector<BlockContentComment *, 8> Blocks;
401  while (Tok.isNot(tok::eof)) {
402    Blocks.push_back(parseBlockContent());
403
404    // Skip extra newlines after paragraph end.
405    while (Tok.is(tok::newline))
406      consumeToken();
407  }
408  return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
409}
410
411} // end namespace comments
412} // end namespace clang
413
414
415