CommentParser.cpp revision a5ef44ff5d93a3be6ca67782828157a71894cf0c
1//===--- CommentParser.cpp - Doxygen comment parser -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "clang/AST/CommentParser.h" 11#include "clang/AST/CommentSema.h" 12#include "clang/AST/CommentDiagnostic.h" 13#include "clang/Basic/SourceManager.h" 14#include "llvm/Support/ErrorHandling.h" 15 16namespace clang { 17namespace comments { 18 19Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, 20 const SourceManager &SourceMgr, DiagnosticsEngine &Diags): 21 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) { 22 consumeToken(); 23} 24 25ParamCommandComment *Parser::parseParamCommandArgs( 26 ParamCommandComment *PC, 27 TextTokenRetokenizer &Retokenizer) { 28 Token Arg; 29 // Check if argument looks like direction specification: [dir] 30 // e.g., [in], [out], [in,out] 31 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) 32 PC = S.actOnParamCommandDirectionArg(PC, 33 Arg.getLocation(), 34 Arg.getEndLocation(), 35 Arg.getText()); 36 37 if (Retokenizer.lexWord(Arg)) 38 PC = S.actOnParamCommandParamNameArg(PC, 39 Arg.getLocation(), 40 Arg.getEndLocation(), 41 Arg.getText()); 42 43 return PC; 44} 45 46BlockCommandComment *Parser::parseBlockCommandArgs( 47 BlockCommandComment *BC, 48 TextTokenRetokenizer &Retokenizer, 49 unsigned NumArgs) { 50 typedef BlockCommandComment::Argument Argument; 51 Argument *Args = 52 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs]; 53 unsigned ParsedArgs = 0; 54 Token Arg; 55 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { 56 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), 57 Arg.getEndLocation()), 58 Arg.getText()); 59 ParsedArgs++; 60 } 61 62 return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs)); 63} 64 65BlockCommandComment *Parser::parseBlockCommand() { 66 assert(Tok.is(tok::command)); 67 68 ParamCommandComment *PC; 69 BlockCommandComment *BC; 70 bool IsParam = false; 71 unsigned NumArgs = 0; 72 if (S.isParamCommand(Tok.getCommandName())) { 73 IsParam = true; 74 PC = S.actOnParamCommandStart(Tok.getLocation(), 75 Tok.getEndLocation(), 76 Tok.getCommandName()); 77 } else { 78 NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); 79 BC = S.actOnBlockCommandStart(Tok.getLocation(), 80 Tok.getEndLocation(), 81 Tok.getCommandName()); 82 } 83 consumeToken(); 84 85 if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { 86 // Block command ahead. We can't nest block commands, so pretend that this 87 // command has an empty argument. 88 ParagraphComment *PC = S.actOnParagraphComment( 89 ArrayRef<InlineContentComment *>()); 90 return S.actOnBlockCommandFinish(BC, PC); 91 } 92 93 if (IsParam || NumArgs > 0) { 94 // In order to parse command arguments we need to retokenize a few 95 // following text tokens. 96 TextTokenRetokenizer Retokenizer(Allocator); 97 while (Tok.is(tok::text)) { 98 if (Retokenizer.addToken(Tok)) 99 consumeToken(); 100 } 101 102 if (IsParam) 103 PC = parseParamCommandArgs(PC, Retokenizer); 104 else 105 BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); 106 107 // Put back tokens we didn't use. 108 Token Text; 109 while (Retokenizer.lexText(Text)) 110 putBack(Text); 111 } 112 113 BlockContentComment *Block = parseParagraphOrBlockCommand(); 114 // Since we have checked for a block command, we should have parsed a 115 // paragraph. 116 if (IsParam) 117 return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); 118 else 119 return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); 120} 121 122InlineCommandComment *Parser::parseInlineCommand() { 123 assert(Tok.is(tok::command)); 124 125 const Token CommandTok = Tok; 126 consumeToken(); 127 128 TextTokenRetokenizer Retokenizer(Allocator); 129 while (Tok.is(tok::text)) { 130 if (Retokenizer.addToken(Tok)) 131 consumeToken(); 132 } 133 134 Token ArgTok; 135 bool ArgTokValid = Retokenizer.lexWord(ArgTok); 136 137 InlineCommandComment *IC; 138 if (ArgTokValid) { 139 IC = S.actOnInlineCommand(CommandTok.getLocation(), 140 CommandTok.getEndLocation(), 141 CommandTok.getCommandName(), 142 ArgTok.getLocation(), 143 ArgTok.getEndLocation(), 144 ArgTok.getText()); 145 } else { 146 IC = S.actOnInlineCommand(CommandTok.getLocation(), 147 CommandTok.getEndLocation(), 148 CommandTok.getCommandName()); 149 } 150 151 Token Text; 152 while (Retokenizer.lexText(Text)) 153 putBack(Text); 154 155 return IC; 156} 157 158HTMLOpenTagComment *Parser::parseHTMLOpenTag() { 159 assert(Tok.is(tok::html_tag_open)); 160 HTMLOpenTagComment *HOT = 161 S.actOnHTMLOpenTagStart(Tok.getLocation(), 162 Tok.getHTMLTagOpenName()); 163 consumeToken(); 164 165 SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs; 166 while (true) { 167 switch (Tok.getKind()) { 168 case tok::html_ident: { 169 Token Ident = Tok; 170 consumeToken(); 171 if (Tok.isNot(tok::html_equals)) { 172 Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), 173 Ident.getHTMLIdent())); 174 continue; 175 } 176 Token Equals = Tok; 177 consumeToken(); 178 if (Tok.isNot(tok::html_quoted_string)) { 179 Diag(Tok.getLocation(), 180 diag::warn_doc_html_open_tag_expected_quoted_string) 181 << SourceRange(Equals.getLocation()); 182 Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), 183 Ident.getHTMLIdent())); 184 while (Tok.is(tok::html_equals) || 185 Tok.is(tok::html_quoted_string)) 186 consumeToken(); 187 continue; 188 } 189 Attrs.push_back(HTMLOpenTagComment::Attribute( 190 Ident.getLocation(), 191 Ident.getHTMLIdent(), 192 Equals.getLocation(), 193 SourceRange(Tok.getLocation(), 194 Tok.getEndLocation()), 195 Tok.getHTMLQuotedString())); 196 consumeToken(); 197 continue; 198 } 199 200 case tok::html_greater: 201 HOT = S.actOnHTMLOpenTagFinish(HOT, 202 copyArray(llvm::makeArrayRef(Attrs)), 203 Tok.getLocation(), 204 /* IsSelfClosing = */ false); 205 consumeToken(); 206 return HOT; 207 208 case tok::html_slash_greater: 209 HOT = S.actOnHTMLOpenTagFinish(HOT, 210 copyArray(llvm::makeArrayRef(Attrs)), 211 Tok.getLocation(), 212 /* IsSelfClosing = */ true); 213 consumeToken(); 214 return HOT; 215 216 case tok::html_equals: 217 case tok::html_quoted_string: 218 Diag(Tok.getLocation(), 219 diag::warn_doc_html_open_tag_expected_ident_or_greater); 220 while (Tok.is(tok::html_equals) || 221 Tok.is(tok::html_quoted_string)) 222 consumeToken(); 223 if (Tok.is(tok::html_ident) || 224 Tok.is(tok::html_greater) || 225 Tok.is(tok::html_slash_greater)) 226 continue; 227 228 return S.actOnHTMLOpenTagFinish(HOT, 229 copyArray(llvm::makeArrayRef(Attrs)), 230 SourceLocation(), 231 /* IsSelfClosing = */ false); 232 233 default: 234 // Not a token from an HTML open tag. Thus HTML tag prematurely ended. 235 HOT = S.actOnHTMLOpenTagFinish(HOT, 236 copyArray(llvm::makeArrayRef(Attrs)), 237 SourceLocation(), 238 /* IsSelfClosing = */ false); 239 bool StartLineInvalid; 240 const unsigned StartLine = SourceMgr.getPresumedLineNumber( 241 HOT->getLocation(), 242 &StartLineInvalid); 243 bool EndLineInvalid; 244 const unsigned EndLine = SourceMgr.getPresumedLineNumber( 245 Tok.getLocation(), 246 &EndLineInvalid); 247 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) 248 Diag(Tok.getLocation(), 249 diag::warn_doc_html_open_tag_expected_ident_or_greater) 250 << HOT->getSourceRange(); 251 else { 252 Diag(Tok.getLocation(), 253 diag::warn_doc_html_open_tag_expected_ident_or_greater); 254 Diag(HOT->getLocation(), diag::note_doc_html_tag_started_here) 255 << HOT->getSourceRange(); 256 } 257 return HOT; 258 } 259 } 260} 261 262HTMLCloseTagComment *Parser::parseHTMLCloseTag() { 263 assert(Tok.is(tok::html_tag_close)); 264 Token TokTagOpen = Tok; 265 consumeToken(); 266 SourceLocation Loc; 267 if (Tok.is(tok::html_greater)) { 268 Loc = Tok.getLocation(); 269 consumeToken(); 270 } 271 272 return S.actOnHTMLCloseTag(TokTagOpen.getLocation(), 273 Loc, 274 TokTagOpen.getHTMLTagCloseName()); 275} 276 277BlockContentComment *Parser::parseParagraphOrBlockCommand() { 278 SmallVector<InlineContentComment *, 8> Content; 279 280 while (true) { 281 switch (Tok.getKind()) { 282 case tok::verbatim_block_begin: 283 case tok::verbatim_line_name: 284 case tok::eof: 285 assert(Content.size() != 0); 286 break; // Block content or EOF ahead, finish this parapgaph. 287 288 case tok::command: 289 if (S.isBlockCommand(Tok.getCommandName())) { 290 if (Content.size() == 0) 291 return parseBlockCommand(); 292 break; // Block command ahead, finish this parapgaph. 293 } 294 if (S.isInlineCommand(Tok.getCommandName())) { 295 Content.push_back(parseInlineCommand()); 296 continue; 297 } 298 299 // Not a block command, not an inline command ==> an unknown command. 300 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 301 Tok.getEndLocation(), 302 Tok.getCommandName())); 303 consumeToken(); 304 continue; 305 306 case tok::newline: { 307 consumeToken(); 308 if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 309 consumeToken(); 310 break; // Two newlines -- end of paragraph. 311 } 312 if (Content.size() > 0) 313 Content.back()->addTrailingNewline(); 314 continue; 315 } 316 317 // Don't deal with HTML tag soup now. 318 case tok::html_tag_open: 319 Content.push_back(parseHTMLOpenTag()); 320 continue; 321 322 case tok::html_tag_close: 323 Content.push_back(parseHTMLCloseTag()); 324 continue; 325 326 case tok::text: 327 Content.push_back(S.actOnText(Tok.getLocation(), 328 Tok.getEndLocation(), 329 Tok.getText())); 330 consumeToken(); 331 continue; 332 333 case tok::verbatim_block_line: 334 case tok::verbatim_block_end: 335 case tok::verbatim_line_text: 336 case tok::html_ident: 337 case tok::html_equals: 338 case tok::html_quoted_string: 339 case tok::html_greater: 340 case tok::html_slash_greater: 341 llvm_unreachable("should not see this token"); 342 } 343 break; 344 } 345 346 return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content))); 347} 348 349VerbatimBlockComment *Parser::parseVerbatimBlock() { 350 assert(Tok.is(tok::verbatim_block_begin)); 351 352 VerbatimBlockComment *VB = 353 S.actOnVerbatimBlockStart(Tok.getLocation(), 354 Tok.getVerbatimBlockName()); 355 consumeToken(); 356 357 // Don't create an empty line if verbatim opening command is followed 358 // by a newline. 359 if (Tok.is(tok::newline)) 360 consumeToken(); 361 362 SmallVector<VerbatimBlockLineComment *, 8> Lines; 363 while (Tok.is(tok::verbatim_block_line) || 364 Tok.is(tok::newline)) { 365 VerbatimBlockLineComment *Line; 366 if (Tok.is(tok::verbatim_block_line)) { 367 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), 368 Tok.getVerbatimBlockText()); 369 consumeToken(); 370 if (Tok.is(tok::newline)) { 371 consumeToken(); 372 } 373 } else { 374 // Empty line, just a tok::newline. 375 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), 376 ""); 377 consumeToken(); 378 } 379 Lines.push_back(Line); 380 } 381 382 assert(Tok.is(tok::verbatim_block_end)); 383 VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), 384 Tok.getVerbatimBlockName(), 385 copyArray(llvm::makeArrayRef(Lines))); 386 consumeToken(); 387 388 return VB; 389} 390 391VerbatimLineComment *Parser::parseVerbatimLine() { 392 assert(Tok.is(tok::verbatim_line_name)); 393 394 Token NameTok = Tok; 395 consumeToken(); 396 397 SourceLocation TextBegin; 398 StringRef Text; 399 // Next token might not be a tok::verbatim_line_text if verbatim line 400 // starting command comes just before a newline or comment end. 401 if (Tok.is(tok::verbatim_line_text)) { 402 TextBegin = Tok.getLocation(); 403 Text = Tok.getVerbatimLineText(); 404 } else { 405 TextBegin = NameTok.getEndLocation(); 406 Text = ""; 407 } 408 409 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), 410 NameTok.getVerbatimLineName(), 411 TextBegin, 412 Text); 413 consumeToken(); 414 return VL; 415} 416 417BlockContentComment *Parser::parseBlockContent() { 418 switch (Tok.getKind()) { 419 case tok::text: 420 case tok::command: 421 case tok::html_tag_open: 422 case tok::html_tag_close: 423 return parseParagraphOrBlockCommand(); 424 425 case tok::verbatim_block_begin: 426 return parseVerbatimBlock(); 427 428 case tok::verbatim_line_name: 429 return parseVerbatimLine(); 430 431 case tok::eof: 432 case tok::newline: 433 case tok::verbatim_block_line: 434 case tok::verbatim_block_end: 435 case tok::verbatim_line_text: 436 case tok::html_ident: 437 case tok::html_equals: 438 case tok::html_quoted_string: 439 case tok::html_greater: 440 case tok::html_slash_greater: 441 llvm_unreachable("should not see this token"); 442 } 443 llvm_unreachable("bogus token kind"); 444} 445 446FullComment *Parser::parseFullComment() { 447 // Skip newlines at the beginning of the comment. 448 while (Tok.is(tok::newline)) 449 consumeToken(); 450 451 SmallVector<BlockContentComment *, 8> Blocks; 452 while (Tok.isNot(tok::eof)) { 453 Blocks.push_back(parseBlockContent()); 454 455 // Skip extra newlines after paragraph end. 456 while (Tok.is(tok::newline)) 457 consumeToken(); 458 } 459 return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks))); 460} 461 462} // end namespace comments 463} // end namespace clang 464