CommentParser.cpp revision 8d3ba23f2d9e6c87794d059412a0808c9cbacb25
1//===--- CommentParser.cpp - Doxygen comment parser -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "clang/AST/CommentParser.h" 11#include "clang/AST/CommentSema.h" 12#include "llvm/Support/ErrorHandling.h" 13 14namespace clang { 15namespace comments { 16 17Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator): 18 L(L), S(S), Allocator(Allocator) { 19 consumeToken(); 20} 21 22ParamCommandComment *Parser::parseParamCommandArgs( 23 ParamCommandComment *PC, 24 TextTokenRetokenizer &Retokenizer) { 25 Token Arg; 26 // Check if argument looks like direction specification: [dir] 27 // e.g., [in], [out], [in,out] 28 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) 29 PC = S.actOnParamCommandArg(PC, 30 Arg.getLocation(), 31 Arg.getEndLocation(), 32 Arg.getText(), 33 /* IsDirection = */ true); 34 35 if (Retokenizer.lexWord(Arg)) 36 StringRef ArgText = Arg.getText(); 37 PC = S.actOnParamCommandArg(PC, 38 Arg.getLocation(), 39 Arg.getEndLocation(), 40 Arg.getText(), 41 /* IsDirection = */ false); 42 43 return PC; 44} 45 46BlockCommandComment *Parser::parseBlockCommandArgs( 47 BlockCommandComment *BC, 48 TextTokenRetokenizer &Retokenizer, 49 unsigned NumArgs) { 50 typedef BlockCommandComment::Argument Argument; 51 Argument *Args = new (Allocator) Argument[NumArgs]; 52 unsigned ParsedArgs = 0; 53 Token Arg; 54 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { 55 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), 56 Arg.getEndLocation()), 57 Arg.getText()); 58 ParsedArgs++; 59 } 60 61 return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs)); 62} 63 64BlockCommandComment *Parser::parseBlockCommand() { 65 assert(Tok.is(tok::command)); 66 67 ParamCommandComment *PC; 68 BlockCommandComment *BC; 69 bool IsParam = false; 70 unsigned NumArgs = 0; 71 if (S.isParamCommand(Tok.getCommandName())) { 72 IsParam = true; 73 PC = S.actOnParamCommandStart(Tok.getLocation(), 74 Tok.getEndLocation(), 75 Tok.getCommandName()); 76 } else { 77 NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); 78 BC = S.actOnBlockCommandStart(Tok.getLocation(), 79 Tok.getEndLocation(), 80 Tok.getCommandName()); 81 } 82 consumeToken(); 83 84 if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { 85 // Block command ahead. We can't nest block commands, so pretend that this 86 // command has an empty argument. 87 // TODO: Diag() Warn empty arg to block command 88 ParagraphComment *PC = S.actOnParagraphComment( 89 ArrayRef<InlineContentComment *>()); 90 return S.actOnBlockCommandFinish(BC, PC); 91 } 92 93 if (IsParam || NumArgs > 0) { 94 // In order to parse command arguments we need to retokenize a few 95 // following text tokens. 96 TextTokenRetokenizer Retokenizer(Allocator); 97 while (Tok.is(tok::text)) { 98 if (Retokenizer.addToken(Tok)) 99 consumeToken(); 100 } 101 102 if (IsParam) 103 PC = parseParamCommandArgs(PC, Retokenizer); 104 else 105 BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); 106 107 // Put back tokens we didn't use. 108 Token Text; 109 while (Retokenizer.lexText(Text)) 110 putBack(Text); 111 } 112 113 BlockContentComment *Block = parseParagraphOrBlockCommand(); 114 // Since we have checked for a block command, we should have parsed a 115 // paragraph. 116 if (IsParam) 117 return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); 118 else 119 return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); 120} 121 122InlineCommandComment *Parser::parseInlineCommand() { 123 assert(Tok.is(tok::command)); 124 125 const Token CommandTok = Tok; 126 consumeToken(); 127 128 TextTokenRetokenizer Retokenizer(Allocator); 129 while (Tok.is(tok::text)) { 130 if (Retokenizer.addToken(Tok)) 131 consumeToken(); 132 } 133 134 Token ArgTok; 135 bool ArgTokValid = Retokenizer.lexWord(ArgTok); 136 137 InlineCommandComment *IC; 138 if (ArgTokValid) { 139 IC = S.actOnInlineCommand(CommandTok.getLocation(), 140 CommandTok.getEndLocation(), 141 CommandTok.getCommandName(), 142 ArgTok.getLocation(), 143 ArgTok.getEndLocation(), 144 ArgTok.getText()); 145 } else { 146 IC = S.actOnInlineCommand(CommandTok.getLocation(), 147 CommandTok.getEndLocation(), 148 CommandTok.getCommandName()); 149 } 150 151 Token Text; 152 while (Retokenizer.lexText(Text)) 153 putBack(Text); 154 155 return IC; 156} 157 158HTMLOpenTagComment *Parser::parseHTMLOpenTag() { 159 assert(Tok.is(tok::html_tag_open)); 160 HTMLOpenTagComment *HOT = 161 S.actOnHTMLOpenTagStart(Tok.getLocation(), 162 Tok.getHTMLTagOpenName()); 163 consumeToken(); 164 165 SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs; 166 while (true) { 167 if (Tok.is(tok::html_ident)) { 168 Token Ident = Tok; 169 consumeToken(); 170 if (Tok.isNot(tok::html_equals)) { 171 Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), 172 Ident.getHTMLIdent())); 173 continue; 174 } 175 Token Equals = Tok; 176 consumeToken(); 177 if (Tok.isNot(tok::html_quoted_string)) { 178 // TODO: Diag() expected quoted string 179 Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), 180 Ident.getHTMLIdent())); 181 continue; 182 } 183 Attrs.push_back(HTMLOpenTagComment::Attribute( 184 Ident.getLocation(), 185 Ident.getHTMLIdent(), 186 Equals.getLocation(), 187 SourceRange(Tok.getLocation(), 188 Tok.getEndLocation()), 189 Tok.getHTMLQuotedString())); 190 consumeToken(); 191 continue; 192 } else if (Tok.is(tok::html_greater)) { 193 HOT = S.actOnHTMLOpenTagFinish(HOT, 194 copyArray(llvm::makeArrayRef(Attrs)), 195 Tok.getLocation()); 196 consumeToken(); 197 return HOT; 198 } else if (Tok.is(tok::html_equals) || 199 Tok.is(tok::html_quoted_string)) { 200 // TODO: Diag() Err expected ident 201 while (Tok.is(tok::html_equals) || 202 Tok.is(tok::html_quoted_string)) 203 consumeToken(); 204 } else { 205 // Not a token from HTML open tag. Thus HTML tag prematurely ended. 206 // TODO: Diag() Err HTML tag prematurely ended 207 return S.actOnHTMLOpenTagFinish(HOT, 208 copyArray(llvm::makeArrayRef(Attrs)), 209 SourceLocation()); 210 } 211 } 212} 213 214HTMLCloseTagComment *Parser::parseHTMLCloseTag() { 215 assert(Tok.is(tok::html_tag_close)); 216 Token TokTagOpen = Tok; 217 consumeToken(); 218 SourceLocation Loc; 219 if (Tok.is(tok::html_greater)) { 220 Loc = Tok.getLocation(); 221 consumeToken(); 222 } 223 224 return S.actOnHTMLCloseTag(TokTagOpen.getLocation(), 225 Loc, 226 TokTagOpen.getHTMLTagCloseName()); 227} 228 229BlockContentComment *Parser::parseParagraphOrBlockCommand() { 230 SmallVector<InlineContentComment *, 8> Content; 231 232 while (true) { 233 switch (Tok.getKind()) { 234 case tok::verbatim_block_begin: 235 case tok::verbatim_line_name: 236 case tok::eof: 237 assert(Content.size() != 0); 238 break; // Block content or EOF ahead, finish this parapgaph. 239 240 case tok::command: 241 if (S.isBlockCommand(Tok.getCommandName())) { 242 if (Content.size() == 0) 243 return parseBlockCommand(); 244 break; // Block command ahead, finish this parapgaph. 245 } 246 if (S.isInlineCommand(Tok.getCommandName())) { 247 Content.push_back(parseInlineCommand()); 248 continue; 249 } 250 251 // Not a block command, not an inline command ==> an unknown command. 252 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 253 Tok.getEndLocation(), 254 Tok.getCommandName())); 255 consumeToken(); 256 continue; 257 258 case tok::newline: { 259 consumeToken(); 260 if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 261 consumeToken(); 262 break; // Two newlines -- end of paragraph. 263 } 264 if (Content.size() > 0) 265 Content.back()->addTrailingNewline(); 266 continue; 267 } 268 269 // Don't deal with HTML tag soup now. 270 case tok::html_tag_open: 271 Content.push_back(parseHTMLOpenTag()); 272 continue; 273 274 case tok::html_tag_close: 275 Content.push_back(parseHTMLCloseTag()); 276 continue; 277 278 case tok::text: 279 Content.push_back(S.actOnText(Tok.getLocation(), 280 Tok.getEndLocation(), 281 Tok.getText())); 282 consumeToken(); 283 continue; 284 285 case tok::verbatim_block_line: 286 case tok::verbatim_block_end: 287 case tok::verbatim_line_text: 288 case tok::html_ident: 289 case tok::html_equals: 290 case tok::html_quoted_string: 291 case tok::html_greater: 292 llvm_unreachable("should not see this token"); 293 } 294 break; 295 } 296 297 return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content))); 298} 299 300VerbatimBlockComment *Parser::parseVerbatimBlock() { 301 assert(Tok.is(tok::verbatim_block_begin)); 302 303 VerbatimBlockComment *VB = 304 S.actOnVerbatimBlockStart(Tok.getLocation(), 305 Tok.getVerbatimBlockName()); 306 consumeToken(); 307 308 // Don't create an empty line if verbatim opening command is followed 309 // by a newline. 310 if (Tok.is(tok::newline)) 311 consumeToken(); 312 313 SmallVector<VerbatimBlockLineComment *, 8> Lines; 314 while (Tok.is(tok::verbatim_block_line) || 315 Tok.is(tok::newline)) { 316 VerbatimBlockLineComment *Line; 317 if (Tok.is(tok::verbatim_block_line)) { 318 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), 319 Tok.getVerbatimBlockText()); 320 consumeToken(); 321 if (Tok.is(tok::newline)) { 322 consumeToken(); 323 } 324 } else { 325 // Empty line, just a tok::newline. 326 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), 327 ""); 328 consumeToken(); 329 } 330 Lines.push_back(Line); 331 } 332 333 assert(Tok.is(tok::verbatim_block_end)); 334 VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), 335 Tok.getVerbatimBlockName(), 336 copyArray(llvm::makeArrayRef(Lines))); 337 consumeToken(); 338 339 return VB; 340} 341 342VerbatimLineComment *Parser::parseVerbatimLine() { 343 assert(Tok.is(tok::verbatim_line_name)); 344 345 Token NameTok = Tok; 346 consumeToken(); 347 348 SourceLocation TextBegin; 349 StringRef Text; 350 // Next token might not be a tok::verbatim_line_text if verbatim line 351 // starting command comes just before a newline or comment end. 352 if (Tok.is(tok::verbatim_line_text)) { 353 TextBegin = Tok.getLocation(); 354 Text = Tok.getVerbatimLineText(); 355 } else { 356 TextBegin = NameTok.getEndLocation(); 357 Text = ""; 358 } 359 360 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), 361 NameTok.getVerbatimLineName(), 362 TextBegin, 363 Text); 364 consumeToken(); 365 return VL; 366} 367 368BlockContentComment *Parser::parseBlockContent() { 369 switch (Tok.getKind()) { 370 case tok::text: 371 case tok::command: 372 case tok::html_tag_open: 373 case tok::html_tag_close: 374 return parseParagraphOrBlockCommand(); 375 376 case tok::verbatim_block_begin: 377 return parseVerbatimBlock(); 378 379 case tok::verbatim_line_name: 380 return parseVerbatimLine(); 381 382 case tok::eof: 383 case tok::newline: 384 case tok::verbatim_block_line: 385 case tok::verbatim_block_end: 386 case tok::verbatim_line_text: 387 case tok::html_ident: 388 case tok::html_equals: 389 case tok::html_quoted_string: 390 case tok::html_greater: 391 llvm_unreachable("should not see this token"); 392 } 393} 394 395FullComment *Parser::parseFullComment() { 396 // Skip newlines at the beginning of the comment. 397 while (Tok.is(tok::newline)) 398 consumeToken(); 399 400 SmallVector<BlockContentComment *, 8> Blocks; 401 while (Tok.isNot(tok::eof)) { 402 Blocks.push_back(parseBlockContent()); 403 404 // Skip extra newlines after paragraph end. 405 while (Tok.is(tok::newline)) 406 consumeToken(); 407 } 408 return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks))); 409} 410 411} // end namespace comments 412} // end namespace clang 413 414 415