HTMLRewrite.cpp revision 2f10398814e8d58cce029a7e73af21bb274dcd42
1//== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the HTMLRewriter clas, which is used to translate the 11// text of a source file into prettified HTML. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Lex/Preprocessor.h" 16#include "clang/Rewrite/Rewriter.h" 17#include "clang/Rewrite/HTMLRewrite.h" 18#include "clang/Lex/TokenConcatenation.h" 19#include "clang/Lex/Preprocessor.h" 20#include "clang/Basic/SourceManager.h" 21#include "llvm/ADT/SmallString.h" 22#include "llvm/ADT/OwningPtr.h" 23#include "llvm/Support/MemoryBuffer.h" 24#include "llvm/Support/raw_ostream.h" 25using namespace clang; 26 27 28/// HighlightRange - Highlight a range in the source code with the specified 29/// start/end tags. B/E must be in the same file. This ensures that 30/// start/end tags are placed at the start/end of each line if the range is 31/// multiline. 32void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E, 33 const char *StartTag, const char *EndTag) { 34 SourceManager &SM = R.getSourceMgr(); 35 B = SM.getInstantiationLoc(B); 36 E = SM.getInstantiationLoc(E); 37 FileID FID = SM.getFileID(B); 38 assert(SM.getFileID(E) == FID && "B/E not in the same file!"); 39 40 unsigned BOffset = SM.getFileOffset(B); 41 unsigned EOffset = SM.getFileOffset(E); 42 43 // Include the whole end token in the range. 44 EOffset += Lexer::MeasureTokenLength(E, R.getSourceMgr()); 45 46 HighlightRange(R.getEditBuffer(FID), BOffset, EOffset, 47 SM.getBufferData(FID).first, StartTag, EndTag); 48} 49 50/// HighlightRange - This is the same as the above method, but takes 51/// decomposed file locations. 52void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E, 53 const char *BufferStart, 54 const char *StartTag, const char *EndTag) { 55 // Insert the tag at the absolute start/end of the range. 56 RB.InsertTextAfter(B, StartTag, strlen(StartTag)); 57 RB.InsertTextBefore(E, EndTag, strlen(EndTag)); 58 59 // Scan the range to see if there is a \r or \n. If so, and if the line is 60 // not blank, insert tags on that line as well. 61 bool HadOpenTag = true; 62 63 unsigned LastNonWhiteSpace = B; 64 for (unsigned i = B; i != E; ++i) { 65 switch (BufferStart[i]) { 66 case '\r': 67 case '\n': 68 // Okay, we found a newline in the range. If we have an open tag, we need 69 // to insert a close tag at the first non-whitespace before the newline. 70 if (HadOpenTag) 71 RB.InsertTextBefore(LastNonWhiteSpace+1, EndTag, strlen(EndTag)); 72 73 // Instead of inserting an open tag immediately after the newline, we 74 // wait until we see a non-whitespace character. This prevents us from 75 // inserting tags around blank lines, and also allows the open tag to 76 // be put *after* whitespace on a non-blank line. 77 HadOpenTag = false; 78 break; 79 case '\0': 80 case ' ': 81 case '\t': 82 case '\f': 83 case '\v': 84 // Ignore whitespace. 85 break; 86 87 default: 88 // If there is no tag open, do it now. 89 if (!HadOpenTag) { 90 RB.InsertTextAfter(i, StartTag, strlen(StartTag)); 91 HadOpenTag = true; 92 } 93 94 // Remember this character. 95 LastNonWhiteSpace = i; 96 break; 97 } 98 } 99} 100 101void html::EscapeText(Rewriter &R, FileID FID, 102 bool EscapeSpaces, bool ReplaceTabs) { 103 104 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID); 105 const char* C = Buf->getBufferStart(); 106 const char* FileEnd = Buf->getBufferEnd(); 107 108 assert (C <= FileEnd); 109 110 RewriteBuffer &RB = R.getEditBuffer(FID); 111 112 unsigned ColNo = 0; 113 for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) { 114 switch (*C) { 115 default: ++ColNo; break; 116 case '\n': 117 case '\r': 118 ColNo = 0; 119 break; 120 121 case ' ': 122 if (EscapeSpaces) 123 RB.ReplaceText(FilePos, 1, " ", 6); 124 ++ColNo; 125 break; 126 case '\f': 127 RB.ReplaceText(FilePos, 1, "<hr>", 4); 128 ColNo = 0; 129 break; 130 131 case '\t': { 132 if (!ReplaceTabs) 133 break; 134 unsigned NumSpaces = 8-(ColNo&7); 135 if (EscapeSpaces) 136 RB.ReplaceText(FilePos, 1, " " 137 " ", 6*NumSpaces); 138 else 139 RB.ReplaceText(FilePos, 1, " ", NumSpaces); 140 ColNo += NumSpaces; 141 break; 142 } 143 case '<': 144 RB.ReplaceText(FilePos, 1, "<", 4); 145 ++ColNo; 146 break; 147 148 case '>': 149 RB.ReplaceText(FilePos, 1, ">", 4); 150 ++ColNo; 151 break; 152 153 case '&': 154 RB.ReplaceText(FilePos, 1, "&", 5); 155 ++ColNo; 156 break; 157 } 158 } 159} 160 161std::string html::EscapeText(const std::string& s, bool EscapeSpaces, 162 bool ReplaceTabs) { 163 164 unsigned len = s.size(); 165 std::string Str; 166 llvm::raw_string_ostream os(Str); 167 168 for (unsigned i = 0 ; i < len; ++i) { 169 170 char c = s[i]; 171 switch (c) { 172 default: 173 os << c; break; 174 175 case ' ': 176 if (EscapeSpaces) os << " "; 177 else os << ' '; 178 break; 179 180 case '\t': 181 if (ReplaceTabs) { 182 if (EscapeSpaces) 183 for (unsigned i = 0; i < 4; ++i) 184 os << " "; 185 else 186 for (unsigned i = 0; i < 4; ++i) 187 os << " "; 188 } 189 else 190 os << c; 191 192 break; 193 194 case '<': os << "<"; break; 195 case '>': os << ">"; break; 196 case '&': os << "&"; break; 197 } 198 } 199 200 return os.str(); 201} 202 203static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo, 204 unsigned B, unsigned E) { 205 llvm::SmallString<100> Str; 206 Str += "<tr><td class=\"num\" id=\"LN"; 207 Str.append_uint(LineNo); 208 Str += "\">"; 209 Str.append_uint(LineNo); 210 Str += "</td><td class=\"line\">"; 211 212 if (B == E) { // Handle empty lines. 213 Str += " </td></tr>"; 214 RB.InsertTextBefore(B, &Str[0], Str.size()); 215 } else { 216 RB.InsertTextBefore(B, &Str[0], Str.size()); 217 RB.InsertTextBefore(E, "</td></tr>", strlen("</td></tr>")); 218 } 219} 220 221void html::AddLineNumbers(Rewriter& R, FileID FID) { 222 223 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID); 224 const char* FileBeg = Buf->getBufferStart(); 225 const char* FileEnd = Buf->getBufferEnd(); 226 const char* C = FileBeg; 227 RewriteBuffer &RB = R.getEditBuffer(FID); 228 229 assert (C <= FileEnd); 230 231 unsigned LineNo = 0; 232 unsigned FilePos = 0; 233 234 while (C != FileEnd) { 235 236 ++LineNo; 237 unsigned LineStartPos = FilePos; 238 unsigned LineEndPos = FileEnd - FileBeg; 239 240 assert (FilePos <= LineEndPos); 241 assert (C < FileEnd); 242 243 // Scan until the newline (or end-of-file). 244 245 while (C != FileEnd) { 246 char c = *C; 247 ++C; 248 249 if (c == '\n') { 250 LineEndPos = FilePos++; 251 break; 252 } 253 254 ++FilePos; 255 } 256 257 AddLineNumber(RB, LineNo, LineStartPos, LineEndPos); 258 } 259 260 // Add one big table tag that surrounds all of the code. 261 RB.InsertTextBefore(0, "<table class=\"code\">\n", 262 strlen("<table class=\"code\">\n")); 263 264 RB.InsertTextAfter(FileEnd - FileBeg, "</table>", strlen("</table>")); 265} 266 267void html::AddHeaderFooterInternalBuiltinCSS(Rewriter& R, FileID FID, 268 const char *title) { 269 270 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID); 271 const char* FileStart = Buf->getBufferStart(); 272 const char* FileEnd = Buf->getBufferEnd(); 273 274 SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID); 275 SourceLocation EndLoc = StartLoc.getFileLocWithOffset(FileEnd-FileStart); 276 277 std::string s; 278 llvm::raw_string_ostream os(s); 279 os << "<!doctype html>\n" // Use HTML 5 doctype 280 "<html>\n<head>\n"; 281 282 if (title) 283 os << "<title>" << html::EscapeText(title) << "</title>\n"; 284 285 os << "<style type=\"text/css\">\n" 286 " body { color:#000000; background-color:#ffffff }\n" 287 " body { font-family:Helvetica, sans-serif; font-size:10pt }\n" 288 " h1 { font-size:14pt }\n" 289 " .code { border-collapse:collapse; width:100%; }\n" 290 " .code { font-family: \"Andale Mono\", monospace; font-size:10pt }\n" 291 " .code { line-height: 1.2em }\n" 292 " .comment { color: green; font-style: oblique }\n" 293 " .keyword { color: blue }\n" 294 " .string_literal { color: red }\n" 295 " .directive { color: darkmagenta }\n" 296 // Macro expansions. 297 " .expansion { display: none; }\n" 298 " .macro:hover .expansion { display: block; border: 2px solid #FF0000; " 299 "padding: 2px; background-color:#FFF0F0; font-weight: normal; " 300 " -webkit-border-radius:5px; -webkit-box-shadow:1px 1px 7px #000; " 301 "position: absolute; top: -1em; left:10em; z-index: 1 } \n" 302 " .macro { color: darkmagenta; background-color:LemonChiffon;" 303 // Macros are position: relative to provide base for expansions. 304 " position: relative }\n" 305 " .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }\n" 306 " .num { text-align:right; font-size: smaller }\n" 307 " .num { color:#444444 }\n" 308 " .line { padding-left: 1ex; border-left: 3px solid #ccc }\n" 309 " .line { white-space: pre }\n" 310 " .msg { -webkit-box-shadow:1px 1px 7px #000 }\n" 311 " .msg { -webkit-border-radius:5px }\n" 312 " .msg { font-family:Helvetica, sans-serif; font-size: smaller }\n" 313 " .msg { float:left }\n" 314 " .msg { padding:0.5em 1ex 0.5em 1ex }\n" 315 " .msg { margin-top:10px; margin-bottom:10px }\n" 316 " .msg { font-weight:bold }\n" 317 " .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap;}\n" 318 " .msgEvent { background-color:#fff8b4; color:#000000 }\n" 319 " .msgControl { background-color:#bbbbbb; color:#444444 }\n" 320 " .mrange { background-color:#dfddf3 }\n" 321 " .mrange { border-bottom:1px solid #6F9DBE }\n" 322 " .PathIndex { font-weight: bold }\n" 323 " .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }\n" 324 " .CodeRemovalHint { background-color:#de1010 }\n" 325 " .CodeRemovalHint { border-bottom:1px solid #6F9DBE }\n" 326 " table.simpletable {\n" 327 " padding: 5px;\n" 328 " font-size:12pt;\n" 329 " margin:20px;\n" 330 " border-collapse: collapse; border-spacing: 0px;\n" 331 " }\n" 332 " td.rowname {\n" 333 " text-align:right; font-weight:bold; color:#444444;\n" 334 " padding-right:2ex; }\n" 335 "</style>\n</head>\n<body>"; 336 337 // Generate header 338 R.InsertStrBefore(StartLoc, os.str()); 339 // Generate footer 340 341 R.InsertCStrAfter(EndLoc, "</body></html>\n"); 342} 343 344/// SyntaxHighlight - Relex the specified FileID and annotate the HTML with 345/// information about keywords, macro expansions etc. This uses the macro 346/// table state from the end of the file, so it won't be perfectly perfect, 347/// but it will be reasonably close. 348void html::SyntaxHighlight(Rewriter &R, FileID FID, Preprocessor &PP) { 349 RewriteBuffer &RB = R.getEditBuffer(FID); 350 351 const SourceManager &SM = PP.getSourceManager(); 352 Lexer L(FID, SM, PP.getLangOptions()); 353 const char *BufferStart = L.getBufferStart(); 354 355 // Inform the preprocessor that we want to retain comments as tokens, so we 356 // can highlight them. 357 L.SetCommentRetentionState(true); 358 359 // Lex all the tokens in raw mode, to avoid entering #includes or expanding 360 // macros. 361 Token Tok; 362 L.LexFromRawLexer(Tok); 363 364 while (Tok.isNot(tok::eof)) { 365 // Since we are lexing unexpanded tokens, all tokens are from the main 366 // FileID. 367 unsigned TokOffs = SM.getFileOffset(Tok.getLocation()); 368 unsigned TokLen = Tok.getLength(); 369 switch (Tok.getKind()) { 370 default: break; 371 case tok::identifier: { 372 // Fill in Result.IdentifierInfo, looking up the identifier in the 373 // identifier table. 374 IdentifierInfo *II = PP.LookUpIdentifierInfo(Tok, BufferStart+TokOffs); 375 376 // If this is a pp-identifier, for a keyword, highlight it as such. 377 if (II->getTokenID() != tok::identifier) 378 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, 379 "<span class='keyword'>", "</span>"); 380 break; 381 } 382 case tok::comment: 383 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, 384 "<span class='comment'>", "</span>"); 385 break; 386 case tok::wide_string_literal: 387 // Chop off the L prefix 388 ++TokOffs; 389 --TokLen; 390 // FALL THROUGH. 391 case tok::string_literal: 392 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, 393 "<span class='string_literal'>", "</span>"); 394 break; 395 case tok::hash: { 396 // If this is a preprocessor directive, all tokens to end of line are too. 397 if (!Tok.isAtStartOfLine()) 398 break; 399 400 // Eat all of the tokens until we get to the next one at the start of 401 // line. 402 unsigned TokEnd = TokOffs+TokLen; 403 L.LexFromRawLexer(Tok); 404 while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) { 405 TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength(); 406 L.LexFromRawLexer(Tok); 407 } 408 409 // Find end of line. This is a hack. 410 HighlightRange(RB, TokOffs, TokEnd, BufferStart, 411 "<span class='directive'>", "</span>"); 412 413 // Don't skip the next token. 414 continue; 415 } 416 } 417 418 L.LexFromRawLexer(Tok); 419 } 420} 421 422/// HighlightMacros - This uses the macro table state from the end of the 423/// file, to re-expand macros and insert (into the HTML) information about the 424/// macro expansions. This won't be perfectly perfect, but it will be 425/// reasonably close. 426void html::HighlightMacros(Rewriter &R, FileID FID, Preprocessor& PP) { 427 // Re-lex the raw token stream into a token buffer. 428 const SourceManager &SM = PP.getSourceManager(); 429 std::vector<Token> TokenStream; 430 431 Lexer L(FID, SM, PP.getLangOptions()); 432 433 // Lex all the tokens in raw mode, to avoid entering #includes or expanding 434 // macros. 435 while (1) { 436 Token Tok; 437 L.LexFromRawLexer(Tok); 438 439 // If this is a # at the start of a line, discard it from the token stream. 440 // We don't want the re-preprocess step to see #defines, #includes or other 441 // preprocessor directives. 442 if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) 443 continue; 444 445 // If this is a ## token, change its kind to unknown so that repreprocessing 446 // it will not produce an error. 447 if (Tok.is(tok::hashhash)) 448 Tok.setKind(tok::unknown); 449 450 // If this raw token is an identifier, the raw lexer won't have looked up 451 // the corresponding identifier info for it. Do this now so that it will be 452 // macro expanded when we re-preprocess it. 453 if (Tok.is(tok::identifier)) { 454 // Change the kind of this identifier to the appropriate token kind, e.g. 455 // turning "for" into a keyword. 456 Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID()); 457 } 458 459 TokenStream.push_back(Tok); 460 461 if (Tok.is(tok::eof)) break; 462 } 463 464 // Inform the preprocessor that we don't want comments. 465 PP.SetCommentRetentionState(false, false); 466 467 // Enter the tokens we just lexed. This will cause them to be macro expanded 468 // but won't enter sub-files (because we removed #'s). 469 PP.EnterTokenStream(&TokenStream[0], TokenStream.size(), false, false); 470 471 TokenConcatenation ConcatInfo(PP); 472 473 // Lex all the tokens. 474 Token Tok; 475 PP.Lex(Tok); 476 while (Tok.isNot(tok::eof)) { 477 // Ignore non-macro tokens. 478 if (!Tok.getLocation().isMacroID()) { 479 PP.Lex(Tok); 480 continue; 481 } 482 483 // Okay, we have the first token of a macro expansion: highlight the 484 // instantiation by inserting a start tag before the macro instantiation and 485 // end tag after it. 486 std::pair<SourceLocation, SourceLocation> LLoc = 487 SM.getInstantiationRange(Tok.getLocation()); 488 489 // Ignore tokens whose instantiation location was not the main file. 490 if (SM.getFileID(LLoc.first) != FID) { 491 PP.Lex(Tok); 492 continue; 493 } 494 495 assert(SM.getFileID(LLoc.second) == FID && 496 "Start and end of expansion must be in the same ultimate file!"); 497 498 std::string Expansion = PP.getSpelling(Tok); 499 unsigned LineLen = Expansion.size(); 500 501 Token PrevTok = Tok; 502 // Okay, eat this token, getting the next one. 503 PP.Lex(Tok); 504 505 // Skip all the rest of the tokens that are part of this macro 506 // instantiation. It would be really nice to pop up a window with all the 507 // spelling of the tokens or something. 508 while (!Tok.is(tok::eof) && 509 SM.getInstantiationLoc(Tok.getLocation()) == LLoc.first) { 510 // Insert a newline if the macro expansion is getting large. 511 if (LineLen > 60) { 512 Expansion += "<br>"; 513 LineLen = 0; 514 } 515 516 LineLen -= Expansion.size(); 517 518 // If the tokens were already space separated, or if they must be to avoid 519 // them being implicitly pasted, add a space between them. 520 if (Tok.hasLeadingSpace() || 521 ConcatInfo.AvoidConcat(PrevTok, Tok)) 522 Expansion += ' '; 523 524 // Escape any special characters in the token text. 525 Expansion += EscapeText(PP.getSpelling(Tok)); 526 LineLen += Expansion.size(); 527 528 PrevTok = Tok; 529 PP.Lex(Tok); 530 } 531 532 533 // Insert the expansion as the end tag, so that multi-line macros all get 534 // highlighted. 535 Expansion = "<span class='expansion'>" + Expansion + "</span></span>"; 536 537 HighlightRange(R, LLoc.first, LLoc.second, 538 "<span class='macro'>", Expansion.c_str()); 539 } 540} 541 542void html::HighlightMacros(Rewriter &R, FileID FID, 543 PreprocessorFactory &PPF) { 544 545 llvm::OwningPtr<Preprocessor> PP(PPF.CreatePreprocessor()); 546 HighlightMacros(R, FID, *PP); 547} 548