// HTMLRewrite.cpp revision 678c6358c8d4e368c78629099142397c63c1ee35
1//== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the HTMLRewriter clas, which is used to translate the 11// text of a source file into prettified HTML. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Rewrite/Rewriter.h" 16#include "clang/Rewrite/HTMLRewrite.h" 17#include "clang/Lex/Preprocessor.h" 18#include "clang/Basic/SourceManager.h" 19#include "llvm/ADT/SmallString.h" 20#include "llvm/Support/MemoryBuffer.h" 21#include <sstream> 22using namespace clang; 23 24void html::EscapeText(Rewriter& R, unsigned FileID, 25 bool EscapeSpaces, bool ReplaceTabs) { 26 27 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FileID); 28 const char* C = Buf->getBufferStart(); 29 const char* FileEnd = Buf->getBufferEnd(); 30 31 assert (C <= FileEnd); 32 33 RewriteBuffer &RB = R.getEditBuffer(FileID); 34 35 for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) { 36 37 switch (*C) { 38 default: break; 39 40 case ' ': 41 if (EscapeSpaces) 42 RB.ReplaceText(FilePos, 1, " ", 6); 43 break; 44 45 case '\t': 46 if (!ReplaceTabs) 47 break; 48 if (EscapeSpaces) 49 RB.ReplaceText(FilePos, 1, " ", 6*4); 50 else 51 RB.ReplaceText(FilePos, 1, " ", 4); 52 break; 53 54 case '<': 55 RB.ReplaceText(FilePos, 1, "<", 4); 56 break; 57 58 case '>': 59 RB.ReplaceText(FilePos, 1, ">", 4); 60 break; 61 62 case '&': 63 RB.ReplaceText(FilePos, 1, "&", 5); 64 break; 65 } 66 } 67} 68 69std::string html::EscapeText(const std::string& s, bool EscapeSpaces, 70 bool ReplaceTabs) { 71 72 unsigned len = s.size(); 73 std::ostringstream os; 74 75 for (unsigned i = 0 ; i < len; ++i) { 76 77 char c = s[i]; 78 switch (c) { 79 default: 80 os << c; 
break; 81 82 case ' ': 83 if (EscapeSpaces) os << " "; 84 else os << ' '; 85 break; 86 87 case '\t': 88 if (ReplaceTabs) 89 for (unsigned i = 0; i < 4; ++i) 90 os << " "; 91 else 92 os << c; 93 94 break; 95 96 case '<': os << "<"; break; 97 case '>': os << ">"; break; 98 case '&': os << "&"; break; 99 } 100 } 101 102 return os.str(); 103} 104 105static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo, 106 unsigned B, unsigned E) { 107 llvm::SmallString<100> Str; 108 Str += "<tr><td class=\"num\" id=\"LN"; 109 Str.append_uint(LineNo); 110 Str += "\">"; 111 Str.append_uint(LineNo); 112 Str += "</td><td class=\"line\">"; 113 114 if (B == E) { // Handle empty lines. 115 Str += " </td></tr>"; 116 RB.InsertTextBefore(B, &Str[0], Str.size()); 117 } else { 118 RB.InsertTextBefore(B, &Str[0], Str.size()); 119 RB.InsertTextBefore(E, "</td></tr>", strlen("</td></tr>")); 120 } 121} 122 123void html::AddLineNumbers(Rewriter& R, unsigned FileID) { 124 125 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FileID); 126 const char* FileBeg = Buf->getBufferStart(); 127 const char* FileEnd = Buf->getBufferEnd(); 128 const char* C = FileBeg; 129 RewriteBuffer &RB = R.getEditBuffer(FileID); 130 131 assert (C <= FileEnd); 132 133 unsigned LineNo = 0; 134 unsigned FilePos = 0; 135 136 while (C != FileEnd) { 137 138 ++LineNo; 139 unsigned LineStartPos = FilePos; 140 unsigned LineEndPos = FileEnd - FileBeg; 141 142 assert (FilePos <= LineEndPos); 143 assert (C < FileEnd); 144 145 // Scan until the newline (or end-of-file). 146 147 while (C != FileEnd) { 148 char c = *C; 149 ++C; 150 151 if (c == '\n') { 152 LineEndPos = FilePos++; 153 break; 154 } 155 156 ++FilePos; 157 } 158 159 AddLineNumber(RB, LineNo, LineStartPos, LineEndPos); 160 } 161 162 // Add one big table tag that surrounds all of the code. 
163 RB.InsertTextBefore(0, "<table class=\"code\">\n", 164 strlen("<table class=\"code\">\n")); 165 166 RB.InsertTextAfter(FileEnd - FileBeg, "</table>", strlen("</table>")); 167} 168 169void html::AddHeaderFooterInternalBuiltinCSS(Rewriter& R, unsigned FileID) { 170 171 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FileID); 172 const char* FileStart = Buf->getBufferStart(); 173 const char* FileEnd = Buf->getBufferEnd(); 174 175 SourceLocation StartLoc = SourceLocation::getFileLoc(FileID, 0); 176 SourceLocation EndLoc = SourceLocation::getFileLoc(FileID, FileEnd-FileStart); 177 178 // Generate header 179 R.InsertCStrBefore(StartLoc, 180 "<html>\n<head>\n" 181 "<style type=\"text/css\">\n" 182 " body { color:#000000; background-color:#ffffff }\n" 183 " body { font-family:Helvetica, sans-serif; font-size:10pt }\n" 184 " h1 { font-size:14pt }\n" 185 " .code { border-spacing:0px; width:100%; }\n" 186 " .code { font-family: \"Andale Mono\", monospace; font-size:10pt }\n" 187 " .code { line-height: 1.2em }\n" 188 " .comment { color: #A0A0A0 }\n" 189 " .keyword { color: #FF00FF }\n" 190 " .directive { color: #FFFF00 }\n" 191 " .macro { color: #FF0000; background-color:#FFC0C0 }\n" 192 " .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }\n" 193 " .num { text-align:right; font-size: smaller }\n" 194 " .num { color:#444444 }\n" 195 " .line { padding-left: 1ex; border-left: 3px solid #ccc }\n" 196 " .line { white-space: pre }\n" 197 " .msg { background-color:#fff8b4; color:#000000 }\n" 198 " .msg { -webkit-box-shadow:1px 1px 7px #000 }\n" 199 " .msg { -webkit-border-radius:5px }\n" 200 " .msg { font-family:Helvetica, sans-serif; font-size: smaller }\n" 201 " .msg { font-weight: bold }\n" 202 " .msg { float:left }\n" 203 " .msg { padding:0.5em 1ex 0.5em 1ex }\n" 204 " .msg { margin-top:10px; margin-bottom:10px }\n" 205 " .mrange { background-color:#dfddf3 }\n" 206 " .mrange { border-bottom:1px solid #6F9DBE }\n" 207 " .PathIndex { font-weight: bold 
}\n" 208 " table.simpletable {\n" 209 " padding: 5px;\n" 210 " font-size:12pt;\n" 211 " margin:20px;\n" 212 " border-collapse: collapse; border-spacing: 0px;\n" 213 " }\n" 214 " td.rowname {\n" 215 " text-align:right; font-weight:bold; color:#444444;\n" 216 " padding-right:2ex; }\n" 217 "</style>\n</head>\n<body>"); 218 219 // Generate footer 220 221 R.InsertCStrAfter(EndLoc, "</body></html>\n"); 222} 223 224/// SyntaxHighlight - Relex the specified FileID and annotate the HTML with 225/// information about keywords, macro expansions etc. This uses the macro 226/// table state from the end of the file, so it won't be perfectly perfect, 227/// but it will be reasonably close. 228void html::SyntaxHighlight(Rewriter &R, unsigned FileID, Preprocessor &PP) { 229 RewriteBuffer &RB = R.getEditBuffer(FileID); 230 231 const SourceManager &SourceMgr = PP.getSourceManager(); 232 std::pair<const char*, const char*> File = SourceMgr.getBufferData(FileID); 233 const char *BufferStart = File.first; 234 235 Lexer L(SourceLocation::getFileLoc(FileID, 0), PP.getLangOptions(), 236 File.first, File.second); 237 238 // Inform the preprocessor that we want to retain comments as tokens, so we 239 // can highlight them. 240 L.SetCommentRetentionState(true); 241 242 // Lex all the tokens in raw mode, to avoid entering #includes or expanding 243 // macros. 244 Token Tok; 245 L.LexRawToken(Tok); 246 247 while (Tok.isNot(tok::eof)) { 248 // Since we are lexing unexpanded tokens, all tokens are from the main 249 // FileID. 250 unsigned TokOffs = SourceMgr.getFullFilePos(Tok.getLocation()); 251 unsigned TokLen = Tok.getLength(); 252 switch (Tok.getKind()) { 253 default: break; 254 case tok::identifier: { 255 // Fill in Result.IdentifierInfo, looking up the identifier in the 256 // identifier table. 257 IdentifierInfo *II = PP.LookUpIdentifierInfo(Tok, BufferStart+TokOffs); 258 259 // If this is a pp-identifier, for a keyword, highlight it as such. 
260 if (II->getTokenID() != tok::identifier) { 261 RB.InsertTextAfter(TokOffs, "<span class='keyword'>", 262 strlen("<span class='keyword'>")); 263 RB.InsertTextBefore(TokOffs+TokLen, "</span>", strlen("</span>")); 264 } 265 break; 266 } 267 case tok::comment: 268 RB.InsertTextAfter(TokOffs, "<span class='comment'>", 269 strlen("<span class='comment'>")); 270 RB.InsertTextBefore(TokOffs+TokLen, "</span>", strlen("</span>")); 271 break; 272 case tok::hash: 273 // FIXME: This isn't working because we're not in raw mode in the lexer. 274 // Just cons up our own lexer here? 275 276 // If this is a preprocessor directive, all tokens to end of line are too. 277 if (Tok.isAtStartOfLine()) { 278 RB.InsertTextAfter(TokOffs, "<span class='directive'>", 279 strlen("<span class='directive'>")); 280 // Find end of line. This is a hack. 281 const char *LineEnd = SourceMgr.getCharacterData(Tok.getLocation()); 282 unsigned TokEnd = TokOffs+strcspn(LineEnd, "\n\r"); 283 RB.InsertTextBefore(TokEnd, "</span>", strlen("</span>")); 284 } 285 break; 286 } 287 288 L.LexRawToken(Tok); 289 } 290} 291 292/// HighlightMacros - This uses the macro table state from the end of the 293/// file, to reexpand macros and insert (into the HTML) information about the 294/// macro expansions. This won't be perfectly perfect, but it will be 295/// reasonably close. 296void html::HighlightMacros(Rewriter &R, unsigned FileID, Preprocessor &PP) { 297 RewriteBuffer &RB = R.getEditBuffer(FileID); 298 299 // Inform the preprocessor that we don't want comments. 300 PP.SetCommentRetentionState(false, false); 301 302 // Start parsing the specified input file. 303 PP.EnterMainSourceFile(); 304 305 // Lex all the tokens. 306 const SourceManager &SourceMgr = PP.getSourceManager(); 307 Token Tok; 308 PP.Lex(Tok); 309 while (Tok.isNot(tok::eof)) { 310 // Ignore non-macro tokens. 
311 if (!Tok.getLocation().isMacroID()) { 312 PP.Lex(Tok); 313 continue; 314 } 315 316 // Ignore tokens whose logical location was not the main file. 317 SourceLocation LLoc = SourceMgr.getLogicalLoc(Tok.getLocation()); 318 std::pair<unsigned, unsigned> LLocInfo = 319 SourceMgr.getDecomposedFileLoc(LLoc); 320 321 if (LLocInfo.first != FileID) { 322 PP.Lex(Tok); 323 continue; 324 } 325 326 // Okay, we have the first token of a macro expansion: highlight the 327 // instantiation. 328 329 // Get the size of current macro call itself. 330 // FIXME: This should highlight the args of a function-like 331 // macro, using a heuristic. 332 unsigned TokLen = Lexer::MeasureTokenLength(LLoc, SourceMgr); 333 334 unsigned TokOffs = LLocInfo.second; 335 RB.InsertTextAfter(TokOffs, "<span class='macro'>", 336 strlen("<span class='macro'>")); 337 RB.InsertTextBefore(TokOffs+TokLen, "</span>", strlen("</span>")); 338 339 // Okay, eat this token, getting the next one. 340 PP.Lex(Tok); 341 342 // Skip all the rest of the tokens that are part of this macro 343 // instantiation. It would be really nice to pop up a window with all the 344 // spelling of the tokens or something. 345 while (!Tok.is(tok::eof) && 346 SourceMgr.getLogicalLoc(Tok.getLocation()) == LLoc) 347 PP.Lex(Tok); 348 } 349} 350 351 352