1//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This tablegen backend emits an fficient function to translate HTML named 11// character references to UTF-8 sequences. 12// 13//===----------------------------------------------------------------------===// 14 15#include "llvm/ADT/SmallString.h" 16#include "llvm/Support/ConvertUTF.h" 17#include "llvm/TableGen/Error.h" 18#include "llvm/TableGen/Record.h" 19#include "llvm/TableGen/StringMatcher.h" 20#include "llvm/TableGen/TableGenBackend.h" 21#include <vector> 22 23using namespace llvm; 24 25/// \brief Convert a code point to the corresponding UTF-8 sequence represented 26/// as a C string literal. 27/// 28/// \returns true on success. 29static bool translateCodePointToUTF8(unsigned CodePoint, 30 SmallVectorImpl<char> &CLiteral) { 31 char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; 32 char *TranslatedPtr = Translated; 33 if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr)) 34 return false; 35 36 StringRef UTF8(Translated, TranslatedPtr - Translated); 37 38 raw_svector_ostream OS(CLiteral); 39 OS << "\""; 40 for (size_t i = 0, e = UTF8.size(); i != e; ++i) { 41 OS << "\\x"; 42 OS.write_hex(static_cast<unsigned char>(UTF8[i])); 43 } 44 OS << "\""; 45 46 return true; 47} 48 49namespace clang { 50void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, 51 raw_ostream &OS) { 52 std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR"); 53 std::vector<StringMatcher::StringPair> NameToUTF8; 54 SmallString<32> CLiteral; 55 for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end(); 56 I != E; ++I) { 57 Record &Tag = **I; 58 std::string Spelling = Tag.getValueAsString("Spelling"); 59 uint64_t CodePoint = Tag.getValueAsInt("CodePoint"); 60 CLiteral.clear(); 61 CLiteral.append("return "); 62 if (!translateCodePointToUTF8(CodePoint, CLiteral)) { 63 SrcMgr.PrintMessage(Tag.getLoc().front(), 64 SourceMgr::DK_Error, 65 Twine("invalid code point")); 66 continue; 67 } 68 CLiteral.append(";"); 69 70 StringMatcher::StringPair Match(Spelling, CLiteral.str()); 71 NameToUTF8.push_back(Match); 72 } 73 74 emitSourceFileHeader("HTML named character reference to UTF-8 " 75 "translation", OS); 76 77 OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n" 78 " StringRef Name) {\n"; 79 StringMatcher("Name", NameToUTF8, OS).Emit(); 80 OS << " return StringRef();\n" 81 << "}\n\n"; 82} 83 84} // end namespace clang 85 86