1//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This tablegen backend emits an efficient function to translate HTML named
11// character references to UTF-8 sequences.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/SmallString.h"
16#include "llvm/Support/ConvertUTF.h"
17#include "llvm/TableGen/Error.h"
18#include "llvm/TableGen/Record.h"
19#include "llvm/TableGen/StringMatcher.h"
20#include "llvm/TableGen/TableGenBackend.h"
21#include <vector>
22
23using namespace llvm;
24
25/// \brief Convert a code point to the corresponding UTF-8 sequence represented
26/// as a C string literal.
27///
28/// \returns true on success.
29static bool translateCodePointToUTF8(unsigned CodePoint,
30                                     SmallVectorImpl<char> &CLiteral) {
31  char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
32  char *TranslatedPtr = Translated;
33  if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
34    return false;
35
36  StringRef UTF8(Translated, TranslatedPtr - Translated);
37
38  raw_svector_ostream OS(CLiteral);
39  OS << "\"";
40  for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
41    OS << "\\x";
42    OS.write_hex(static_cast<unsigned char>(UTF8[i]));
43  }
44  OS << "\"";
45
46  return true;
47}
48
49namespace clang {
50void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
51                                                  raw_ostream &OS) {
52  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
53  std::vector<StringMatcher::StringPair> NameToUTF8;
54  SmallString<32> CLiteral;
55  for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
56       I != E; ++I) {
57    Record &Tag = **I;
58    std::string Spelling = Tag.getValueAsString("Spelling");
59    uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
60    CLiteral.clear();
61    CLiteral.append("return ");
62    if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
63      SrcMgr.PrintMessage(Tag.getLoc().front(),
64                          SourceMgr::DK_Error,
65                          Twine("invalid code point"));
66      continue;
67    }
68    CLiteral.append(";");
69
70    StringMatcher::StringPair Match(Spelling, CLiteral.str());
71    NameToUTF8.push_back(Match);
72  }
73
74  emitSourceFileHeader("HTML named character reference to UTF-8 "
75                       "translation", OS);
76
77  OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
78        "                                             StringRef Name) {\n";
79  StringMatcher("Name", NameToUTF8, OS).Emit();
80  OS << "  return StringRef();\n"
81     << "}\n\n";
82}
83
84} // end namespace clang
85
86