1/*
2**********************************************************************
3*   Copyright (c) 2001-2011, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   11/19/2001  aliu        Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "unicode/utf16.h"
16#include "esctrn.h"
17#include "util.h"
18
19U_NAMESPACE_BEGIN
20
21static const UChar UNIPRE[] = {85,43,0}; // "U+"
22static const UChar BS_u[] = {92,117,0}; // "\\u"
23static const UChar BS_U[] = {92,85,0}; // "\\U"
24static const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
25static const UChar XML10PRE[] = {38,35,0}; // "&#"
26static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
27static const UChar SEMI[] = {59,0}; // ";"
28static const UChar RBRACE[] = {125,0}; // "}"
29
30UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
31
32/**
33 * Factory methods
34 */
35static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
36    // Unicode: "U+10FFFF" hex, min=4, max=6
37    return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL);
38}
39static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
40    // Java: "\\uFFFF" hex, min=4, max=4
41    return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL);
42}
43static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
44    // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
45    return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE,
46             new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL));
47}
48static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
49    // XML: "" hex, min=1, max=6
50    return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL);
51}
52static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
53    // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
54    return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL);
55}
56static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
57    // Perl: "\\x{263A}" hex, min=1, max=6
58    return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL);
59}
60
61/**
62 * Registers standard variants with the system.  Called by
63 * Transliterator during initialization.
64 */
65void EscapeTransliterator::registerIDs() {
66    Token t = integerToken(0);
67
68    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
69
70    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
71
72    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
73
74    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
75
76    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
77
78    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
79
80    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
81}
82
83/**
84 * Constructs an escape transliterator with the given ID and
85 * parameters.  See the class member documentation for details.
86 */
87EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
88                         const UnicodeString& _prefix, const UnicodeString& _suffix,
89                         int32_t _radix, int32_t _minDigits,
90                         UBool _grokSupplementals,
91                         EscapeTransliterator* adoptedSupplementalHandler) :
92    Transliterator(newID, NULL)
93{
94    this->prefix = _prefix;
95    this->suffix = _suffix;
96    this->radix = _radix;
97    this->minDigits = _minDigits;
98    this->grokSupplementals = _grokSupplementals;
99    this->supplementalHandler = adoptedSupplementalHandler;
100}
101
102/**
103 * Copy constructor.
104 */
105EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
106    Transliterator(o),
107    prefix(o.prefix),
108    suffix(o.suffix),
109    radix(o.radix),
110    minDigits(o.minDigits),
111    grokSupplementals(o.grokSupplementals) {
112    supplementalHandler = (o.supplementalHandler != 0) ?
113        new EscapeTransliterator(*o.supplementalHandler) : NULL;
114}
115
116EscapeTransliterator::~EscapeTransliterator() {
117    delete supplementalHandler;
118}
119
120/**
121 * Transliterator API.
122 */
123Transliterator* EscapeTransliterator::clone() const {
124    return new EscapeTransliterator(*this);
125}
126
127/**
128 * Implements {@link Transliterator#handleTransliterate}.
129 */
130void EscapeTransliterator::handleTransliterate(Replaceable& text,
131                                               UTransPosition& pos,
132                                               UBool /*isIncremental*/) const
133{
134    /* TODO: Verify that isIncremental can be ignored */
135    int32_t start = pos.start;
136    int32_t limit = pos.limit;
137
138    UnicodeString buf(prefix);
139    int32_t prefixLen = prefix.length();
140    UBool redoPrefix = FALSE;
141
142    while (start < limit) {
143        int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
144        int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
145
146        if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
147            buf.truncate(0);
148            buf.append(supplementalHandler->prefix);
149            ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
150                                  supplementalHandler->minDigits);
151            buf.append(supplementalHandler->suffix);
152            redoPrefix = TRUE;
153        } else {
154            if (redoPrefix) {
155                buf.truncate(0);
156                buf.append(prefix);
157                redoPrefix = FALSE;
158            } else {
159                buf.truncate(prefixLen);
160            }
161            ICU_Utility::appendNumber(buf, c, radix, minDigits);
162            buf.append(suffix);
163        }
164
165        text.handleReplaceBetween(start, start + charLen, buf);
166        start += buf.length();
167        limit += buf.length() - charLen;
168    }
169
170    pos.contextLimit += limit - pos.limit;
171    pos.limit = limit;
172    pos.start = start;
173}
174
175U_NAMESPACE_END
176
177#endif /* #if !UCONFIG_NO_TRANSLITERATION */
178
179//eof
180