1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Copyright (c) 2001-2006, International Business Machines
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Date        Name        Description
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   11/19/2001  aliu        Creation.
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "esctrn.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h"
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar UNIPRE[] = {85,43,0}; // "U+"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar BS_u[] = {92,117,0}; // "\\u"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar BS_U[] = {92,85,0}; // "\\U"
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar XML10PRE[] = {38,35,0}; // "&#"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar SEMI[] = {59,0}; // ";"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar RBRACE[] = {125,0}; // "}"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar EMPTY[] = {0}; // ""
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Factory methods
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Unicode: "U+10FFFF" hex, min=4, max=6
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new EscapeTransliterator(ID, UNIPRE, EMPTY, 16, 4, TRUE, NULL);
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Java: "\\uFFFF" hex, min=4, max=4
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new EscapeTransliterator(ID, BS_u, EMPTY, 16, 4, FALSE, NULL);
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new EscapeTransliterator(ID, BS_u, EMPTY, 16, 4, TRUE,
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             new EscapeTransliterator(EMPTY, BS_U, EMPTY, 16, 8, TRUE, NULL));
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // XML: "" hex, min=1, max=6
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new EscapeTransliterator(ID, XMLPRE, SEMI, 16, 1, TRUE, NULL);
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new EscapeTransliterator(ID, XML10PRE, SEMI, 10, 1, TRUE, NULL);
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Perl: "\\x{263A}" hex, min=1, max=6
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new EscapeTransliterator(ID, PERLPRE, RBRACE, 16, 1, TRUE, NULL);
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Registers standard variants with the system.  Called by
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator during initialization.
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid EscapeTransliterator::registerIDs() {
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Token t = integerToken(0);
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs an escape transliterator with the given ID and
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * parameters.  See the class member documentation for details.
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruEscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         const UnicodeString& _prefix, const UnicodeString& _suffix,
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         int32_t _radix, int32_t _minDigits,
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UBool _grokSupplementals,
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         EscapeTransliterator* adoptedSupplementalHandler) :
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(newID, NULL)
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    this->prefix = _prefix;
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    this->suffix = _suffix;
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    this->radix = _radix;
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    this->minDigits = _minDigits;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    this->grokSupplementals = _grokSupplementals;
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    this->supplementalHandler = adoptedSupplementalHandler;
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor.
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruEscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(o),
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    prefix(o.prefix),
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    suffix(o.suffix),
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    radix(o.radix),
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    minDigits(o.minDigits),
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    grokSupplementals(o.grokSupplementals) {
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    supplementalHandler = (o.supplementalHandler != 0) ?
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        new EscapeTransliterator(*o.supplementalHandler) : NULL;
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruEscapeTransliterator::~EscapeTransliterator() {
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete supplementalHandler;
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API.
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* EscapeTransliterator::clone() const {
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new EscapeTransliterator(*this);
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}.
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid EscapeTransliterator::handleTransliterate(Replaceable& text,
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                               UTransPosition& pos,
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                               UBool /*isIncremental*/) const
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* TODO: Verify that isIncremental can be ignored */
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t start = pos.start;
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t limit = pos.limit;
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString buf(prefix);
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t prefixLen = prefix.length();
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool redoPrefix = FALSE;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (start < limit) {
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t charLen = grokSupplementals ? UTF_CHAR_LENGTH(c) : 1;
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf.truncate(0);
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf.append(supplementalHandler->prefix);
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                  supplementalHandler->minDigits);
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf.append(supplementalHandler->suffix);
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            redoPrefix = TRUE;
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (redoPrefix) {
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                buf.truncate(0);
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                buf.append(prefix);
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                redoPrefix = FALSE;
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                buf.truncate(prefixLen);
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ICU_Utility::appendNumber(buf, c, radix, minDigits);
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf.append(suffix);
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        text.handleReplaceBetween(start, start + charLen, buf);
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        start += buf.length();
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        limit += buf.length() - charLen;
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pos.contextLimit += limit - pos.limit;
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pos.limit = limit;
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pos.start = start;
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof
180