1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (c) 2001-2006, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11/19/2001 aliu Creation. 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "esctrn.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar UNIPRE[] = {85,43,0}; // "U+" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar BS_u[] = {92,117,0}; // "\\u" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar BS_U[] = {92,85,0}; // "\\U" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar XMLPRE[] = {38,35,120,0}; // "&#x" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar XML10PRE[] = {38,35,0}; // "&#" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar SEMI[] = {59,0}; // ";" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar RBRACE[] = {125,0}; // "}" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar EMPTY[] = {0}; // "" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Factory methods 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Unicode: "U+10FFFF" hex, min=4, max=6 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new EscapeTransliterator(ID, UNIPRE, EMPTY, 16, 4, TRUE, NULL); 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Java: "\\uFFFF" hex, min=4, max=4 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new EscapeTransliterator(ID, BS_u, EMPTY, 16, 4, FALSE, NULL); 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new EscapeTransliterator(ID, BS_u, EMPTY, 16, 4, TRUE, 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru new EscapeTransliterator(EMPTY, BS_U, EMPTY, 16, 8, TRUE, NULL)); 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // XML: "" hex, min=1, max=6 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new EscapeTransliterator(ID, XMLPRE, SEMI, 16, 1, TRUE, NULL); 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new EscapeTransliterator(ID, XML10PRE, SEMI, 10, 1, TRUE, NULL); 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Perl: "\\x{263A}" hex, min=1, max=6 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new EscapeTransliterator(ID, PERLPRE, RBRACE, 16, 1, TRUE, NULL); 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Registers standard variants with the system. Called by 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator during initialization. 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid EscapeTransliterator::registerIDs() { 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Token t = integerToken(0); 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs an escape transliterator with the given ID and 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * parameters. See the class member documentation for details. 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruEscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& _prefix, const UnicodeString& _suffix, 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t _radix, int32_t _minDigits, 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool _grokSupplementals, 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru EscapeTransliterator* adoptedSupplementalHandler) : 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(newID, NULL) 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru this->prefix = _prefix; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru this->suffix = _suffix; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru this->radix = _radix; 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru this->minDigits = _minDigits; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru this->grokSupplementals = _grokSupplementals; 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru this->supplementalHandler = adoptedSupplementalHandler; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruEscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(o), 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prefix(o.prefix), 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru suffix(o.suffix), 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru radix(o.radix), 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minDigits(o.minDigits), 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru grokSupplementals(o.grokSupplementals) { 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru supplementalHandler = (o.supplementalHandler != 0) ? 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru new EscapeTransliterator(*o.supplementalHandler) : NULL; 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruEscapeTransliterator::~EscapeTransliterator() { 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete supplementalHandler; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API. 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* EscapeTransliterator::clone() const { 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new EscapeTransliterator(*this); 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}. 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid EscapeTransliterator::handleTransliterate(Replaceable& text, 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransPosition& pos, 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool /*isIncremental*/) const 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* TODO: Verify that isIncremental can be ignored */ 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start = pos.start; 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit = pos.limit; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString buf(prefix); 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t prefixLen = prefix.length(); 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool redoPrefix = FALSE; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (start < limit) { 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t charLen = grokSupplementals ? UTF_CHAR_LENGTH(c) : 1; 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.truncate(0); 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.append(supplementalHandler->prefix); 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru supplementalHandler->minDigits); 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.append(supplementalHandler->suffix); 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru redoPrefix = TRUE; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (redoPrefix) { 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.truncate(0); 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.append(prefix); 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru redoPrefix = FALSE; 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.truncate(prefixLen); 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendNumber(buf, c, radix, minDigits); 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.append(suffix); 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(start, start + charLen, buf); 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start += buf.length(); 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit += buf.length() - charLen; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.contextLimit += limit - pos.limit; 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.limit = limit; 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.start = start; 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 180