1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4**********************************************************************
5*   Copyright (c) 2001-2011, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7**********************************************************************
8*   Date        Name        Description
9*   11/19/2001  aliu        Creation.
10**********************************************************************
11*/
12package com.ibm.icu.text;
13import com.ibm.icu.impl.Utility;
14
15/**
16 * A transliterator that converts Unicode characters to an escape
 * form.  Examples of escape forms are "U+4E01" and "&amp;#x10FFFF;".
18 * Escape forms have a prefix and suffix, either of which may be
19 * empty, a radix, typically 16 or 10, a minimum digit count,
20 * typically 1, 4, or 8, and a boolean that specifies whether
21 * supplemental characters are handled as 32-bit code points or as two
22 * 16-bit code units.  Most escape forms handle 32-bit code points,
 * but some, such as the Java form, intentionally break them into a
 * surrogate pair (two 16-bit code units), for backward compatibility.
25 *
26 * <p>Some escape forms actually have two different patterns, one for
27 * BMP characters (0..FFFF) and one for supplements (>FFFF).  To
28 * handle this, a second EscapeTransliterator may be defined that
29 * specifies the pattern to be produced for supplementals.  An example
30 * of a form that requires this is the C form, which uses "\\uFFFF"
31 * for BMP characters and "\\U0010FFFF" for supplementals.
32 *
33 * <p>This class is package private.  It registers several standard
34 * variants with the system which are then accessed via their IDs.
35 *
36 * @author Alan Liu
37 */
38class EscapeTransliterator extends Transliterator {
39
40    /**
41     * The prefix of the escape form; may be empty, but usually isn't.
42     * May not be null.
43     */
44    private String prefix;
45
46    /**
47     * The prefix of the escape form; often empty.  May not be null.
48     */
49    private String suffix;
50
51    /**
52     * The radix to display the number in.  Typically 16 or 10.  Must
53     * be in the range 2 to 36.
54     */
55    private int radix;
56
57    /**
58     * The minimum number of digits.  Typically 1, 4, or 8.  Values
59     * less than 1 are equivalent to 1.
60     */
61    private int minDigits;
62
63    /**
64     * If true, supplementals are handled as 32-bit code points.  If
65     * false, they are handled as two 16-bit code units.
66     */
67    private boolean grokSupplementals;
68
69    /**
70     * The form to be used for supplementals.  If this is null then
71     * the same form is used for BMP characters and supplementals.  If
72     * this is not null and if grokSupplementals is true then the
73     * prefix, suffix, radix, and minDigits of this object are used
74     * for supplementals.
75     */
76    private EscapeTransliterator supplementalHandler;
77
78    /**
79     * Registers standard variants with the system.  Called by
80     * Transliterator during initialization.
81     */
82    static void register() {
83        // Unicode: "U+10FFFF" hex, min=4, max=6
84        Transliterator.registerFactory("Any-Hex/Unicode", new Transliterator.Factory() {
85            @Override
86            public Transliterator getInstance(String ID) {
87                return new EscapeTransliterator("Any-Hex/Unicode",
88                                                "U+", "", 16, 4, true, null);
89            }
90        });
91
92        // Java: "\\uFFFF" hex, min=4, max=4
93        Transliterator.registerFactory("Any-Hex/Java", new Transliterator.Factory() {
94            @Override
95            public Transliterator getInstance(String ID) {
96                return new EscapeTransliterator("Any-Hex/Java",
97                                                "\\u", "", 16, 4, false, null);
98            }
99        });
100
101        // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
102        Transliterator.registerFactory("Any-Hex/C", new Transliterator.Factory() {
103            @Override
104            public Transliterator getInstance(String ID) {
105                return new EscapeTransliterator("Any-Hex/C",
106                                                "\\u", "", 16, 4, true,
107                       new EscapeTransliterator("", "\\U", "", 16, 8, true, null));
108            }
109        });
110
111        // XML: "&#x10FFFF;" hex, min=1, max=6
112        Transliterator.registerFactory("Any-Hex/XML", new Transliterator.Factory() {
113            @Override
114            public Transliterator getInstance(String ID) {
115                return new EscapeTransliterator("Any-Hex/XML",
116                                                "&#x", ";", 16, 1, true, null);
117            }
118        });
119
120        // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
121        Transliterator.registerFactory("Any-Hex/XML10", new Transliterator.Factory() {
122            @Override
123            public Transliterator getInstance(String ID) {
124                return new EscapeTransliterator("Any-Hex/XML10",
125                                                "&#", ";", 10, 1, true, null);
126            }
127        });
128
129        // Perl: "\\x{263A}" hex, min=1, max=6
130        Transliterator.registerFactory("Any-Hex/Perl", new Transliterator.Factory() {
131            @Override
132            public Transliterator getInstance(String ID) {
133                return new EscapeTransliterator("Any-Hex/Perl",
134                                                "\\x{", "}", 16, 1, true, null);
135            }
136        });
137
138        // Plain: "FFFF" hex, min=4, max=6
139        Transliterator.registerFactory("Any-Hex/Plain", new Transliterator.Factory() {
140            @Override
141            public Transliterator getInstance(String ID) {
142                return new EscapeTransliterator("Any-Hex/Plain",
143                                                "", "", 16, 4, true, null);
144            }
145        });
146
147        // Generic
148        Transliterator.registerFactory("Any-Hex", new Transliterator.Factory() {
149            @Override
150            public Transliterator getInstance(String ID) {
151                return new EscapeTransliterator("Any-Hex",
152                                                "\\u", "", 16, 4, false, null);
153            }
154        });
155    }
156
157    /**
158     * Constructs an escape transliterator with the given ID and
159     * parameters.  See the class member documentation for details.
160     */
161    EscapeTransliterator(String ID, String prefix, String suffix,
162                         int radix, int minDigits,
163                         boolean grokSupplementals,
164                         EscapeTransliterator supplementalHandler) {
165        super(ID, null);
166        this.prefix = prefix;
167        this.suffix = suffix;
168        this.radix = radix;
169        this.minDigits = minDigits;
170        this.grokSupplementals = grokSupplementals;
171        this.supplementalHandler = supplementalHandler;
172    }
173
174    /**
175     * Implements {@link Transliterator#handleTransliterate}.
176     */
177    @Override
178    protected void handleTransliterate(Replaceable text,
179                                       Position pos, boolean incremental) {
180        int start = pos.start;
181        int limit = pos.limit;
182
183        StringBuilder buf = new StringBuilder(prefix);
184        int prefixLen = prefix.length();
185        boolean redoPrefix = false;
186
187        while (start < limit) {
188            int c = grokSupplementals ? text.char32At(start) : text.charAt(start);
189            int charLen = grokSupplementals ? UTF16.getCharCount(c) : 1;
190
191            if ((c & 0xFFFF0000) != 0 && supplementalHandler != null) {
192                buf.setLength(0);
193                buf.append(supplementalHandler.prefix);
194                Utility.appendNumber(buf, c, supplementalHandler.radix,
195                                     supplementalHandler.minDigits);
196                buf.append(supplementalHandler.suffix);
197                redoPrefix = true;
198            } else {
199                if (redoPrefix) {
200                    buf.setLength(0);
201                    buf.append(prefix);
202                    redoPrefix = false;
203                } else {
204                    buf.setLength(prefixLen);
205                }
206                Utility.appendNumber(buf, c, radix, minDigits);
207                buf.append(suffix);
208            }
209
210            text.replace(start, start + charLen, buf.toString());
211            start += buf.length();
212            limit += buf.length() - charLen;
213        }
214
215        pos.contextLimit += limit - pos.limit;
216        pos.limit = limit;
217        pos.start = start;
218    }
219
220    /* (non-Javadoc)
221     * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
222     */
223    @Override
224    public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
225        sourceSet.addAll(getFilterAsUnicodeSet(inputFilter));
226        for (EscapeTransliterator it = this; it != null ; it = it.supplementalHandler) {
227            if (inputFilter.size() != 0) {
228                targetSet.addAll(it.prefix);
229                targetSet.addAll(it.suffix);
230                StringBuilder buffer = new StringBuilder();
231                for (int i = 0; i < it.radix; ++i) {
232                    Utility.appendNumber(buffer, i, it.radix, it.minDigits);
233                }
234                targetSet.addAll(buffer.toString()); // TODO drop once String is changed to CharSequence in UnicodeSet
235            }
236        }
237    }
238}
239