1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5**********************************************************************
6*   Copyright (c) 2001-2011, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8**********************************************************************
9*   Date        Name        Description
10*   11/19/2001  aliu        Creation.
11**********************************************************************
12*/
13package android.icu.text;
14import android.icu.impl.Utility;
15import android.icu.lang.UCharacter;
16
17/**
18 * A transliterator that converts Unicode escape forms to the
19 * characters they represent.  Escape forms have a prefix, a suffix, a
20 * radix, and minimum and maximum digit counts.
21 *
22 * <p>This class is package private.  It registers several standard
23 * variants with the system which are then accessed via their IDs.
24 *
25 * @author Alan Liu
26 */
27class UnescapeTransliterator extends Transliterator {
28
29    /**
30     * The encoded pattern specification.  The pattern consists of
31     * zero or more forms.  Each form consists of a prefix, suffix,
32     * radix, minimum digit count, and maximum digit count.  These
33     * values are stored as a five character header.  That is, their
34     * numeric values are cast to 16-bit characters and stored in the
35     * string.  Following these five characters, the prefix
36     * characters, then suffix characters are stored.  Each form thus
37     * takes n+5 characters, where n is the total length of the prefix
38     * and suffix.  The end is marked by a header of length one
39     * consisting of the character END.
40     */
41    private char spec[];
42
43    /**
44     * Special character marking the end of the spec[] array.
45     */
46    private static final char END = 0xFFFF;
47
48    /**
49     * Registers standard variants with the system.  Called by
50     * Transliterator during initialization.
51     */
52    static void register() {
53        // Unicode: "U+10FFFF" hex, min=4, max=6
54        Transliterator.registerFactory("Hex-Any/Unicode", new Transliterator.Factory() {
55            @Override
56            public Transliterator getInstance(String ID) {
57                return new UnescapeTransliterator("Hex-Any/Unicode", new char[] {
58                    2, 0, 16, 4, 6, 'U', '+',
59                    END
60                });
61            }
62        });
63
64        // Java: "\\uFFFF" hex, min=4, max=4
65        Transliterator.registerFactory("Hex-Any/Java", new Transliterator.Factory() {
66            @Override
67            public Transliterator getInstance(String ID) {
68                return new UnescapeTransliterator("Hex-Any/Java", new char[] {
69                    2, 0, 16, 4, 4, '\\', 'u',
70                    END
71                });
72            }
73        });
74
75        // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
76        Transliterator.registerFactory("Hex-Any/C", new Transliterator.Factory() {
77            @Override
78            public Transliterator getInstance(String ID) {
79                return new UnescapeTransliterator("Hex-Any/C", new char[] {
80                    2, 0, 16, 4, 4, '\\', 'u',
81                    2, 0, 16, 8, 8, '\\', 'U',
82                    END
83                });
84            }
85        });
86
87        // XML: "&#x10FFFF;" hex, min=1, max=6
88        Transliterator.registerFactory("Hex-Any/XML", new Transliterator.Factory() {
89            @Override
90            public Transliterator getInstance(String ID) {
91                return new UnescapeTransliterator("Hex-Any/XML", new char[] {
92                    3, 1, 16, 1, 6, '&', '#', 'x', ';',
93                    END
94                });
95            }
96        });
97
98        // XML10: "&1114111;" dec, min=1, max=7 (not really "Hex-Any")
99        Transliterator.registerFactory("Hex-Any/XML10", new Transliterator.Factory() {
100            @Override
101            public Transliterator getInstance(String ID) {
102                return new UnescapeTransliterator("Hex-Any/XML10", new char[] {
103                    2, 1, 10, 1, 7, '&', '#', ';',
104                    END
105                });
106            }
107        });
108
109        // Perl: "\\x{263A}" hex, min=1, max=6
110        Transliterator.registerFactory("Hex-Any/Perl", new Transliterator.Factory() {
111            @Override
112            public Transliterator getInstance(String ID) {
113                return new UnescapeTransliterator("Hex-Any/Perl", new char[] {
114                    3, 1, 16, 1, 6, '\\', 'x', '{', '}',
115                    END
116                });
117            }
118        });
119
120        // All: Java, C, Perl, XML, XML10, Unicode
121        Transliterator.registerFactory("Hex-Any", new Transliterator.Factory() {
122            @Override
123            public Transliterator getInstance(String ID) {
124                return new UnescapeTransliterator("Hex-Any", new char[] {
125                    2, 0, 16, 4, 6, 'U', '+',            // Unicode
126                    2, 0, 16, 4, 4, '\\', 'u',           // Java
127                    2, 0, 16, 8, 8, '\\', 'U',           // C (surrogates)
128                    3, 1, 16, 1, 6, '&', '#', 'x', ';',  // XML
129                    2, 1, 10, 1, 7, '&', '#', ';',       // XML10
130                    3, 1, 16, 1, 6, '\\', 'x', '{', '}', // Perl
131                    END
132                });
133            }
134        });
135    }
136
137    /**
138     * Package private constructor.  Takes the encoded spec array.
139     */
140    UnescapeTransliterator(String ID, char spec[]) {
141        super(ID, null);
142        this.spec = spec;
143    }
144
145    /**
146     * Implements {@link Transliterator#handleTransliterate}.
147     */
148    @Override
149    protected void handleTransliterate(Replaceable text,
150                                       Position pos, boolean isIncremental) {
151        int start = pos.start;
152        int limit = pos.limit;
153        int i, ipat;
154
155      loop:
156        while (start < limit) {
157            // Loop over the forms in spec[].  Exit this loop when we
158            // match one of the specs.  Exit the outer loop if a
159            // partial match is detected and isIncremental is true.
160            for (ipat = 0; spec[ipat] != END;) {
161
162                // Read the header
163                int prefixLen = spec[ipat++];
164                int suffixLen = spec[ipat++];
165                int radix     = spec[ipat++];
166                int minDigits = spec[ipat++];
167                int maxDigits = spec[ipat++];
168
169                // s is a copy of start that is advanced over the
170                // characters as we parse them.
171                int s = start;
172                boolean match = true;
173
174                for (i=0; i<prefixLen; ++i) {
175                    if (s >= limit) {
176                        if (i > 0) {
177                            // We've already matched a character.  This is
178                            // a partial match, so we return if in
179                            // incremental mode.  In non-incremental mode,
180                            // go to the next spec.
181                            if (isIncremental) {
182                                break loop;
183                            }
184                            match = false;
185                            break;
186                        }
187                    }
188                    char c = text.charAt(s++);
189                    if (c != spec[ipat + i]) {
190                        match = false;
191                        break;
192                    }
193                }
194
195                if (match) {
196                    int u = 0;
197                    int digitCount = 0;
198                    for (;;) {
199                        if (s >= limit) {
200                            // Check for partial match in incremental mode.
201                            if (s > start && isIncremental) {
202                                break loop;
203                            }
204                            break;
205                        }
206                        int ch = text.char32At(s);
207                        int digit = UCharacter.digit(ch, radix);
208                        if (digit < 0) {
209                            break;
210                        }
211                        s += UTF16.getCharCount(ch);
212                        u = (u * radix) + digit;
213                        if (++digitCount == maxDigits) {
214                            break;
215                        }
216                    }
217
218                    match = (digitCount >= minDigits);
219
220                    if (match) {
221                        for (i=0; i<suffixLen; ++i) {
222                            if (s >= limit) {
223                                // Check for partial match in incremental mode.
224                                if (s > start && isIncremental) {
225                                    break loop;
226                                }
227                                match = false;
228                                break;
229                            }
230                            char c = text.charAt(s++);
231                            if (c != spec[ipat + prefixLen + i]) {
232                                match = false;
233                                break;
234                            }
235                        }
236
237                        if (match) {
238                            // At this point, we have a match
239                            String str = UTF16.valueOf(u);
240                            text.replace(start, s, str);
241                            limit -= s - start - str.length();
242                            // The following break statement leaves the
243                            // loop that is traversing the forms in
244                            // spec[].  We then parse the next input
245                            // character.
246                            break;
247                        }
248                    }
249                }
250
251                ipat += prefixLen + suffixLen;
252            }
253
254            if (start < limit) {
255                start += UTF16.getCharCount(text.char32At(start));
256            }
257        }
258
259        pos.contextLimit += limit - pos.limit;
260        pos.limit = limit;
261        pos.start = start;
262    }
263
264    /* (non-Javadoc)
265     * @see android.icu.text.Transliterator#addSourceTargetSet(android.icu.text.UnicodeSet, android.icu.text.UnicodeSet, android.icu.text.UnicodeSet)
266     */
267    @Override
268    public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
269        // Each form consists of a prefix, suffix,
270        // * radix, minimum digit count, and maximum digit count.  These
271        // * values are stored as a five character header. ...
272        UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
273        UnicodeSet items = new UnicodeSet();
274        StringBuilder buffer = new StringBuilder();
275        for (int i = 0; spec[i] != END;) {
276            // first 5 items are header
277            int end = i + spec[i] + spec[i+1] + 5;
278            int radix = spec[i+2];
279            for (int j = 0; j < radix; ++j) {
280                Utility.appendNumber(buffer, j, radix, 0);
281            }
282            // then add the characters
283            for (int j = i + 5; j < end; ++j) {
284                items.add(spec[j]);
285            }
286            // and go to next block
287            i = end;
288        }
289        items.addAll(buffer.toString());
290        items.retainAll(myFilter);
291
292        if (items.size() > 0) {
293            sourceSet.addAll(items);
294            targetSet.addAll(0,0x10FFFF); // assume we can produce any character
295        }
296    }
297}
298