1/*
2 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.awt.font;
27
28import java.io.IOException;
29import java.io.ObjectOutputStream;
30import java.util.Arrays;
31import java.util.Comparator;
32import java.util.EnumSet;
33import java.util.Set;
34
35/**
36 * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
37 * digits to other Unicode decimal digits.  Users of this class will
38 * primarily be people who wish to present data using
39 * national digit shapes, but find it more convenient to represent the
40 * data internally using Latin-1 (European) digits.  This does not
41 * interpret the deprecated numeric shape selector character (U+206E).
42 * <p>
43 * Instances of <code>NumericShaper</code> are typically applied
44 * as attributes to text with the
45 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
46 * of the <code>TextAttribute</code> class.
47 * For example, this code snippet causes a <code>TextLayout</code> to
48 * shape European digits to Arabic in an Arabic context:<br>
49 * <blockquote><pre>
50 * Map map = new HashMap();
51 * map.put(TextAttribute.NUMERIC_SHAPING,
52 *     NumericShaper.getContextualShaper(NumericShaper.ARABIC));
53 * FontRenderContext frc = ...;
54 * TextLayout layout = new TextLayout(text, map, frc);
55 * layout.draw(g2d, x, y);
56 * </pre></blockquote>
57 * <br>
58 * It is also possible to perform numeric shaping explicitly using instances
59 * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
60 * <blockquote><pre>
61 * char[] text = ...;
62 * // shape all EUROPEAN digits (except zero) to ARABIC digits
63 * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
64 * shaper.shape(text, start, count);
65 *
66 * // shape European digits to ARABIC digits if preceding text is Arabic, or
67 * // shape European digits to TAMIL digits if preceding text is Tamil, or
68 * // leave European digits alone if there is no preceding text, or
69 * // preceding text is neither Arabic nor Tamil
70 * NumericShaper shaper =
71 *     NumericShaper.getContextualShaper(NumericShaper.ARABIC |
72 *                                         NumericShaper.TAMIL,
73 *                                       NumericShaper.EUROPEAN);
74 * shaper.shape(text, start, count);
75 * </pre></blockquote>
76 *
77 * <p><b>Bit mask- and enum-based Unicode ranges</b></p>
78 *
79 * <p>This class supports two different programming interfaces to
80 * represent Unicode ranges for script-specific digits: bit
81 * mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and
82 * enum-based ones, such as {@link NumericShaper.Range#ARABIC}.
83 * Multiple ranges can be specified by ORing bit mask-based constants,
84 * such as:
85 * <blockquote><pre>
86 * NumericShaper.ARABIC | NumericShaper.TAMIL
87 * </pre></blockquote>
88 * or creating a {@code Set} with the {@link NumericShaper.Range}
89 * constants, such as:
90 * <blockquote><pre>
 * EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL)
92 * </pre></blockquote>
93 * The enum-based ranges are a super set of the bit mask-based ones.
94 *
95 * <p>If the two interfaces are mixed (including serialization),
96 * Unicode range values are mapped to their counterparts where such
97 * mapping is possible, such as {@code NumericShaper.Range.ARABIC}
98 * from/to {@code NumericShaper.ARABIC}.  If any unmappable range
99 * values are specified, such as {@code NumericShaper.Range.BALINESE},
100 * those ranges are ignored.
101 *
102 * <p><b>Decimal Digits Precedence</b></p>
103 *
104 * <p>A Unicode range may have more than one set of decimal digits. If
105 * multiple decimal digits sets are specified for the same Unicode
106 * range, one of the sets will take precedence as follows.
107 *
108 * <table border=1 cellspacing=3 cellpadding=0 summary="NumericShaper constants precedence.">
109 *    <tr>
110 *       <th class="TableHeadingColor">Unicode Range</th>
111 *       <th class="TableHeadingColor"><code>NumericShaper</code> Constants</th>
112 *       <th class="TableHeadingColor">Precedence</th>
113 *    </tr>
114 *    <tr>
115 *       <td rowspan="2">Arabic</td>
116 *       <td>{@link NumericShaper#ARABIC NumericShaper.ARABIC}<br>
117 *           {@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
118 *       <td>{@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
119 *    </tr>
120 *    <tr>
121 *       <td>{@link NumericShaper.Range#ARABIC}<br>
122 *           {@link NumericShaper.Range#EASTERN_ARABIC}</td>
123 *       <td>{@link NumericShaper.Range#EASTERN_ARABIC}</td>
124 *    </tr>
125 *    <tr>
126 *       <td>Tai Tham</td>
127 *       <td>{@link NumericShaper.Range#TAI_THAM_HORA}<br>
128 *           {@link NumericShaper.Range#TAI_THAM_THAM}</td>
129 *       <td>{@link NumericShaper.Range#TAI_THAM_THAM}</td>
130 *    </tr>
131 * </table>
132 *
133 * @since 1.4
134 */
135
136public final class NumericShaper implements java.io.Serializable {
137    /**
138     * A {@code NumericShaper.Range} represents a Unicode range of a
139     * script having its own decimal digits. For example, the {@link
140     * NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT
141     * ZERO (U+0E50) to THAI DIGIT NINE (U+0E59).
142     *
143     * <p>The <code>Range</code> enum replaces the traditional bit
144     * mask-based values (e.g., {@link NumericShaper#ARABIC}), and
145     * supports more Unicode ranges than the bit mask-based ones. For
146     * example, the following code using the bit mask:
147     * <blockquote><pre>
148     * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
149     *                                     NumericShaper.TAMIL,
150     *                                   NumericShaper.EUROPEAN);
151     * </pre></blockquote>
152     * can be written using this enum as:
153     * <blockquote><pre>
154     * NumericShaper.getContextualShaper(EnumSet.of(
155     *                                     NumericShaper.Range.ARABIC,
156     *                                     NumericShaper.Range.TAMIL),
157     *                                   NumericShaper.Range.EUROPEAN);
158     * </pre></blockquote>
159     *
160     * @since 1.7
161     */
162    public static enum Range {
163        // The order of EUROPEAN to MOGOLIAN must be consistent
164        // with the bitmask-based constants.
165        /**
166         * The Latin (European) range with the Latin (ASCII) digits.
167         */
168        EUROPEAN        ('\u0030', '\u0000', '\u0300'),
169        /**
170         * The Arabic range with the Arabic-Indic digits.
171         */
172        ARABIC          ('\u0660', '\u0600', '\u0780'),
173        /**
174         * The Arabic range with the Eastern Arabic-Indic digits.
175         */
176        EASTERN_ARABIC  ('\u06f0', '\u0600', '\u0780'),
177        /**
178         * The Devanagari range with the Devanagari digits.
179         */
180        DEVANAGARI      ('\u0966', '\u0900', '\u0980'),
181        /**
182         * The Bengali range with the Bengali digits.
183         */
184        BENGALI         ('\u09e6', '\u0980', '\u0a00'),
185        /**
186         * The Gurmukhi range with the Gurmukhi digits.
187         */
188        GURMUKHI        ('\u0a66', '\u0a00', '\u0a80'),
189        /**
190         * The Gujarati range with the Gujarati digits.
191         */
192        GUJARATI        ('\u0ae6', '\u0b00', '\u0b80'),
193        /**
194         * The Oriya range with the Oriya digits.
195         */
196        ORIYA           ('\u0b66', '\u0b00', '\u0b80'),
197        /**
198         * The Tamil range with the Tamil digits.
199         */
200        TAMIL           ('\u0be6', '\u0b80', '\u0c00'),
201        /**
202         * The Telugu range with the Telugu digits.
203         */
204        TELUGU          ('\u0c66', '\u0c00', '\u0c80'),
205        /**
206         * The Kannada range with the Kannada digits.
207         */
208        KANNADA         ('\u0ce6', '\u0c80', '\u0d00'),
209        /**
210         * The Malayalam range with the Malayalam digits.
211         */
212        MALAYALAM       ('\u0d66', '\u0d00', '\u0d80'),
213        /**
214         * The Thai range with the Thai digits.
215         */
216        THAI            ('\u0e50', '\u0e00', '\u0e80'),
217        /**
218         * The Lao range with the Lao digits.
219         */
220        LAO             ('\u0ed0', '\u0e80', '\u0f00'),
221        /**
222         * The Tibetan range with the Tibetan digits.
223         */
224        TIBETAN         ('\u0f20', '\u0f00', '\u1000'),
225        /**
226         * The Myanmar range with the Myanmar digits.
227         */
228        MYANMAR         ('\u1040', '\u1000', '\u1080'),
229        /**
230         * The Ethiopic range with the Ethiopic digits. Ethiopic
231         * does not have a decimal digit 0 so Latin (European) 0 is
232         * used.
233         */
234        ETHIOPIC        ('\u1369', '\u1200', '\u1380') {
235            @Override
236            char getNumericBase() { return 1; }
237        },
238        /**
239         * The Khmer range with the Khmer digits.
240         */
241        KHMER           ('\u17e0', '\u1780', '\u1800'),
242        /**
243         * The Mongolian range with the Mongolian digits.
244         */
245        MONGOLIAN       ('\u1810', '\u1800', '\u1900'),
246        // The order of EUROPEAN to MOGOLIAN must be consistent
247        // with the bitmask-based constants.
248
249        /**
250         * The N'Ko range with the N'Ko digits.
251         */
252        NKO             ('\u07c0', '\u07c0', '\u0800'),
253        /**
254         * The Myanmar range with the Myanmar Shan digits.
255         */
256        MYANMAR_SHAN    ('\u1090', '\u1000', '\u10a0'),
257        /**
258         * The Limbu range with the Limbu digits.
259         */
260        LIMBU           ('\u1946', '\u1900', '\u1950'),
261        /**
262         * The New Tai Lue range with the New Tai Lue digits.
263         */
264        NEW_TAI_LUE     ('\u19d0', '\u1980', '\u19e0'),
265        /**
266         * The Balinese range with the Balinese digits.
267         */
268        BALINESE        ('\u1b50', '\u1b00', '\u1b80'),
269        /**
270         * The Sundanese range with the Sundanese digits.
271         */
272        SUNDANESE       ('\u1bb0', '\u1b80', '\u1bc0'),
273        /**
274         * The Lepcha range with the Lepcha digits.
275         */
276        LEPCHA          ('\u1c40', '\u1c00', '\u1c50'),
277        /**
278         * The Ol Chiki range with the Ol Chiki digits.
279         */
280        OL_CHIKI        ('\u1c50', '\u1c50', '\u1c80'),
281        /**
282         * The Vai range with the Vai digits.
283         */
284        VAI             ('\ua620', '\ua500', '\ua640'),
285        /**
286         * The Saurashtra range with the Saurashtra digits.
287         */
288        SAURASHTRA      ('\ua8d0', '\ua880', '\ua8e0'),
289        /**
290         * The Kayah Li range with the Kayah Li digits.
291         */
292        KAYAH_LI        ('\ua900', '\ua900', '\ua930'),
293        /**
294         * The Cham range with the Cham digits.
295         */
296        CHAM            ('\uaa50', '\uaa00', '\uaa60'),
297        /**
298         * The Tai Tham Hora range with the Tai Tham Hora digits.
299         */
300        TAI_THAM_HORA   ('\u1a80', '\u1a20', '\u1ab0'),
301        /**
302         * The Tai Tham Tham range with the Tai Tham Tham digits.
303         */
304        TAI_THAM_THAM   ('\u1a90', '\u1a20', '\u1ab0'),
305        /**
306         * The Javanese range with the Javanese digits.
307         */
308        JAVANESE        ('\ua9d0', '\ua980', '\ua9e0'),
309        /**
310         * The Meetei Mayek range with the Meetei Mayek digits.
311         */
312        MEETEI_MAYEK    ('\uabf0', '\uabc0', '\uac00');
313
314        private static int toRangeIndex(Range script) {
315            int index = script.ordinal();
316            return index < NUM_KEYS ? index : -1;
317        }
318
319        private static Range indexToRange(int index) {
320            return index < NUM_KEYS ? Range.values()[index] : null;
321        }
322
323        private static int toRangeMask(Set<Range> ranges) {
324            int m = 0;
325            for (Range range : ranges) {
326                int index = range.ordinal();
327                if (index < NUM_KEYS) {
328                    m |= 1 << index;
329                }
330            }
331            return m;
332        }
333
334        private static Set<Range> maskToRangeSet(int mask) {
335            Set<Range> set = EnumSet.noneOf(Range.class);
336            Range[] a = Range.values();
337            for (int i = 0; i < NUM_KEYS; i++) {
338                if ((mask & (1 << i)) != 0) {
339                    set.add(a[i]);
340                }
341            }
342            return set;
343        }
344
345        // base character of range digits
346        private final int base;
347        // Unicode range
348        private final int start, // inclusive
349                          end;   // exclusive
350
351        private Range(int base, int start, int end) {
352            this.base = base - ('0' + getNumericBase());
353            this.start = start;
354            this.end = end;
355        }
356
357        private int getDigitBase() {
358            return base;
359        }
360
361        char getNumericBase() {
362            return 0;
363        }
364
365        private boolean inRange(int c) {
366            return start <= c && c < end;
367        }
368    }
369
    /** index of context for contextual shaping - values range from 0 to 18 */
    private int key;

    /** flag indicating whether to shape contextually (high bit) and which
     *  digit ranges to shape (bits 0-18)
     */
    private int mask;

    /**
     * The context {@code Range} for contextual shaping or the {@code
     * Range} for non-contextual shaping. {@code null} for the bit
     * mask-based API.
     *
     * @since 1.7
     */
    private Range shapingRange;

    /**
     * {@code Set<Range>} indicating which Unicode ranges to
     * shape. {@code null} for the bit mask-based API.
     */
    private transient Set<Range> rangeSet;

    /**
     * rangeSet.toArray() value. Sorted by Range.base when the number
     * of elements is greater than BSEARCH_THRESHOLD.
     */
    private transient Range[] rangeArray;

    /**
     * If more than BSEARCH_THRESHOLD ranges are specified, binary search is used.
     */
    private static final int BSEARCH_THRESHOLD = 3;

    // Explicit serialization version for this Serializable class.
    private static final long serialVersionUID = -8022764705923730308L;
405
    /** Identifies the Latin-1 (European) and extended range, and
     *  Latin-1 (European) decimal base.
     */
    public static final int EUROPEAN = 1<<0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1<<1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1<<2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1<<3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1<<4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1<<5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1<<6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1<<7;

    /** Identifies the TAMIL range and decimal base. */
    // TAMIL DIGIT ZERO was added in Unicode 4.1
    public static final int TAMIL = 1<<8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1<<9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1<<10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1<<11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1<<12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1<<13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1<<14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1<<15;

    /** Identifies the ETHIOPIC range and decimal base. */
    public static final int ETHIOPIC = 1<<16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1<<17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1<<18;

    /** Identifies all ranges, for full contextual shaping.
     *
     * <p>This constant specifies all of the bit mask-based
     * ranges. Use {@code EnumSet.allOf(NumericShaper.Range.class)} to
     * specify all of the enum-based ranges.
     */
    // 0x0007ffff has bits 0 through 18 set, one per constant above.
    public static final int ALL_RANGES = 0x0007ffff;

    // Key values: each key equals the bit position of the public mask
    // constant of the same name (for example ARABIC == 1 << ARABIC_KEY).
    private static final int EUROPEAN_KEY = 0;
    private static final int ARABIC_KEY = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY = 3;
    private static final int BENGALI_KEY = 4;
    private static final int GURMUKHI_KEY = 5;
    private static final int GUJARATI_KEY = 6;
    private static final int ORIYA_KEY = 7;
    private static final int TAMIL_KEY = 8;
    private static final int TELUGU_KEY = 9;
    private static final int KANNADA_KEY = 10;
    private static final int MALAYALAM_KEY = 11;
    private static final int THAI_KEY = 12;
    private static final int LAO_KEY = 13;
    private static final int TIBETAN_KEY = 14;
    private static final int MYANMAR_KEY = 15;
    private static final int ETHIOPIC_KEY = 16;
    private static final int KHMER_KEY = 17;
    private static final int MONGOLIAN_KEY = 18;

    // Number of bit mask-based keys (19); Range constants with an
    // ordinal at or above this value have no bit mask counterpart.
    private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed

    // High bit of a mask value: the "shape contextually" flag.
    private static final int CONTEXTUAL_MASK = 1<<31;
497
    // Digit offsets for the bit mask-based ranges, listed in key order
    // (EUROPEAN_KEY .. MONGOLIAN_KEY). Each entry is the distance from
    // the ASCII digit '0' to the script's digit zero. ETHIOPIC has no
    // zero, so its entry is measured from '1' to its first digit.
    private static final char[] bases = {
        '\u0030' - '\u0030', // EUROPEAN
        '\u0660' - '\u0030', // ARABIC-INDIC
        '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
        '\u0966' - '\u0030', // DEVANAGARI
        '\u09e6' - '\u0030', // BENGALI
        '\u0a66' - '\u0030', // GURMUKHI
        '\u0ae6' - '\u0030', // GUJARATI
        '\u0b66' - '\u0030', // ORIYA
        '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
        '\u0c66' - '\u0030', // TELUGU
        '\u0ce6' - '\u0030', // KANNADA
        '\u0d66' - '\u0030', // MALAYALAM
        '\u0e50' - '\u0030', // THAI
        '\u0ed0' - '\u0030', // LAO
        '\u0f20' - '\u0030', // TIBETAN
        '\u1040' - '\u0030', // MYANMAR
        '\u1369' - '\u0031', // ETHIOPIC - no zero
        '\u17e0' - '\u0030', // KHMER
        '\u1810' - '\u0030', // MONGOLIAN
    };
519
    // some ranges adjoin or overlap, rethink if we want to do a binary search on this

    // Context boundaries for the bit mask-based ranges, two entries per
    // key in key order: contexts[2 * key] is the start of the range
    // (inclusive) and contexts[2 * key + 1] its end (exclusive). The
    // single trailing entry is a terminator, so index + 1 is always
    // valid for every index up to ctCacheLimit.
    private static final char[] contexts = {
        '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
        '\u0600', '\u0780', // ARABIC
        '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
        '\u0900', '\u0980', // DEVANAGARI
        '\u0980', '\u0a00', // BENGALI
        '\u0a00', '\u0a80', // GURMUKHI
        '\u0a80', '\u0b00', // GUJARATI
        '\u0b00', '\u0b80', // ORIYA
        '\u0b80', '\u0c00', // TAMIL
        '\u0c00', '\u0c80', // TELUGU
        '\u0c80', '\u0d00', // KANNADA
        '\u0d00', '\u0d80', // MALAYALAM
        '\u0e00', '\u0e80', // THAI
        '\u0e80', '\u0f00', // LAO
        '\u0f00', '\u1000', // TIBETAN
        '\u1000', '\u1080', // MYANMAR
        '\u1200', '\u1380', // ETHIOPIC - note missing zero
        '\u1780', '\u1800', // KHMER
        '\u1800', '\u1900', // MONGOLIAN
        '\uffff',
    };

    // assume most characters are near each other so probing the cache is infrequent,
    // and a linear probe is ok.

    // Index into contexts left behind by the previous getContextKey
    // call; shared by all instances because it is static.
    private static int ctCache = 0;
    // Largest value ctCache may take, so that contexts[ctCache + 1] is
    // still inside the array.
    private static int ctCacheLimit = contexts.length - 2;
550
551    // warning, synchronize access to this as it modifies state
552    private static int getContextKey(char c) {
553        if (c < contexts[ctCache]) {
554            while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
555        } else if (c >= contexts[ctCache + 1]) {
556            while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
557        }
558
559        // if we're not in a known range, then return EUROPEAN as the range key
560        return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
561    }
562
563    // cache for the NumericShaper.Range version
564    private transient volatile Range currentRange = Range.EUROPEAN;
565
566    private Range rangeForCodePoint(final int codepoint) {
567        if (currentRange.inRange(codepoint)) {
568            return currentRange;
569        }
570
571        final Range[] ranges = rangeArray;
572        if (ranges.length > BSEARCH_THRESHOLD) {
573            int lo = 0;
574            int hi = ranges.length - 1;
575            while (lo <= hi) {
576                int mid = (lo + hi) / 2;
577                Range range = ranges[mid];
578                if (codepoint < range.start) {
579                    hi = mid - 1;
580                } else if (codepoint >= range.end) {
581                    lo = mid + 1;
582                } else {
583                    currentRange = range;
584                    return range;
585                }
586            }
587        } else {
588            for (int i = 0; i < ranges.length; i++) {
589                if (ranges[i].inRange(codepoint)) {
590                    return ranges[i];
591                }
592            }
593        }
594        return Range.EUROPEAN;
595    }
596
    /*
     * A range table of strong directional characters (types L, R, AL).
     * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
     * characters, odd (right) indexes are starts of ranges of strong directional
     * characters.
     *
     * Each entry is therefore the first code point of a run and the next
     * entry is the exclusive end of that run. isStrongDirectional treats
     * a code point as strong exactly when the index of its run is odd.
     * The values are listed in ascending order; the final pair is a
     * sentinel.
     */
    private static int[] strongTable = {
        0x0000, 0x0041,
        0x005b, 0x0061,
        0x007b, 0x00aa,
        0x00ab, 0x00b5,
        0x00b6, 0x00ba,
        0x00bb, 0x00c0,
        0x00d7, 0x00d8,
        0x00f7, 0x00f8,
        0x02b9, 0x02bb,
        0x02c2, 0x02d0,
        0x02d2, 0x02e0,
        0x02e5, 0x02ee,
        0x02ef, 0x0370,
        0x0374, 0x0376,
        0x037e, 0x0386,
        0x0387, 0x0388,
        0x03f6, 0x03f7,
        0x0483, 0x048a,
        0x058a, 0x05be,
        0x05bf, 0x05c0,
        0x05c1, 0x05c3,
        0x05c4, 0x05c6,
        0x05c7, 0x05d0,
        0x0600, 0x0608,
        0x0609, 0x060b,
        0x060c, 0x060d,
        0x060e, 0x061b,
        0x064b, 0x066d,
        0x0670, 0x0671,
        0x06d6, 0x06e5,
        0x06e7, 0x06ee,
        0x06f0, 0x06fa,
        0x0711, 0x0712,
        0x0730, 0x074d,
        0x07a6, 0x07b1,
        0x07eb, 0x07f4,
        0x07f6, 0x07fa,
        0x0816, 0x081a,
        0x081b, 0x0824,
        0x0825, 0x0828,
        0x0829, 0x0830,
        0x0859, 0x085e,
        0x08e4, 0x0903,
        0x093a, 0x093b,
        0x093c, 0x093d,
        0x0941, 0x0949,
        0x094d, 0x094e,
        0x0951, 0x0958,
        0x0962, 0x0964,
        0x0981, 0x0982,
        0x09bc, 0x09bd,
        0x09c1, 0x09c7,
        0x09cd, 0x09ce,
        0x09e2, 0x09e6,
        0x09f2, 0x09f4,
        0x09fb, 0x0a03,
        0x0a3c, 0x0a3e,
        0x0a41, 0x0a59,
        0x0a70, 0x0a72,
        0x0a75, 0x0a83,
        0x0abc, 0x0abd,
        0x0ac1, 0x0ac9,
        0x0acd, 0x0ad0,
        0x0ae2, 0x0ae6,
        0x0af1, 0x0b02,
        0x0b3c, 0x0b3d,
        0x0b3f, 0x0b40,
        0x0b41, 0x0b47,
        0x0b4d, 0x0b57,
        0x0b62, 0x0b66,
        0x0b82, 0x0b83,
        0x0bc0, 0x0bc1,
        0x0bcd, 0x0bd0,
        0x0bf3, 0x0c01,
        0x0c3e, 0x0c41,
        0x0c46, 0x0c58,
        0x0c62, 0x0c66,
        0x0c78, 0x0c7f,
        0x0cbc, 0x0cbd,
        0x0ccc, 0x0cd5,
        0x0ce2, 0x0ce6,
        0x0d41, 0x0d46,
        0x0d4d, 0x0d4e,
        0x0d62, 0x0d66,
        0x0dca, 0x0dcf,
        0x0dd2, 0x0dd8,
        0x0e31, 0x0e32,
        0x0e34, 0x0e40,
        0x0e47, 0x0e4f,
        0x0eb1, 0x0eb2,
        0x0eb4, 0x0ebd,
        0x0ec8, 0x0ed0,
        0x0f18, 0x0f1a,
        0x0f35, 0x0f36,
        0x0f37, 0x0f38,
        0x0f39, 0x0f3e,
        0x0f71, 0x0f7f,
        0x0f80, 0x0f85,
        0x0f86, 0x0f88,
        0x0f8d, 0x0fbe,
        0x0fc6, 0x0fc7,
        0x102d, 0x1031,
        0x1032, 0x1038,
        0x1039, 0x103b,
        0x103d, 0x103f,
        0x1058, 0x105a,
        0x105e, 0x1061,
        0x1071, 0x1075,
        0x1082, 0x1083,
        0x1085, 0x1087,
        0x108d, 0x108e,
        0x109d, 0x109e,
        0x135d, 0x1360,
        0x1390, 0x13a0,
        0x1400, 0x1401,
        0x1680, 0x1681,
        0x169b, 0x16a0,
        0x1712, 0x1720,
        0x1732, 0x1735,
        0x1752, 0x1760,
        0x1772, 0x1780,
        0x17b4, 0x17b6,
        0x17b7, 0x17be,
        0x17c6, 0x17c7,
        0x17c9, 0x17d4,
        0x17db, 0x17dc,
        0x17dd, 0x17e0,
        0x17f0, 0x1810,
        0x18a9, 0x18aa,
        0x1920, 0x1923,
        0x1927, 0x1929,
        0x1932, 0x1933,
        0x1939, 0x1946,
        0x19de, 0x1a00,
        0x1a17, 0x1a19,
        0x1a56, 0x1a57,
        0x1a58, 0x1a61,
        0x1a62, 0x1a63,
        0x1a65, 0x1a6d,
        0x1a73, 0x1a80,
        0x1b00, 0x1b04,
        0x1b34, 0x1b35,
        0x1b36, 0x1b3b,
        0x1b3c, 0x1b3d,
        0x1b42, 0x1b43,
        0x1b6b, 0x1b74,
        0x1b80, 0x1b82,
        0x1ba2, 0x1ba6,
        0x1ba8, 0x1baa,
        0x1bab, 0x1bac,
        0x1be6, 0x1be7,
        0x1be8, 0x1bea,
        0x1bed, 0x1bee,
        0x1bef, 0x1bf2,
        0x1c2c, 0x1c34,
        0x1c36, 0x1c3b,
        0x1cd0, 0x1cd3,
        0x1cd4, 0x1ce1,
        0x1ce2, 0x1ce9,
        0x1ced, 0x1cee,
        0x1cf4, 0x1cf5,
        0x1dc0, 0x1e00,
        0x1fbd, 0x1fbe,
        0x1fbf, 0x1fc2,
        0x1fcd, 0x1fd0,
        0x1fdd, 0x1fe0,
        0x1fed, 0x1ff2,
        0x1ffd, 0x200e,
        0x2010, 0x2071,
        0x2074, 0x207f,
        0x2080, 0x2090,
        0x20a0, 0x2102,
        0x2103, 0x2107,
        0x2108, 0x210a,
        0x2114, 0x2115,
        0x2116, 0x2119,
        0x211e, 0x2124,
        0x2125, 0x2126,
        0x2127, 0x2128,
        0x2129, 0x212a,
        0x212e, 0x212f,
        0x213a, 0x213c,
        0x2140, 0x2145,
        0x214a, 0x214e,
        0x2150, 0x2160,
        0x2189, 0x2336,
        0x237b, 0x2395,
        0x2396, 0x249c,
        0x24ea, 0x26ac,
        0x26ad, 0x2800,
        0x2900, 0x2c00,
        0x2ce5, 0x2ceb,
        0x2cef, 0x2cf2,
        0x2cf9, 0x2d00,
        0x2d7f, 0x2d80,
        0x2de0, 0x3005,
        0x3008, 0x3021,
        0x302a, 0x3031,
        0x3036, 0x3038,
        0x303d, 0x3041,
        0x3099, 0x309d,
        0x30a0, 0x30a1,
        0x30fb, 0x30fc,
        0x31c0, 0x31f0,
        0x321d, 0x3220,
        0x3250, 0x3260,
        0x327c, 0x327f,
        0x32b1, 0x32c0,
        0x32cc, 0x32d0,
        0x3377, 0x337b,
        0x33de, 0x33e0,
        0x33ff, 0x3400,
        0x4dc0, 0x4e00,
        0xa490, 0xa4d0,
        0xa60d, 0xa610,
        0xa66f, 0xa680,
        0xa69f, 0xa6a0,
        0xa6f0, 0xa6f2,
        0xa700, 0xa722,
        0xa788, 0xa789,
        0xa802, 0xa803,
        0xa806, 0xa807,
        0xa80b, 0xa80c,
        0xa825, 0xa827,
        0xa828, 0xa830,
        0xa838, 0xa840,
        0xa874, 0xa880,
        0xa8c4, 0xa8ce,
        0xa8e0, 0xa8f2,
        0xa926, 0xa92e,
        0xa947, 0xa952,
        0xa980, 0xa983,
        0xa9b3, 0xa9b4,
        0xa9b6, 0xa9ba,
        0xa9bc, 0xa9bd,
        0xaa29, 0xaa2f,
        0xaa31, 0xaa33,
        0xaa35, 0xaa40,
        0xaa43, 0xaa44,
        0xaa4c, 0xaa4d,
        0xaab0, 0xaab1,
        0xaab2, 0xaab5,
        0xaab7, 0xaab9,
        0xaabe, 0xa​ac0,
        0xaac1, 0xaac2,
        0xaaec, 0xaaee,
        0xaaf6, 0xab01,
        0xabe5, 0xabe6,
        0xabe8, 0xabe9,
        0xabed, 0xabf0,
        0xfb1e, 0xfb1f,
        0xfb29, 0xfb2a,
        0xfd3e, 0xfd50,
        0xfdfd, 0xfe70,
        0xfeff, 0xff21,
        0xff3b, 0xff41,
        0xff5b, 0xff66,
        0xffe0, 0x10000,
        0x10101, 0x10102,
        0x10140, 0x101d0,
        0x101fd, 0x10280,
        0x1091f, 0x10920,
        0x10a01, 0x10a10,
        0x10a38, 0x10a40,
        0x10b39, 0x10b40,
        0x10e60, 0x11000,
        0x11001, 0x11002,
        0x11038, 0x11047,
        0x11052, 0x11066,
        0x11080, 0x11082,
        0x110b3, 0x110b7,
        0x110b9, 0x110bb,
        0x11100, 0x11103,
        0x11127, 0x1112c,
        0x1112d, 0x11136,
        0x11180, 0x11182,
        0x111b6, 0x111bf,
        0x116ab, 0x116ac,
        0x116ad, 0x116ae,
        0x116b0, 0x116b6,
        0x116b7, 0x116c0,
        0x16f8f, 0x16f93,
        0x1d167, 0x1d16a,
        0x1d173, 0x1d183,
        0x1d185, 0x1d18c,
        0x1d1aa, 0x1d1ae,
        0x1d200, 0x1d360,
        0x1d6db, 0x1d6dc,
        0x1d715, 0x1d716,
        0x1d74f, 0x1d750,
        0x1d789, 0x1d78a,
        0x1d7c3, 0x1d7c4,
        0x1d7ce, 0x1ee00,
        0x1eef0, 0x1f110,
        0x1f16a, 0x1f170,
        0x1f300, 0x1f48c,
        0x1f48d, 0x1f524,
        0x1f525, 0x20000,
        0xe0001, 0xf0000,
        0x10fffe, 0x10ffff // sentinel
    };
905
906
    // use a binary search with a cache
    //
    // stCache remembers the strongTable index produced by the most recent
    // isStrongDirectional call; the next lookup tests that position first
    // and only falls back to a binary search on a miss. The field is
    // transient, so default serialization does not write it.

    private transient volatile int stCache = 0;
910
911    private boolean isStrongDirectional(char c) {
912        int cachedIndex = stCache;
913        if (c < strongTable[cachedIndex]) {
914            cachedIndex = search(c, strongTable, 0, cachedIndex);
915        } else if (c >= strongTable[cachedIndex + 1]) {
916            cachedIndex = search(c, strongTable, cachedIndex + 1,
917                                 strongTable.length - cachedIndex - 1);
918        }
919        boolean val = (cachedIndex & 0x1) == 1;
920        stCache = cachedIndex;
921        return val;
922    }
923
924    private static int getKeyFromMask(int mask) {
925        int key = 0;
926        while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
927            ++key;
928        }
929        if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
930            throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
931        }
932        return key;
933    }
934
935    /**
936     * Returns a shaper for the provided unicode range.  All
937     * Latin-1 (EUROPEAN) digits are converted
938     * to the corresponding decimal unicode digits.
939     * @param singleRange the specified Unicode range
940     * @return a non-contextual numeric shaper
941     * @throws IllegalArgumentException if the range is not a single range
942     */
943    public static NumericShaper getShaper(int singleRange) {
944        int key = getKeyFromMask(singleRange);
945        return new NumericShaper(key, singleRange);
946    }
947
948    /**
949     * Returns a shaper for the provided Unicode
950     * range. All Latin-1 (EUROPEAN) digits are converted to the
951     * corresponding decimal digits of the specified Unicode range.
952     *
953     * @param singleRange the Unicode range given by a {@link
954     *                    NumericShaper.Range} constant.
955     * @return a non-contextual {@code NumericShaper}.
956     * @throws NullPointerException if {@code singleRange} is {@code null}
957     * @since 1.7
958     */
959    public static NumericShaper getShaper(Range singleRange) {
960        return new NumericShaper(singleRange, EnumSet.of(singleRange));
961    }
962
963    /**
964     * Returns a contextual shaper for the provided unicode range(s).
965     * Latin-1 (EUROPEAN) digits are converted to the decimal digits
966     * corresponding to the range of the preceding text, if the
967     * range is one of the provided ranges.  Multiple ranges are
968     * represented by or-ing the values together, such as,
969     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
970     * shaper assumes EUROPEAN as the starting context, that is, if
971     * EUROPEAN digits are encountered before any strong directional
972     * text in the string, the context is presumed to be EUROPEAN, and
973     * so the digits will not shape.
974     * @param ranges the specified Unicode ranges
975     * @return a shaper for the specified ranges
976     */
977    public static NumericShaper getContextualShaper(int ranges) {
978        ranges |= CONTEXTUAL_MASK;
979        return new NumericShaper(EUROPEAN_KEY, ranges);
980    }
981
982    /**
983     * Returns a contextual shaper for the provided Unicode
984     * range(s). The Latin-1 (EUROPEAN) digits are converted to the
985     * decimal digits corresponding to the range of the preceding
986     * text, if the range is one of the provided ranges.
987     *
988     * <p>The shaper assumes EUROPEAN as the starting context, that
989     * is, if EUROPEAN digits are encountered before any strong
990     * directional text in the string, the context is presumed to be
991     * EUROPEAN, and so the digits will not shape.
992     *
993     * @param ranges the specified Unicode ranges
994     * @return a contextual shaper for the specified ranges
995     * @throws NullPointerException if {@code ranges} is {@code null}.
996     * @since 1.7
997     */
998    public static NumericShaper getContextualShaper(Set<Range> ranges) {
999        NumericShaper shaper = new NumericShaper(Range.EUROPEAN, ranges);
1000        shaper.mask = CONTEXTUAL_MASK;
1001        return shaper;
1002    }
1003
1004    /**
1005     * Returns a contextual shaper for the provided unicode range(s).
1006     * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
1007     * corresponding to the range of the preceding text, if the
1008     * range is one of the provided ranges.  Multiple ranges are
1009     * represented by or-ing the values together, for example,
1010     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
1011     * shaper uses defaultContext as the starting context.
1012     * @param ranges the specified Unicode ranges
1013     * @param defaultContext the starting context, such as
1014     * <code>NumericShaper.EUROPEAN</code>
1015     * @return a shaper for the specified Unicode ranges.
1016     * @throws IllegalArgumentException if the specified
1017     * <code>defaultContext</code> is not a single valid range.
1018     */
1019    public static NumericShaper getContextualShaper(int ranges, int defaultContext) {
1020        int key = getKeyFromMask(defaultContext);
1021        ranges |= CONTEXTUAL_MASK;
1022        return new NumericShaper(key, ranges);
1023    }
1024
1025    /**
1026     * Returns a contextual shaper for the provided Unicode range(s).
1027     * The Latin-1 (EUROPEAN) digits will be converted to the decimal
1028     * digits corresponding to the range of the preceding text, if the
1029     * range is one of the provided ranges. The shaper uses {@code
1030     * defaultContext} as the starting context.
1031     *
1032     * @param ranges the specified Unicode ranges
1033     * @param defaultContext the starting context, such as
1034     *                       {@code NumericShaper.Range.EUROPEAN}
1035     * @return a contextual shaper for the specified Unicode ranges.
1036     * @throws NullPointerException
1037     *         if {@code ranges} or {@code defaultContext} is {@code null}
1038     * @since 1.7
1039     */
1040    public static NumericShaper getContextualShaper(Set<Range> ranges,
1041                                                    Range defaultContext) {
1042        if (defaultContext == null) {
1043            throw new NullPointerException();
1044        }
1045        NumericShaper shaper = new NumericShaper(defaultContext, ranges);
1046        shaper.mask = CONTEXTUAL_MASK;
1047        return shaper;
1048    }
1049
1050    /**
1051     * Private constructor.
1052     */
1053    private NumericShaper(int key, int mask) {
1054        this.key = key;
1055        this.mask = mask;
1056    }
1057
1058    private NumericShaper(Range defaultContext, Set<Range> ranges) {
1059        shapingRange = defaultContext;
1060        rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null.
1061
1062        // Give precedance to EASTERN_ARABIC if both ARABIC and
1063        // EASTERN_ARABIC are specified.
1064        if (rangeSet.contains(Range.EASTERN_ARABIC)
1065            && rangeSet.contains(Range.ARABIC)) {
1066            rangeSet.remove(Range.ARABIC);
1067        }
1068
1069        // As well as the above case, give precedance to TAI_THAM_THAM if both
1070        // TAI_THAM_HORA and TAI_THAM_THAM are specified.
1071        if (rangeSet.contains(Range.TAI_THAM_THAM)
1072            && rangeSet.contains(Range.TAI_THAM_HORA)) {
1073            rangeSet.remove(Range.TAI_THAM_HORA);
1074        }
1075
1076        rangeArray = rangeSet.toArray(new Range[rangeSet.size()]);
1077        if (rangeArray.length > BSEARCH_THRESHOLD) {
1078            // sort rangeArray for binary search
1079            Arrays.sort(rangeArray,
1080                        new Comparator<Range>() {
1081                            public int compare(Range s1, Range s2) {
1082                                return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1;
1083                            }
1084                        });
1085        }
1086    }
1087
1088    /**
1089     * Converts the digits in the text that occur between start and
1090     * start + count.
1091     * @param text an array of characters to convert
1092     * @param start the index into <code>text</code> to start
1093     *        converting
1094     * @param count the number of characters in <code>text</code>
1095     *        to convert
1096     * @throws IndexOutOfBoundsException if start or start + count is
1097     *        out of bounds
1098     * @throws NullPointerException if text is null
1099     */
1100    public void shape(char[] text, int start, int count) {
1101        checkParams(text, start, count);
1102        if (isContextual()) {
1103            if (rangeSet == null) {
1104                shapeContextually(text, start, count, key);
1105            } else {
1106                shapeContextually(text, start, count, shapingRange);
1107            }
1108        } else {
1109            shapeNonContextually(text, start, count);
1110        }
1111    }
1112
1113    /**
1114     * Converts the digits in the text that occur between start and
1115     * start + count, using the provided context.
1116     * Context is ignored if the shaper is not a contextual shaper.
1117     * @param text an array of characters
1118     * @param start the index into <code>text</code> to start
1119     *        converting
1120     * @param count the number of characters in <code>text</code>
1121     *        to convert
1122     * @param context the context to which to convert the
1123     *        characters, such as <code>NumericShaper.EUROPEAN</code>
1124     * @throws IndexOutOfBoundsException if start or start + count is
1125     *        out of bounds
1126     * @throws NullPointerException if text is null
1127     * @throws IllegalArgumentException if this is a contextual shaper
1128     * and the specified <code>context</code> is not a single valid
1129     * range.
1130     */
1131    public void shape(char[] text, int start, int count, int context) {
1132        checkParams(text, start, count);
1133        if (isContextual()) {
1134            int ctxKey = getKeyFromMask(context);
1135            if (rangeSet == null) {
1136                shapeContextually(text, start, count, ctxKey);
1137            } else {
1138                shapeContextually(text, start, count, Range.values()[ctxKey]);
1139            }
1140        } else {
1141            shapeNonContextually(text, start, count);
1142        }
1143    }
1144
1145    /**
1146     * Converts the digits in the text that occur between {@code
1147     * start} and {@code start + count}, using the provided {@code
1148     * context}. {@code Context} is ignored if the shaper is not a
1149     * contextual shaper.
1150     *
1151     * @param text  a {@code char} array
1152     * @param start the index into {@code text} to start converting
1153     * @param count the number of {@code char}s in {@code text}
1154     *              to convert
1155     * @param context the context to which to convert the characters,
1156     *                such as {@code NumericShaper.Range.EUROPEAN}
1157     * @throws IndexOutOfBoundsException
1158     *         if {@code start} or {@code start + count} is out of bounds
1159     * @throws NullPointerException
1160     *         if {@code text} or {@code context} is null
1161     * @since 1.7
1162     */
1163    public void shape(char[] text, int start, int count, Range context) {
1164        checkParams(text, start, count);
1165        if (context == null) {
1166            throw new NullPointerException("context is null");
1167        }
1168
1169        if (isContextual()) {
1170            if (rangeSet != null) {
1171                shapeContextually(text, start, count, context);
1172            } else {
1173                int key = Range.toRangeIndex(context);
1174                if (key >= 0) {
1175                    shapeContextually(text, start, count, key);
1176                } else {
1177                    shapeContextually(text, start, count, shapingRange);
1178                }
1179            }
1180        } else {
1181            shapeNonContextually(text, start, count);
1182        }
1183    }
1184
1185    private void checkParams(char[] text, int start, int count) {
1186        if (text == null) {
1187            throw new NullPointerException("text is null");
1188        }
1189        if ((start < 0)
1190            || (start > text.length)
1191            || ((start + count) < 0)
1192            || ((start + count) > text.length)) {
1193            throw new IndexOutOfBoundsException(
1194                "bad start or count for text of length " + text.length);
1195        }
1196    }
1197
1198    /**
1199     * Returns a <code>boolean</code> indicating whether or not
1200     * this shaper shapes contextually.
1201     * @return <code>true</code> if this shaper is contextual;
1202     *         <code>false</code> otherwise.
1203     */
1204    public boolean isContextual() {
1205        return (mask & CONTEXTUAL_MASK) != 0;
1206    }
1207
1208    /**
1209     * Returns an <code>int</code> that ORs together the values for
1210     * all the ranges that will be shaped.
1211     * <p>
1212     * For example, to check if a shaper shapes to Arabic, you would use the
1213     * following:
1214     * <blockquote>
1215     *   {@code if ((shaper.getRanges() & shaper.ARABIC) != 0) &#123; ... }
1216     * </blockquote>
1217     *
1218     * <p>Note that this method supports only the bit mask-based
1219     * ranges. Call {@link #getRangeSet()} for the enum-based ranges.
1220     *
1221     * @return the values for all the ranges to be shaped.
1222     */
1223    public int getRanges() {
1224        return mask & ~CONTEXTUAL_MASK;
1225    }
1226
1227    /**
1228     * Returns a {@code Set} representing all the Unicode ranges in
1229     * this {@code NumericShaper} that will be shaped.
1230     *
1231     * @return all the Unicode ranges to be shaped.
1232     * @since 1.7
1233     */
1234    public Set<Range> getRangeSet() {
1235        if (rangeSet != null) {
1236            return EnumSet.copyOf(rangeSet);
1237        }
1238        return Range.maskToRangeSet(mask);
1239    }
1240
1241    /**
1242     * Perform non-contextual shaping.
1243     */
1244    private void shapeNonContextually(char[] text, int start, int count) {
1245        int base;
1246        char minDigit = '0';
1247        if (shapingRange != null) {
1248            base = shapingRange.getDigitBase();
1249            minDigit += shapingRange.getNumericBase();
1250        } else {
1251            base = bases[key];
1252            if (key == ETHIOPIC_KEY) {
1253                minDigit++; // Ethiopic doesn't use decimal zero
1254            }
1255        }
1256        for (int i = start, e = start + count; i < e; ++i) {
1257            char c = text[i];
1258            if (c >= minDigit && c <= '\u0039') {
1259                text[i] = (char)(c + base);
1260            }
1261        }
1262    }
1263
    /**
     * Perform contextual shaping for a bit mask-based shaper.
     * Synchronized to protect caches used in getContextKey.
     *
     * @param text   the characters to shape in place
     * @param start  index of the first character to examine
     * @param count  number of characters to examine
     * @param ctxKey key of the starting context; replaced by EUROPEAN_KEY
     *               when its bit is not set in this shaper's mask
     */
    private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {

        // if we don't support this context, then don't shape
        if ((mask & (1<<ctxKey)) == 0) {
            ctxKey = EUROPEAN_KEY;
        }
        // lastkey tracks the raw key most recently adopted as context; it
        // is compared against getContextKey's result before any remapping
        int lastkey = ctxKey;

        int base = bases[ctxKey];
        char minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero

        // In addition to this instance's monitor (method modifier), the
        // whole scan holds the class-wide monitor.
        synchronized (NumericShaper.class) {
            for (int i = start, e = start + count; i < e; ++i) {
                char c = text[i];
                // shape the digit using the context in effect so far
                if (c >= minDigit && c <= '\u0039') {
                    text[i] = (char)(c + base);
                }

                // the test uses the original character c, not the shaped one
                if (isStrongDirectional(c)) {
                    int newkey = getContextKey(c);
                    if (newkey != lastkey) {
                        lastkey = newkey;

                        ctxKey = newkey;
                        // ARABIC and EASTERN_ARABIC contexts are treated
                        // alike: EASTERN_ARABIC digits are used if that bit
                        // is in the mask, otherwise ARABIC digits if that
                        // bit is in the mask
                        if (((mask & EASTERN_ARABIC) != 0) &&
                             (ctxKey == ARABIC_KEY ||
                              ctxKey == EASTERN_ARABIC_KEY)) {
                            ctxKey = EASTERN_ARABIC_KEY;
                        } else if (((mask & ARABIC) != 0) &&
                             (ctxKey == ARABIC_KEY ||
                              ctxKey == EASTERN_ARABIC_KEY)) {
                            ctxKey = ARABIC_KEY;
                        } else if ((mask & (1<<ctxKey)) == 0) {
                            // unsupported context: leave digits unshaped
                            ctxKey = EUROPEAN_KEY;
                        }

                        base = bases[ctxKey];

                        minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
                    }
                }
            }
        }
    }
1312
1313    private void shapeContextually(char[] text, int start, int count, Range ctxKey) {
1314        // if we don't support the specified context, then don't shape.
1315        if (ctxKey == null || !rangeSet.contains(ctxKey)) {
1316            ctxKey = Range.EUROPEAN;
1317        }
1318
1319        Range lastKey = ctxKey;
1320        int base = ctxKey.getDigitBase();
1321        char minDigit = (char)('0' + ctxKey.getNumericBase());
1322        final int end = start + count;
1323        for (int i = start; i < end; ++i) {
1324            char c = text[i];
1325            if (c >= minDigit && c <= '9') {
1326                text[i] = (char)(c + base);
1327                continue;
1328            }
1329            if (isStrongDirectional(c)) {
1330                ctxKey = rangeForCodePoint(c);
1331                if (ctxKey != lastKey) {
1332                    lastKey = ctxKey;
1333                    base = ctxKey.getDigitBase();
1334                    minDigit = (char)('0' + ctxKey.getNumericBase());
1335                }
1336            }
1337        }
1338    }
1339
1340    /**
1341     * Returns a hash code for this shaper.
1342     * @return this shaper's hash code.
1343     * @see java.lang.Object#hashCode
1344     */
1345    public int hashCode() {
1346        int hash = mask;
1347        if (rangeSet != null) {
1348            // Use the CONTEXTUAL_MASK bit only for the enum-based
1349            // NumericShaper. A deserialized NumericShaper might have
1350            // bit masks.
1351            hash &= CONTEXTUAL_MASK;
1352            hash ^= rangeSet.hashCode();
1353        }
1354        return hash;
1355    }
1356
1357    /**
1358     * Returns {@code true} if the specified object is an instance of
1359     * <code>NumericShaper</code> and shapes identically to this one,
1360     * regardless of the range representations, the bit mask or the
1361     * enum. For example, the following code produces {@code "true"}.
1362     * <blockquote><pre>
1363     * NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC);
1364     * NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC);
1365     * System.out.println(ns1.equals(ns2));
1366     * </pre></blockquote>
1367     *
1368     * @param o the specified object to compare to this
1369     *          <code>NumericShaper</code>
1370     * @return <code>true</code> if <code>o</code> is an instance
1371     *         of <code>NumericShaper</code> and shapes in the same way;
1372     *         <code>false</code> otherwise.
1373     * @see java.lang.Object#equals(java.lang.Object)
1374     */
1375    public boolean equals(Object o) {
1376        if (o != null) {
1377            try {
1378                NumericShaper rhs = (NumericShaper)o;
1379                if (rangeSet != null) {
1380                    if (rhs.rangeSet != null) {
1381                        return isContextual() == rhs.isContextual()
1382                            && rangeSet.equals(rhs.rangeSet)
1383                            && shapingRange == rhs.shapingRange;
1384                    }
1385                    return isContextual() == rhs.isContextual()
1386                        && rangeSet.equals(Range.maskToRangeSet(rhs.mask))
1387                        && shapingRange == Range.indexToRange(rhs.key);
1388                } else if (rhs.rangeSet != null) {
1389                    Set<Range> rset = Range.maskToRangeSet(mask);
1390                    Range srange = Range.indexToRange(key);
1391                    return isContextual() == rhs.isContextual()
1392                        && rset.equals(rhs.rangeSet)
1393                        && srange == rhs.shapingRange;
1394                }
1395                return rhs.mask == mask && rhs.key == key;
1396            }
1397            catch (ClassCastException e) {
1398            }
1399        }
1400        return false;
1401    }
1402
1403    /**
1404     * Returns a <code>String</code> that describes this shaper. This method
1405     * is used for debugging purposes only.
1406     * @return a <code>String</code> describing this shaper.
1407     */
1408    public String toString() {
1409        StringBuilder buf = new StringBuilder(super.toString());
1410
1411        buf.append("[contextual:").append(isContextual());
1412
1413        String[] keyNames = null;
1414        if (isContextual()) {
1415            buf.append(", context:");
1416            buf.append(shapingRange == null ? Range.values()[key] : shapingRange);
1417        }
1418
1419        if (rangeSet == null) {
1420            buf.append(", range(s): ");
1421            boolean first = true;
1422            for (int i = 0; i < NUM_KEYS; ++i) {
1423                if ((mask & (1 << i)) != 0) {
1424                    if (first) {
1425                        first = false;
1426                    } else {
1427                        buf.append(", ");
1428                    }
1429                    buf.append(Range.values()[i]);
1430                }
1431            }
1432        } else {
1433            buf.append(", range set: ").append(rangeSet);
1434        }
1435        buf.append(']');
1436
1437        return buf.toString();
1438    }
1439
1440    /**
1441     * Returns the index of the high bit in value (assuming le, actually
1442     * power of 2 >= value). value must be positive.
1443     */
1444    private static int getHighBit(int value) {
1445        if (value <= 0) {
1446            return -32;
1447        }
1448
1449        int bit = 0;
1450
1451        if (value >= 1 << 16) {
1452            value >>= 16;
1453            bit += 16;
1454        }
1455
1456        if (value >= 1 << 8) {
1457            value >>= 8;
1458            bit += 8;
1459        }
1460
1461        if (value >= 1 << 4) {
1462            value >>= 4;
1463            bit += 4;
1464        }
1465
1466        if (value >= 1 << 2) {
1467            value >>= 2;
1468            bit += 2;
1469        }
1470
1471        if (value >= 1 << 1) {
1472            bit += 1;
1473        }
1474
1475        return bit;
1476    }
1477
1478    /**
1479     * fast binary search over subrange of array.
1480     */
1481    private static int search(int value, int[] array, int start, int length)
1482    {
1483        int power = 1 << getHighBit(length);
1484        int extra = length - power;
1485        int probe = power;
1486        int index = start;
1487
1488        if (value >= array[index + extra]) {
1489            index += extra;
1490        }
1491
1492        while (probe > 1) {
1493            probe >>= 1;
1494
1495            if (value >= array[index + probe]) {
1496                index += probe;
1497            }
1498        }
1499
1500        return index;
1501    }
1502
1503    /**
1504     * Converts the {@code NumericShaper.Range} enum-based parameters,
1505     * if any, to the bit mask-based counterparts and writes this
1506     * object to the {@code stream}. Any enum constants that have no
1507     * bit mask-based counterparts are ignored in the conversion.
1508     *
1509     * @param stream the output stream to write to
1510     * @throws IOException if an I/O error occurs while writing to {@code stream}
1511     * @since 1.7
1512     */
1513    private void writeObject(ObjectOutputStream stream) throws IOException {
1514        if (shapingRange != null) {
1515            int index = Range.toRangeIndex(shapingRange);
1516            if (index >= 0) {
1517                key = index;
1518            }
1519        }
1520        if (rangeSet != null) {
1521            mask |= Range.toRangeMask(rangeSet);
1522        }
1523        stream.defaultWriteObject();
1524    }
1525}
1526