1/*
2 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.awt.font;
27
28import java.io.IOException;
29import java.io.ObjectOutputStream;
30import java.util.Arrays;
31import java.util.Comparator;
32import java.util.EnumSet;
33import java.util.Set;
34
35/**
36 * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
37 * digits to other Unicode decimal digits.  Users of this class will
38 * primarily be people who wish to present data using
39 * national digit shapes, but find it more convenient to represent the
40 * data internally using Latin-1 (European) digits.  This does not
41 * interpret the deprecated numeric shape selector character (U+206E).
42 * <p>
43 * Instances of <code>NumericShaper</code> are typically applied
44 * as attributes to text with the
45 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
46 * of the <code>TextAttribute</code> class.
47 * For example, this code snippet causes a <code>TextLayout</code> to
48 * shape European digits to Arabic in an Arabic context:<br>
49 * <blockquote><pre>
50 * Map map = new HashMap();
51 * map.put(TextAttribute.NUMERIC_SHAPING,
52 *     NumericShaper.getContextualShaper(NumericShaper.ARABIC));
53 * FontRenderContext frc = ...;
54 * TextLayout layout = new TextLayout(text, map, frc);
55 * layout.draw(g2d, x, y);
56 * </pre></blockquote>
57 * <br>
58 * It is also possible to perform numeric shaping explicitly using instances
59 * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
60 * <blockquote><pre>
61 * char[] text = ...;
62 * // shape all EUROPEAN digits (except zero) to ARABIC digits
63 * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
64 * shaper.shape(text, start, count);
65 *
66 * // shape European digits to ARABIC digits if preceding text is Arabic, or
67 * // shape European digits to TAMIL digits if preceding text is Tamil, or
68 * // leave European digits alone if there is no preceding text, or
69 * // preceding text is neither Arabic nor Tamil
70 * NumericShaper shaper =
71 *     NumericShaper.getContextualShaper(NumericShaper.ARABIC |
72 *                                         NumericShaper.TAMIL,
73 *                                       NumericShaper.EUROPEAN);
74 * shaper.shape(text, start, count);
75 * </pre></blockquote>
76 *
77 * <p><b>Bit mask- and enum-based Unicode ranges</b></p>
78 *
79 * <p>This class supports two different programming interfaces to
80 * represent Unicode ranges for script-specific digits: bit
81 * mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and
82 * enum-based ones, such as {@link NumericShaper.Range#ARABIC}.
83 * Multiple ranges can be specified by ORing bit mask-based constants,
84 * such as:
85 * <blockquote><pre>
86 * NumericShaper.ARABIC | NumericShaper.TAMIL
87 * </pre></blockquote>
88 * or creating a {@code Set} with the {@link NumericShaper.Range}
89 * constants, such as:
90 * <blockquote><pre>
 * EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL)
92 * </pre></blockquote>
93 * The enum-based ranges are a super set of the bit mask-based ones.
94 *
95 * <p>If the two interfaces are mixed (including serialization),
96 * Unicode range values are mapped to their counterparts where such
97 * mapping is possible, such as {@code NumericShaper.Range.ARABIC}
98 * from/to {@code NumericShaper.ARABIC}.  If any unmappable range
99 * values are specified, such as {@code NumericShaper.Range.BALINESE},
100 * those ranges are ignored.
101 *
102 * <p><b>Decimal Digits Precedence</b></p>
103 *
104 * <p>A Unicode range may have more than one set of decimal digits. If
105 * multiple decimal digits sets are specified for the same Unicode
106 * range, one of the sets will take precedence as follows.
107 *
108 * <table border=1 cellspacing=3 cellpadding=0 summary="NumericShaper constants precedence.">
109 *    <tr>
110 *       <th class="TableHeadingColor">Unicode Range</th>
111 *       <th class="TableHeadingColor"><code>NumericShaper</code> Constants</th>
112 *       <th class="TableHeadingColor">Precedence</th>
113 *    </tr>
114 *    <tr>
115 *       <td rowspan="2">Arabic</td>
116 *       <td>{@link NumericShaper#ARABIC NumericShaper.ARABIC}<br>
117 *           {@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
118 *       <td>{@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
119 *    </tr>
120 *    <tr>
121 *       <td>{@link NumericShaper.Range#ARABIC}<br>
122 *           {@link NumericShaper.Range#EASTERN_ARABIC}</td>
123 *       <td>{@link NumericShaper.Range#EASTERN_ARABIC}</td>
124 *    </tr>
125 *    <tr>
126 *       <td>Tai Tham</td>
127 *       <td>{@link NumericShaper.Range#TAI_THAM_HORA}<br>
128 *           {@link NumericShaper.Range#TAI_THAM_THAM}</td>
129 *       <td>{@link NumericShaper.Range#TAI_THAM_THAM}</td>
130 *    </tr>
131 * </table>
132 *
133 * @since 1.4
134 */
135
136public final class NumericShaper implements java.io.Serializable {
137    /**
138     * A {@code NumericShaper.Range} represents a Unicode range of a
139     * script having its own decimal digits. For example, the {@link
140     * NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT
141     * ZERO (U+0E50) to THAI DIGIT NINE (U+0E59).
142     *
143     * <p>The <code>Range</code> enum replaces the traditional bit
144     * mask-based values (e.g., {@link NumericShaper#ARABIC}), and
145     * supports more Unicode ranges than the bit mask-based ones. For
146     * example, the following code using the bit mask:
147     * <blockquote><pre>
148     * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
149     *                                     NumericShaper.TAMIL,
150     *                                   NumericShaper.EUROPEAN);
151     * </pre></blockquote>
152     * can be written using this enum as:
153     * <blockquote><pre>
154     * NumericShaper.getContextualShaper(EnumSet.of(
155     *                                     NumericShaper.Range.ARABIC,
156     *                                     NumericShaper.Range.TAMIL),
157     *                                   NumericShaper.Range.EUROPEAN);
158     * </pre></blockquote>
159     *
160     * @since 1.7
161     */
162    public static enum Range {
163        // The order of EUROPEAN to MOGOLIAN must be consistent
164        // with the bitmask-based constants.
165        /**
166         * The Latin (European) range with the Latin (ASCII) digits.
167         */
168        EUROPEAN        ('\u0030', '\u0000', '\u0300'),
169        /**
170         * The Arabic range with the Arabic-Indic digits.
171         */
172        ARABIC          ('\u0660', '\u0600', '\u0780'),
173        /**
174         * The Arabic range with the Eastern Arabic-Indic digits.
175         */
176        EASTERN_ARABIC  ('\u06f0', '\u0600', '\u0780'),
177        /**
178         * The Devanagari range with the Devanagari digits.
179         */
180        DEVANAGARI      ('\u0966', '\u0900', '\u0980'),
181        /**
182         * The Bengali range with the Bengali digits.
183         */
184        BENGALI         ('\u09e6', '\u0980', '\u0a00'),
185        /**
186         * The Gurmukhi range with the Gurmukhi digits.
187         */
188        GURMUKHI        ('\u0a66', '\u0a00', '\u0a80'),
189        /**
190         * The Gujarati range with the Gujarati digits.
191         */
192        GUJARATI        ('\u0ae6', '\u0b00', '\u0b80'),
193        /**
194         * The Oriya range with the Oriya digits.
195         */
196        ORIYA           ('\u0b66', '\u0b00', '\u0b80'),
197        /**
198         * The Tamil range with the Tamil digits.
199         */
200        TAMIL           ('\u0be6', '\u0b80', '\u0c00'),
201        /**
202         * The Telugu range with the Telugu digits.
203         */
204        TELUGU          ('\u0c66', '\u0c00', '\u0c80'),
205        /**
206         * The Kannada range with the Kannada digits.
207         */
208        KANNADA         ('\u0ce6', '\u0c80', '\u0d00'),
209        /**
210         * The Malayalam range with the Malayalam digits.
211         */
212        MALAYALAM       ('\u0d66', '\u0d00', '\u0d80'),
213        /**
214         * The Thai range with the Thai digits.
215         */
216        THAI            ('\u0e50', '\u0e00', '\u0e80'),
217        /**
218         * The Lao range with the Lao digits.
219         */
220        LAO             ('\u0ed0', '\u0e80', '\u0f00'),
221        /**
222         * The Tibetan range with the Tibetan digits.
223         */
224        TIBETAN         ('\u0f20', '\u0f00', '\u1000'),
225        /**
226         * The Myanmar range with the Myanmar digits.
227         */
228        MYANMAR         ('\u1040', '\u1000', '\u1080'),
229        /**
230         * The Ethiopic range with the Ethiopic digits. Ethiopic
231         * does not have a decimal digit 0 so Latin (European) 0 is
232         * used.
233         */
234        ETHIOPIC        ('\u1369', '\u1200', '\u1380') {
235            @Override
236            char getNumericBase() { return 1; }
237        },
238        /**
239         * The Khmer range with the Khmer digits.
240         */
241        KHMER           ('\u17e0', '\u1780', '\u1800'),
242        /**
243         * The Mongolian range with the Mongolian digits.
244         */
245        MONGOLIAN       ('\u1810', '\u1800', '\u1900'),
246        // The order of EUROPEAN to MOGOLIAN must be consistent
247        // with the bitmask-based constants.
248
249        /**
250         * The N'Ko range with the N'Ko digits.
251         */
252        NKO             ('\u07c0', '\u07c0', '\u0800'),
253        /**
254         * The Myanmar range with the Myanmar Shan digits.
255         */
256        MYANMAR_SHAN    ('\u1090', '\u1000', '\u10a0'),
257        /**
258         * The Limbu range with the Limbu digits.
259         */
260        LIMBU           ('\u1946', '\u1900', '\u1950'),
261        /**
262         * The New Tai Lue range with the New Tai Lue digits.
263         */
264        NEW_TAI_LUE     ('\u19d0', '\u1980', '\u19e0'),
265        /**
266         * The Balinese range with the Balinese digits.
267         */
268        BALINESE        ('\u1b50', '\u1b00', '\u1b80'),
269        /**
270         * The Sundanese range with the Sundanese digits.
271         */
272        SUNDANESE       ('\u1bb0', '\u1b80', '\u1bc0'),
273        /**
274         * The Lepcha range with the Lepcha digits.
275         */
276        LEPCHA          ('\u1c40', '\u1c00', '\u1c50'),
277        /**
278         * The Ol Chiki range with the Ol Chiki digits.
279         */
280        OL_CHIKI        ('\u1c50', '\u1c50', '\u1c80'),
281        /**
282         * The Vai range with the Vai digits.
283         */
284        VAI             ('\ua620', '\ua500', '\ua640'),
285        /**
286         * The Saurashtra range with the Saurashtra digits.
287         */
288        SAURASHTRA      ('\ua8d0', '\ua880', '\ua8e0'),
289        /**
290         * The Kayah Li range with the Kayah Li digits.
291         */
292        KAYAH_LI        ('\ua900', '\ua900', '\ua930'),
293        /**
294         * The Cham range with the Cham digits.
295         */
296        CHAM            ('\uaa50', '\uaa00', '\uaa60'),
297        /**
298         * The Tai Tham Hora range with the Tai Tham Hora digits.
299         */
300        TAI_THAM_HORA   ('\u1a80', '\u1a20', '\u1ab0'),
301        /**
302         * The Tai Tham Tham range with the Tai Tham Tham digits.
303         */
304        TAI_THAM_THAM   ('\u1a90', '\u1a20', '\u1ab0'),
305        /**
306         * The Javanese range with the Javanese digits.
307         */
308        JAVANESE        ('\ua9d0', '\ua980', '\ua9e0'),
309        /**
310         * The Meetei Mayek range with the Meetei Mayek digits.
311         */
312        MEETEI_MAYEK    ('\uabf0', '\uabc0', '\uac00');
313
314        private static int toRangeIndex(Range script) {
315            int index = script.ordinal();
316            return index < NUM_KEYS ? index : -1;
317        }
318
319        private static Range indexToRange(int index) {
320            return index < NUM_KEYS ? Range.values()[index] : null;
321        }
322
323        private static int toRangeMask(Set<Range> ranges) {
324            int m = 0;
325            for (Range range : ranges) {
326                int index = range.ordinal();
327                if (index < NUM_KEYS) {
328                    m |= 1 << index;
329                }
330            }
331            return m;
332        }
333
334        private static Set<Range> maskToRangeSet(int mask) {
335            Set<Range> set = EnumSet.noneOf(Range.class);
336            Range[] a = Range.values();
337            for (int i = 0; i < NUM_KEYS; i++) {
338                if ((mask & (1 << i)) != 0) {
339                    set.add(a[i]);
340                }
341            }
342            return set;
343        }
344
345        // base character of range digits
346        private final int base;
347        // Unicode range
348        private final int start, // inclusive
349                          end;   // exclusive
350
351        private Range(int base, int start, int end) {
352            this.base = base - ('0' + getNumericBase());
353            this.start = start;
354            this.end = end;
355        }
356
357        private int getDigitBase() {
358            return base;
359        }
360
361        char getNumericBase() {
362            return 0;
363        }
364
365        private boolean inRange(int c) {
366            return start <= c && c < end;
367        }
368    }
369
    /** index of context for contextual shaping - values range from 0 to 18
     *  (the values of the EUROPEAN_KEY to MONGOLIAN_KEY constants)
     */
    private int key;

    /** flag indicating whether to shape contextually (high bit) and which
     *  digit ranges to shape (bits 0-18)
     */
    private int mask;

    /**
     * The context {@code Range} for contextual shaping or the {@code
     * Range} for non-contextual shaping. {@code null} for the bit
     * mask-based API.
     *
     * @since 1.7
     */
    private Range shapingRange;

    /**
     * {@code Set<Range>} indicating which Unicode ranges to
     * shape. {@code null} for the bit mask-based API.
     * Declared transient, so it is not written by default serialization.
     */
    private transient Set<Range> rangeSet;

    /**
     * rangeSet.toArray() value. Sorted by Range.base when the number
     * of elements is greater than BSEARCH_THRESHOLD.
     * Declared transient, so it is not written by default serialization.
     */
    private transient Range[] rangeArray;

    /**
     * If more than BSEARCH_THRESHOLD ranges are specified, binary search is used.
     */
    private static final int BSEARCH_THRESHOLD = 3;

    /** Serialization version identifier for this {@code Serializable} class. */
    private static final long serialVersionUID = -8022764705923730308L;
405
    // Bit mask-based range constants. Each constant has a single bit set,
    // at the position given by the matching private *_KEY constant.

    /** Identifies the Latin-1 (European) and extended range, and
     *  Latin-1 (European) decimal base.
     */
    public static final int EUROPEAN = 1<<0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1<<1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1<<2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1<<3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1<<4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1<<5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1<<6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1<<7;

    /** Identifies the TAMIL range and decimal base. */
    // TAMIL DIGIT ZERO was added in Unicode 4.1
    public static final int TAMIL = 1<<8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1<<9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1<<10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1<<11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1<<12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1<<13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1<<14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1<<15;

    /** Identifies the ETHIOPIC range and decimal base. */
    public static final int ETHIOPIC = 1<<16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1<<17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1<<18;

    /** Identifies all ranges, for full contextual shaping.
     *
     * <p>This constant specifies all of the bit mask-based
     * ranges (bits 0 to 18 set). Use
     * {@code EnumSet.allOf(NumericShaper.Range.class)} to
     * specify all of the enum-based ranges.
     */
    public static final int ALL_RANGES = 0x0007ffff;
473
    // Key (bit position) of each bit mask-based range. EUROPEAN_KEY is 0 and
    // MONGOLIAN_KEY is 18, matching the shift amounts of the public bit mask
    // constants and the row order of the bases and contexts tables.
    private static final int EUROPEAN_KEY = 0;
    private static final int ARABIC_KEY = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY = 3;
    private static final int BENGALI_KEY = 4;
    private static final int GURMUKHI_KEY = 5;
    private static final int GUJARATI_KEY = 6;
    private static final int ORIYA_KEY = 7;
    private static final int TAMIL_KEY = 8;
    private static final int TELUGU_KEY = 9;
    private static final int KANNADA_KEY = 10;
    private static final int MALAYALAM_KEY = 11;
    private static final int THAI_KEY = 12;
    private static final int LAO_KEY = 13;
    private static final int TIBETAN_KEY = 14;
    private static final int MYANMAR_KEY = 15;
    private static final int ETHIOPIC_KEY = 16;
    private static final int KHMER_KEY = 17;
    private static final int MONGOLIAN_KEY = 18;

    // Number of bit mask-based keys (19).
    private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed

    // High bit of the mask; the mask field documents it as the
    // "shape contextually" flag.
    private static final int CONTEXTUAL_MASK = 1<<31;
497
    // Digit offsets, one entry per key in *_KEY order: the script's first
    // decimal digit minus U+0030 (DIGIT ZERO). ETHIOPIC subtracts U+0031
    // instead because its first digit stands for one, not zero.
    private static final char[] bases = {
        '\u0030' - '\u0030', // EUROPEAN
        '\u0660' - '\u0030', // ARABIC-INDIC
        '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
        '\u0966' - '\u0030', // DEVANAGARI
        '\u09e6' - '\u0030', // BENGALI
        '\u0a66' - '\u0030', // GURMUKHI
        '\u0ae6' - '\u0030', // GUJARATI
        '\u0b66' - '\u0030', // ORIYA
        '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
        '\u0c66' - '\u0030', // TELUGU
        '\u0ce6' - '\u0030', // KANNADA
        '\u0d66' - '\u0030', // MALAYALAM
        '\u0e50' - '\u0030', // THAI
        '\u0ed0' - '\u0030', // LAO
        '\u0f20' - '\u0030', // TIBETAN
        '\u1040' - '\u0030', // MYANMAR
        '\u1369' - '\u0031', // ETHIOPIC - no zero
        '\u17e0' - '\u0030', // KHMER
        '\u1810' - '\u0030', // MONGOLIAN
    };
519
    // some ranges adjoin or overlap, rethink if we want to do a binary search on this

    // Context ranges, one (start, end) pair per key in *_KEY order, followed
    // by a single U+FFFF sentinel. getContextKey treats start as inclusive
    // and end as exclusive; the pair for key k starts at index 2 * k.
    private static final char[] contexts = {
        '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
        '\u0600', '\u0780', // ARABIC
        '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
        '\u0900', '\u0980', // DEVANAGARI
        '\u0980', '\u0a00', // BENGALI
        '\u0a00', '\u0a80', // GURMUKHI
        '\u0a80', '\u0b00', // GUJARATI
        '\u0b00', '\u0b80', // ORIYA
        '\u0b80', '\u0c00', // TAMIL
        '\u0c00', '\u0c80', // TELUGU
        '\u0c80', '\u0d00', // KANNADA
        '\u0d00', '\u0d80', // MALAYALAM
        '\u0e00', '\u0e80', // THAI
        '\u0e80', '\u0f00', // LAO
        '\u0f00', '\u1000', // TIBETAN
        '\u1000', '\u1080', // MYANMAR
        '\u1200', '\u1380', // ETHIOPIC - note missing zero
        '\u1780', '\u1800', // KHMER
        '\u1800', '\u1900', // MONGOLIAN
        '\uffff',
    };

    // assume most characters are near each other so probing the cache is infrequent,
    // and a linear probe is ok.

    // Index into contexts left by the previous getContextKey call.
    private static int ctCache = 0;
    // Largest value ctCache may take, so that ctCache + 1 stays a valid index.
    private static int ctCacheLimit = contexts.length - 2;
550
551    // warning, synchronize access to this as it modifies state
552    private static int getContextKey(char c) {
553        if (c < contexts[ctCache]) {
554            while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
555        } else if (c >= contexts[ctCache + 1]) {
556            while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
557        }
558
559        // if we're not in a known range, then return EUROPEAN as the range key
560        return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
561    }
562
    // cache for the NumericShaper.Range version: consulted first by
    // rangeForCodePoint and updated by its binary search; starts as EUROPEAN.
    // Declared transient and volatile.
    private transient volatile Range currentRange = Range.EUROPEAN;
565
566    private Range rangeForCodePoint(final int codepoint) {
567        if (currentRange.inRange(codepoint)) {
568            return currentRange;
569        }
570
571        final Range[] ranges = rangeArray;
572        if (ranges.length > BSEARCH_THRESHOLD) {
573            int lo = 0;
574            int hi = ranges.length - 1;
575            while (lo <= hi) {
576                int mid = (lo + hi) / 2;
577                Range range = ranges[mid];
578                if (codepoint < range.start) {
579                    hi = mid - 1;
580                } else if (codepoint >= range.end) {
581                    lo = mid + 1;
582                } else {
583                    currentRange = range;
584                    return range;
585                }
586            }
587        } else {
588            for (int i = 0; i < ranges.length; i++) {
589                if (ranges[i].inRange(codepoint)) {
590                    return ranges[i];
591                }
592            }
593        }
594        return Range.EUROPEAN;
595    }
596
    /*
     * A range table of strong directional characters (types L, R, AL).
     * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
     * characters, odd (right) indexes are starts of ranges of strong directional
     * characters.
     *
     * The values are in ascending order. isStrongDirectional locates the
     * entry for a character and reports "strong" when that entry's index
     * is odd.
     */
    private static int[] strongTable = {
        0x0000, 0x0041,
        0x005b, 0x0061,
        0x007b, 0x00aa,
        0x00ab, 0x00b5,
        0x00b6, 0x00ba,
        0x00bb, 0x00c0,
        0x00d7, 0x00d8,
        0x00f7, 0x00f8,
        0x02b9, 0x02bb,
        0x02c2, 0x02d0,
        0x02d2, 0x02e0,
        0x02e5, 0x02ee,
        0x02ef, 0x0370,
        0x0374, 0x0376,
        0x037e, 0x0386,
        0x0387, 0x0388,
        0x03f6, 0x03f7,
        0x0483, 0x048a,
        0x058a, 0x05be,
        0x05bf, 0x05c0,
        0x05c1, 0x05c3,
        0x05c4, 0x05c6,
        0x05c7, 0x05d0,
        0x0600, 0x0608,
        0x0609, 0x060b,
        0x060c, 0x060d,
        0x060e, 0x061b,
        0x064b, 0x066d,
        0x0670, 0x0671,
        0x06d6, 0x06e5,
        0x06e7, 0x06ee,
        0x06f0, 0x06fa,
        0x070f, 0x0710,
        0x0711, 0x0712,
        0x0730, 0x074d,
        0x07a6, 0x07b1,
        0x07eb, 0x07f4,
        0x07f6, 0x07fa,
        0x0816, 0x081a,
        0x081b, 0x0824,
        0x0825, 0x0828,
        0x0829, 0x0830,
        0x0859, 0x085e,
        0x0900, 0x0903,
        0x093a, 0x093b,
        0x093c, 0x093d,
        0x0941, 0x0949,
        0x094d, 0x094e,
        0x0951, 0x0958,
        0x0962, 0x0964,
        0x0981, 0x0982,
        0x09bc, 0x09bd,
        0x09c1, 0x09c7,
        0x09cd, 0x09ce,
        0x09e2, 0x09e6,
        0x09f2, 0x09f4,
        0x09fb, 0x0a03,
        0x0a3c, 0x0a3e,
        0x0a41, 0x0a59,
        0x0a70, 0x0a72,
        0x0a75, 0x0a83,
        0x0abc, 0x0abd,
        0x0ac1, 0x0ac9,
        0x0acd, 0x0ad0,
        0x0ae2, 0x0ae6,
        0x0af1, 0x0b02,
        0x0b3c, 0x0b3d,
        0x0b3f, 0x0b40,
        0x0b41, 0x0b47,
        0x0b4d, 0x0b57,
        0x0b62, 0x0b66,
        0x0b82, 0x0b83,
        0x0bc0, 0x0bc1,
        0x0bcd, 0x0bd0,
        0x0bf3, 0x0c01,
        0x0c3e, 0x0c41,
        0x0c46, 0x0c58,
        0x0c62, 0x0c66,
        0x0c78, 0x0c7f,
        0x0cbc, 0x0cbd,
        0x0ccc, 0x0cd5,
        0x0ce2, 0x0ce6,
        0x0d41, 0x0d46,
        0x0d4d, 0x0d4e,
        0x0d62, 0x0d66,
        0x0dca, 0x0dcf,
        0x0dd2, 0x0dd8,
        0x0e31, 0x0e32,
        0x0e34, 0x0e40,
        0x0e47, 0x0e4f,
        0x0eb1, 0x0eb2,
        0x0eb4, 0x0ebd,
        0x0ec8, 0x0ed0,
        0x0f18, 0x0f1a,
        0x0f35, 0x0f36,
        0x0f37, 0x0f38,
        0x0f39, 0x0f3e,
        0x0f71, 0x0f7f,
        0x0f80, 0x0f85,
        0x0f86, 0x0f88,
        0x0f8d, 0x0fbe,
        0x0fc6, 0x0fc7,
        0x102d, 0x1031,
        0x1032, 0x1038,
        0x1039, 0x103b,
        0x103d, 0x103f,
        0x1058, 0x105a,
        0x105e, 0x1061,
        0x1071, 0x1075,
        0x1082, 0x1083,
        0x1085, 0x1087,
        0x108d, 0x108e,
        0x109d, 0x109e,
        0x135d, 0x1360,
        0x1390, 0x13a0,
        0x1400, 0x1401,
        0x1680, 0x1681,
        0x169b, 0x16a0,
        0x1712, 0x1720,
        0x1732, 0x1735,
        0x1752, 0x1760,
        0x1772, 0x1780,
        0x17b7, 0x17be,
        0x17c6, 0x17c7,
        0x17c9, 0x17d4,
        0x17db, 0x17dc,
        0x17dd, 0x17e0,
        0x17f0, 0x1810,
        0x18a9, 0x18aa,
        0x1920, 0x1923,
        0x1927, 0x1929,
        0x1932, 0x1933,
        0x1939, 0x1946,
        0x19de, 0x1a00,
        0x1a17, 0x1a19,
        0x1a56, 0x1a57,
        0x1a58, 0x1a61,
        0x1a62, 0x1a63,
        0x1a65, 0x1a6d,
        0x1a73, 0x1a80,
        0x1b00, 0x1b04,
        0x1b34, 0x1b35,
        0x1b36, 0x1b3b,
        0x1b3c, 0x1b3d,
        0x1b42, 0x1b43,
        0x1b6b, 0x1b74,
        0x1b80, 0x1b82,
        0x1ba2, 0x1ba6,
        0x1ba8, 0x1baa,
        0x1be6, 0x1be7,
        0x1be8, 0x1bea,
        0x1bed, 0x1bee,
        0x1bef, 0x1bf2,
        0x1c2c, 0x1c34,
        0x1c36, 0x1c3b,
        0x1cd0, 0x1cd3,
        0x1cd4, 0x1ce1,
        0x1ce2, 0x1ce9,
        0x1ced, 0x1cee,
        0x1dc0, 0x1e00,
        0x1fbd, 0x1fbe,
        0x1fbf, 0x1fc2,
        0x1fcd, 0x1fd0,
        0x1fdd, 0x1fe0,
        0x1fed, 0x1ff2,
        0x1ffd, 0x200e,
        0x2010, 0x2071,
        0x2074, 0x207f,
        0x2080, 0x2090,
        0x20a0, 0x2102,
        0x2103, 0x2107,
        0x2108, 0x210a,
        0x2114, 0x2115,
        0x2116, 0x2119,
        0x211e, 0x2124,
        0x2125, 0x2126,
        0x2127, 0x2128,
        0x2129, 0x212a,
        0x212e, 0x212f,
        0x213a, 0x213c,
        0x2140, 0x2145,
        0x214a, 0x214e,
        0x2150, 0x2160,
        0x2189, 0x2336,
        0x237b, 0x2395,
        0x2396, 0x249c,
        0x24ea, 0x26ac,
        0x26ad, 0x2800,
        0x2900, 0x2c00,
        0x2ce5, 0x2ceb,
        0x2cef, 0x2d00,
        0x2d7f, 0x2d80,
        0x2de0, 0x3005,
        0x3008, 0x3021,
        0x302a, 0x3031,
        0x3036, 0x3038,
        0x303d, 0x3041,
        0x3099, 0x309d,
        0x30a0, 0x30a1,
        0x30fb, 0x30fc,
        0x31c0, 0x31f0,
        0x321d, 0x3220,
        0x3250, 0x3260,
        0x327c, 0x327f,
        0x32b1, 0x32c0,
        0x32cc, 0x32d0,
        0x3377, 0x337b,
        0x33de, 0x33e0,
        0x33ff, 0x3400,
        0x4dc0, 0x4e00,
        0xa490, 0xa4d0,
        0xa60d, 0xa610,
        0xa66f, 0xa680,
        0xa6f0, 0xa6f2,
        0xa700, 0xa722,
        0xa788, 0xa789,
        0xa802, 0xa803,
        0xa806, 0xa807,
        0xa80b, 0xa80c,
        0xa825, 0xa827,
        0xa828, 0xa830,
        0xa838, 0xa840,
        0xa874, 0xa880,
        0xa8c4, 0xa8ce,
        0xa8e0, 0xa8f2,
        0xa926, 0xa92e,
        0xa947, 0xa952,
        0xa980, 0xa983,
        0xa9b3, 0xa9b4,
        0xa9b6, 0xa9ba,
        0xa9bc, 0xa9bd,
        0xaa29, 0xaa2f,
        0xaa31, 0xaa33,
        0xaa35, 0xaa40,
        0xaa43, 0xaa44,
        0xaa4c, 0xaa4d,
        0xaab0, 0xaab1,
        0xaab2, 0xaab5,
        0xaab7, 0xaab9,
        0xaabe, 0xaac0,
        0xaac1, 0xaac2,
        0xabe5, 0xabe6,
        0xabe8, 0xabe9,
        0xabed, 0xabf0,
        0xfb1e, 0xfb1f,
        0xfb29, 0xfb2a,
        0xfd3e, 0xfd50,
        0xfdfd, 0xfe70,
        0xfeff, 0xff21,
        0xff3b, 0xff41,
        0xff5b, 0xff66,
        0xffe0, 0x10000,
        0x10101, 0x10102,
        0x10140, 0x101d0,
        0x101fd, 0x10280,
        0x1091f, 0x10920,
        0x10a01, 0x10a10,
        0x10a38, 0x10a40,
        0x10b39, 0x10b40,
        0x10e60, 0x11000,
        0x11001, 0x11002,
        0x11038, 0x11047,
        0x11052, 0x11066,
        0x11080, 0x11082,
        0x110b3, 0x110b7,
        0x110b9, 0x110bb,
        0x1d167, 0x1d16a,
        0x1d173, 0x1d183,
        0x1d185, 0x1d18c,
        0x1d1aa, 0x1d1ae,
        0x1d200, 0x1d360,
        0x1d6db, 0x1d6dc,
        0x1d715, 0x1d716,
        0x1d74f, 0x1d750,
        0x1d789, 0x1d78a,
        0x1d7c3, 0x1d7c4,
        0x1d7ce, 0x1f110,
        0x1f300, 0x1f48c,
        0x1f48d, 0x1f524,
        0x1f525, 0x20000,
        0xe0001, 0xf0000,
        0x10fffe, 0x10ffff // sentinel
    };


    // use a binary search with a cache

    // Index into strongTable found by the previous isStrongDirectional call
    // on this instance; used as the starting point of the next lookup.
    private transient volatile int stCache = 0;
892
893    private boolean isStrongDirectional(char c) {
894        int cachedIndex = stCache;
895        if (c < strongTable[cachedIndex]) {
896            cachedIndex = search(c, strongTable, 0, cachedIndex);
897        } else if (c >= strongTable[cachedIndex + 1]) {
898            cachedIndex = search(c, strongTable, cachedIndex + 1,
899                                 strongTable.length - cachedIndex - 1);
900        }
901        boolean val = (cachedIndex & 0x1) == 1;
902        stCache = cachedIndex;
903        return val;
904    }
905
906    private static int getKeyFromMask(int mask) {
907        int key = 0;
908        while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
909            ++key;
910        }
911        if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
912            throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
913        }
914        return key;
915    }
916
917    /**
918     * Returns a shaper for the provided unicode range.  All
919     * Latin-1 (EUROPEAN) digits are converted
920     * to the corresponding decimal unicode digits.
921     * @param singleRange the specified Unicode range
922     * @return a non-contextual numeric shaper
923     * @throws IllegalArgumentException if the range is not a single range
924     */
925    public static NumericShaper getShaper(int singleRange) {
926        int key = getKeyFromMask(singleRange);
927        return new NumericShaper(key, singleRange);
928    }
929
930    /**
931     * Returns a shaper for the provided Unicode
932     * range. All Latin-1 (EUROPEAN) digits are converted to the
933     * corresponding decimal digits of the specified Unicode range.
934     *
935     * @param singleRange the Unicode range given by a {@link
936     *                    NumericShaper.Range} constant.
937     * @return a non-contextual {@code NumericShaper}.
938     * @throws NullPointerException if {@code singleRange} is {@code null}
939     * @since 1.7
940     */
941    public static NumericShaper getShaper(Range singleRange) {
942        return new NumericShaper(singleRange, EnumSet.of(singleRange));
943    }
944
945    /**
946     * Returns a contextual shaper for the provided unicode range(s).
947     * Latin-1 (EUROPEAN) digits are converted to the decimal digits
948     * corresponding to the range of the preceding text, if the
949     * range is one of the provided ranges.  Multiple ranges are
950     * represented by or-ing the values together, such as,
951     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
952     * shaper assumes EUROPEAN as the starting context, that is, if
953     * EUROPEAN digits are encountered before any strong directional
954     * text in the string, the context is presumed to be EUROPEAN, and
955     * so the digits will not shape.
956     * @param ranges the specified Unicode ranges
957     * @return a shaper for the specified ranges
958     */
959    public static NumericShaper getContextualShaper(int ranges) {
960        ranges |= CONTEXTUAL_MASK;
961        return new NumericShaper(EUROPEAN_KEY, ranges);
962    }
963
964    /**
965     * Returns a contextual shaper for the provided Unicode
966     * range(s). The Latin-1 (EUROPEAN) digits are converted to the
967     * decimal digits corresponding to the range of the preceding
968     * text, if the range is one of the provided ranges.
969     *
970     * <p>The shaper assumes EUROPEAN as the starting context, that
971     * is, if EUROPEAN digits are encountered before any strong
972     * directional text in the string, the context is presumed to be
973     * EUROPEAN, and so the digits will not shape.
974     *
975     * @param ranges the specified Unicode ranges
976     * @return a contextual shaper for the specified ranges
977     * @throws NullPointerException if {@code ranges} is {@code null}.
978     * @since 1.7
979     */
980    public static NumericShaper getContextualShaper(Set<Range> ranges) {
981        NumericShaper shaper = new NumericShaper(Range.EUROPEAN, ranges);
982        shaper.mask = CONTEXTUAL_MASK;
983        return shaper;
984    }
985
986    /**
987     * Returns a contextual shaper for the provided unicode range(s).
988     * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
989     * corresponding to the range of the preceding text, if the
990     * range is one of the provided ranges.  Multiple ranges are
991     * represented by or-ing the values together, for example,
992     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
993     * shaper uses defaultContext as the starting context.
994     * @param ranges the specified Unicode ranges
995     * @param defaultContext the starting context, such as
996     * <code>NumericShaper.EUROPEAN</code>
997     * @return a shaper for the specified Unicode ranges.
998     * @throws IllegalArgumentException if the specified
999     * <code>defaultContext</code> is not a single valid range.
1000     */
1001    public static NumericShaper getContextualShaper(int ranges, int defaultContext) {
1002        int key = getKeyFromMask(defaultContext);
1003        ranges |= CONTEXTUAL_MASK;
1004        return new NumericShaper(key, ranges);
1005    }
1006
1007    /**
1008     * Returns a contextual shaper for the provided Unicode range(s).
1009     * The Latin-1 (EUROPEAN) digits will be converted to the decimal
1010     * digits corresponding to the range of the preceding text, if the
1011     * range is one of the provided ranges. The shaper uses {@code
1012     * defaultContext} as the starting context.
1013     *
1014     * @param ranges the specified Unicode ranges
1015     * @param defaultContext the starting context, such as
1016     *                       {@code NumericShaper.Range.EUROPEAN}
1017     * @return a contextual shaper for the specified Unicode ranges.
1018     * @throws NullPointerException
1019     *         if {@code ranges} or {@code defaultContext} is {@code null}
1020     * @since 1.7
1021     */
1022    public static NumericShaper getContextualShaper(Set<Range> ranges,
1023                                                    Range defaultContext) {
1024        if (defaultContext == null) {
1025            throw new NullPointerException();
1026        }
1027        NumericShaper shaper = new NumericShaper(defaultContext, ranges);
1028        shaper.mask = CONTEXTUAL_MASK;
1029        return shaper;
1030    }
1031
1032    /**
1033     * Private constructor.
1034     */
1035    private NumericShaper(int key, int mask) {
1036        this.key = key;
1037        this.mask = mask;
1038    }
1039
1040    private NumericShaper(Range defaultContext, Set<Range> ranges) {
1041        shapingRange = defaultContext;
1042        rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null.
1043
1044        // Give precedance to EASTERN_ARABIC if both ARABIC and
1045        // EASTERN_ARABIC are specified.
1046        if (rangeSet.contains(Range.EASTERN_ARABIC)
1047            && rangeSet.contains(Range.ARABIC)) {
1048            rangeSet.remove(Range.ARABIC);
1049        }
1050
1051        // As well as the above case, give precedance to TAI_THAM_THAM if both
1052        // TAI_THAM_HORA and TAI_THAM_THAM are specified.
1053        if (rangeSet.contains(Range.TAI_THAM_THAM)
1054            && rangeSet.contains(Range.TAI_THAM_HORA)) {
1055            rangeSet.remove(Range.TAI_THAM_HORA);
1056        }
1057
1058        rangeArray = rangeSet.toArray(new Range[rangeSet.size()]);
1059        if (rangeArray.length > BSEARCH_THRESHOLD) {
1060            // sort rangeArray for binary search
1061            Arrays.sort(rangeArray,
1062                        new Comparator<Range>() {
1063                            public int compare(Range s1, Range s2) {
1064                                return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1;
1065                            }
1066                        });
1067        }
1068    }
1069
1070    /**
1071     * Converts the digits in the text that occur between start and
1072     * start + count.
1073     * @param text an array of characters to convert
1074     * @param start the index into <code>text</code> to start
1075     *        converting
1076     * @param count the number of characters in <code>text</code>
1077     *        to convert
1078     * @throws IndexOutOfBoundsException if start or start + count is
1079     *        out of bounds
1080     * @throws NullPointerException if text is null
1081     */
1082    public void shape(char[] text, int start, int count) {
1083        checkParams(text, start, count);
1084        if (isContextual()) {
1085            if (rangeSet == null) {
1086                shapeContextually(text, start, count, key);
1087            } else {
1088                shapeContextually(text, start, count, shapingRange);
1089            }
1090        } else {
1091            shapeNonContextually(text, start, count);
1092        }
1093    }
1094
1095    /**
1096     * Converts the digits in the text that occur between start and
1097     * start + count, using the provided context.
1098     * Context is ignored if the shaper is not a contextual shaper.
1099     * @param text an array of characters
1100     * @param start the index into <code>text</code> to start
1101     *        converting
1102     * @param count the number of characters in <code>text</code>
1103     *        to convert
1104     * @param context the context to which to convert the
1105     *        characters, such as <code>NumericShaper.EUROPEAN</code>
1106     * @throws IndexOutOfBoundsException if start or start + count is
1107     *        out of bounds
1108     * @throws NullPointerException if text is null
1109     * @throws IllegalArgumentException if this is a contextual shaper
1110     * and the specified <code>context</code> is not a single valid
1111     * range.
1112     */
1113    public void shape(char[] text, int start, int count, int context) {
1114        checkParams(text, start, count);
1115        if (isContextual()) {
1116            int ctxKey = getKeyFromMask(context);
1117            if (rangeSet == null) {
1118                shapeContextually(text, start, count, ctxKey);
1119            } else {
1120                shapeContextually(text, start, count, Range.values()[ctxKey]);
1121            }
1122        } else {
1123            shapeNonContextually(text, start, count);
1124        }
1125    }
1126
1127    /**
1128     * Converts the digits in the text that occur between {@code
1129     * start} and {@code start + count}, using the provided {@code
1130     * context}. {@code Context} is ignored if the shaper is not a
1131     * contextual shaper.
1132     *
1133     * @param text  a {@code char} array
1134     * @param start the index into {@code text} to start converting
1135     * @param count the number of {@code char}s in {@code text}
1136     *              to convert
1137     * @param context the context to which to convert the characters,
1138     *                such as {@code NumericShaper.Range.EUROPEAN}
1139     * @throws IndexOutOfBoundsException
1140     *         if {@code start} or {@code start + count} is out of bounds
1141     * @throws NullPointerException
1142     *         if {@code text} or {@code context} is null
1143     * @since 1.7
1144     */
1145    public void shape(char[] text, int start, int count, Range context) {
1146        checkParams(text, start, count);
1147        if (context == null) {
1148            throw new NullPointerException("context is null");
1149        }
1150
1151        if (isContextual()) {
1152            if (rangeSet != null) {
1153                shapeContextually(text, start, count, context);
1154            } else {
1155                int key = Range.toRangeIndex(context);
1156                if (key >= 0) {
1157                    shapeContextually(text, start, count, key);
1158                } else {
1159                    shapeContextually(text, start, count, shapingRange);
1160                }
1161            }
1162        } else {
1163            shapeNonContextually(text, start, count);
1164        }
1165    }
1166
1167    private void checkParams(char[] text, int start, int count) {
1168        if (text == null) {
1169            throw new NullPointerException("text is null");
1170        }
1171        if ((start < 0)
1172            || (start > text.length)
1173            || ((start + count) < 0)
1174            || ((start + count) > text.length)) {
1175            throw new IndexOutOfBoundsException(
1176                "bad start or count for text of length " + text.length);
1177        }
1178    }
1179
1180    /**
1181     * Returns a <code>boolean</code> indicating whether or not
1182     * this shaper shapes contextually.
1183     * @return <code>true</code> if this shaper is contextual;
1184     *         <code>false</code> otherwise.
1185     */
1186    public boolean isContextual() {
1187        return (mask & CONTEXTUAL_MASK) != 0;
1188    }
1189
1190    /**
1191     * Returns an <code>int</code> that ORs together the values for
1192     * all the ranges that will be shaped.
1193     * <p>
1194     * For example, to check if a shaper shapes to Arabic, you would use the
1195     * following:
1196     * <blockquote>
1197     *   <code>if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... </code>
1198     * </blockquote>
1199     *
1200     * <p>Note that this method supports only the bit mask-based
1201     * ranges. Call {@link #getRangeSet()} for the enum-based ranges.
1202     *
1203     * @return the values for all the ranges to be shaped.
1204     */
1205    public int getRanges() {
1206        return mask & ~CONTEXTUAL_MASK;
1207    }
1208
1209    /**
1210     * Returns a {@code Set} representing all the Unicode ranges in
1211     * this {@code NumericShaper} that will be shaped.
1212     *
1213     * @return all the Unicode ranges to be shaped.
1214     * @since 1.7
1215     */
1216    public Set<Range> getRangeSet() {
1217        if (rangeSet != null) {
1218            return EnumSet.copyOf(rangeSet);
1219        }
1220        return Range.maskToRangeSet(mask);
1221    }
1222
1223    /**
1224     * Perform non-contextual shaping.
1225     */
1226    private void shapeNonContextually(char[] text, int start, int count) {
1227        int base;
1228        char minDigit = '0';
1229        if (shapingRange != null) {
1230            base = shapingRange.getDigitBase();
1231            minDigit += shapingRange.getNumericBase();
1232        } else {
1233            base = bases[key];
1234            if (key == ETHIOPIC_KEY) {
1235                minDigit++; // Ethiopic doesn't use decimal zero
1236            }
1237        }
1238        for (int i = start, e = start + count; i < e; ++i) {
1239            char c = text[i];
1240            if (c >= minDigit && c <= '\u0039') {
1241                text[i] = (char)(c + base);
1242            }
1243        }
1244    }
1245
1246    /**
1247     * Perform contextual shaping.
1248     * Synchronized to protect caches used in getContextKey.
1249     */
1250    private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {
1251
1252        // if we don't support this context, then don't shape
1253        if ((mask & (1<<ctxKey)) == 0) {
1254            ctxKey = EUROPEAN_KEY;
1255        }
1256        int lastkey = ctxKey;
1257
1258        int base = bases[ctxKey];
1259        char minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
1260
1261        synchronized (NumericShaper.class) {
1262            for (int i = start, e = start + count; i < e; ++i) {
1263                char c = text[i];
1264                if (c >= minDigit && c <= '\u0039') {
1265                    text[i] = (char)(c + base);
1266                }
1267
1268                if (isStrongDirectional(c)) {
1269                    int newkey = getContextKey(c);
1270                    if (newkey != lastkey) {
1271                        lastkey = newkey;
1272
1273                        ctxKey = newkey;
1274                        if (((mask & EASTERN_ARABIC) != 0) &&
1275                             (ctxKey == ARABIC_KEY ||
1276                              ctxKey == EASTERN_ARABIC_KEY)) {
1277                            ctxKey = EASTERN_ARABIC_KEY;
1278                        } else if (((mask & ARABIC) != 0) &&
1279                             (ctxKey == ARABIC_KEY ||
1280                              ctxKey == EASTERN_ARABIC_KEY)) {
1281                            ctxKey = ARABIC_KEY;
1282                        } else if ((mask & (1<<ctxKey)) == 0) {
1283                            ctxKey = EUROPEAN_KEY;
1284                        }
1285
1286                        base = bases[ctxKey];
1287
1288                        minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
1289                    }
1290                }
1291            }
1292        }
1293    }
1294
1295    private void shapeContextually(char[] text, int start, int count, Range ctxKey) {
1296        // if we don't support the specified context, then don't shape.
1297        if (ctxKey == null || !rangeSet.contains(ctxKey)) {
1298            ctxKey = Range.EUROPEAN;
1299        }
1300
1301        Range lastKey = ctxKey;
1302        int base = ctxKey.getDigitBase();
1303        char minDigit = (char)('0' + ctxKey.getNumericBase());
1304        final int end = start + count;
1305        for (int i = start; i < end; ++i) {
1306            char c = text[i];
1307            if (c >= minDigit && c <= '9') {
1308                text[i] = (char)(c + base);
1309                continue;
1310            }
1311            if (isStrongDirectional(c)) {
1312                ctxKey = rangeForCodePoint(c);
1313                if (ctxKey != lastKey) {
1314                    lastKey = ctxKey;
1315                    base = ctxKey.getDigitBase();
1316                    minDigit = (char)('0' + ctxKey.getNumericBase());
1317                }
1318            }
1319        }
1320    }
1321
1322    /**
1323     * Returns a hash code for this shaper.
1324     * @return this shaper's hash code.
1325     * @see java.lang.Object#hashCode
1326     */
1327    public int hashCode() {
1328        int hash = mask;
1329        if (rangeSet != null) {
1330            // Use the CONTEXTUAL_MASK bit only for the enum-based
1331            // NumericShaper. A deserialized NumericShaper might have
1332            // bit masks.
1333            hash &= CONTEXTUAL_MASK;
1334            hash ^= rangeSet.hashCode();
1335        }
1336        return hash;
1337    }
1338
1339    /**
1340     * Returns {@code true} if the specified object is an instance of
1341     * <code>NumericShaper</code> and shapes identically to this one,
1342     * regardless of the range representations, the bit mask or the
1343     * enum. For example, the following code produces {@code "true"}.
1344     * <blockquote><pre>
1345     * NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC);
1346     * NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC);
1347     * System.out.println(ns1.equals(ns2));
1348     * </pre></blockquote>
1349     *
1350     * @param o the specified object to compare to this
1351     *          <code>NumericShaper</code>
1352     * @return <code>true</code> if <code>o</code> is an instance
1353     *         of <code>NumericShaper</code> and shapes in the same way;
1354     *         <code>false</code> otherwise.
1355     * @see java.lang.Object#equals(java.lang.Object)
1356     */
1357    public boolean equals(Object o) {
1358        if (o != null) {
1359            try {
1360                NumericShaper rhs = (NumericShaper)o;
1361                if (rangeSet != null) {
1362                    if (rhs.rangeSet != null) {
1363                        return isContextual() == rhs.isContextual()
1364                            && rangeSet.equals(rhs.rangeSet)
1365                            && shapingRange == rhs.shapingRange;
1366                    }
1367                    return isContextual() == rhs.isContextual()
1368                        && rangeSet.equals(Range.maskToRangeSet(rhs.mask))
1369                        && shapingRange == Range.indexToRange(rhs.key);
1370                } else if (rhs.rangeSet != null) {
1371                    Set<Range> rset = Range.maskToRangeSet(mask);
1372                    Range srange = Range.indexToRange(key);
1373                    return isContextual() == rhs.isContextual()
1374                        && rset.equals(rhs.rangeSet)
1375                        && srange == rhs.shapingRange;
1376                }
1377                return rhs.mask == mask && rhs.key == key;
1378            }
1379            catch (ClassCastException e) {
1380            }
1381        }
1382        return false;
1383    }
1384
1385    /**
1386     * Returns a <code>String</code> that describes this shaper. This method
1387     * is used for debugging purposes only.
1388     * @return a <code>String</code> describing this shaper.
1389     */
1390    public String toString() {
1391        StringBuilder buf = new StringBuilder(super.toString());
1392
1393        buf.append("[contextual:").append(isContextual());
1394
1395        String[] keyNames = null;
1396        if (isContextual()) {
1397            buf.append(", context:");
1398            buf.append(shapingRange == null ? Range.values()[key] : shapingRange);
1399        }
1400
1401        if (rangeSet == null) {
1402            buf.append(", range(s): ");
1403            boolean first = true;
1404            for (int i = 0; i < NUM_KEYS; ++i) {
1405                if ((mask & (1 << i)) != 0) {
1406                    if (first) {
1407                        first = false;
1408                    } else {
1409                        buf.append(", ");
1410                    }
1411                    buf.append(Range.values()[i]);
1412                }
1413            }
1414        } else {
1415            buf.append(", range set: ").append(rangeSet);
1416        }
1417        buf.append(']');
1418
1419        return buf.toString();
1420    }
1421
1422    /**
1423     * Returns the index of the high bit in value (assuming le, actually
1424     * power of 2 >= value). value must be positive.
1425     */
1426    private static int getHighBit(int value) {
1427        if (value <= 0) {
1428            return -32;
1429        }
1430
1431        int bit = 0;
1432
1433        if (value >= 1 << 16) {
1434            value >>= 16;
1435            bit += 16;
1436        }
1437
1438        if (value >= 1 << 8) {
1439            value >>= 8;
1440            bit += 8;
1441        }
1442
1443        if (value >= 1 << 4) {
1444            value >>= 4;
1445            bit += 4;
1446        }
1447
1448        if (value >= 1 << 2) {
1449            value >>= 2;
1450            bit += 2;
1451        }
1452
1453        if (value >= 1 << 1) {
1454            bit += 1;
1455        }
1456
1457        return bit;
1458    }
1459
1460    /**
1461     * fast binary search over subrange of array.
1462     */
1463    private static int search(int value, int[] array, int start, int length)
1464    {
1465        int power = 1 << getHighBit(length);
1466        int extra = length - power;
1467        int probe = power;
1468        int index = start;
1469
1470        if (value >= array[index + extra]) {
1471            index += extra;
1472        }
1473
1474        while (probe > 1) {
1475            probe >>= 1;
1476
1477            if (value >= array[index + probe]) {
1478                index += probe;
1479            }
1480        }
1481
1482        return index;
1483    }
1484
1485    /**
1486     * Converts the {@code NumericShaper.Range} enum-based parameters,
1487     * if any, to the bit mask-based counterparts and writes this
1488     * object to the {@code stream}. Any enum constants that have no
1489     * bit mask-based counterparts are ignored in the conversion.
1490     *
1491     * @param stream the output stream to write to
1492     * @throws IOException if an I/O error occurs while writing to {@code stream}
1493     * @since 1.7
1494     */
1495    private void writeObject(ObjectOutputStream stream) throws IOException {
1496        if (shapingRange != null) {
1497            int index = Range.toRangeIndex(shapingRange);
1498            if (index >= 0) {
1499                key = index;
1500            }
1501        }
1502        if (rangeSet != null) {
1503            mask |= Range.toRangeMask(rangeSet);
1504        }
1505        stream.defaultWriteObject();
1506    }
1507}
1508