1/* 2 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package java.awt.font; 27 28import java.io.IOException; 29import java.io.ObjectOutputStream; 30import java.util.Arrays; 31import java.util.Comparator; 32import java.util.EnumSet; 33import java.util.Set; 34 35/** 36 * The <code>NumericShaper</code> class is used to convert Latin-1 (European) 37 * digits to other Unicode decimal digits. Users of this class will 38 * primarily be people who wish to present data using 39 * national digit shapes, but find it more convenient to represent the 40 * data internally using Latin-1 (European) digits. This does not 41 * interpret the deprecated numeric shape selector character (U+206E). 
42 * <p> 43 * Instances of <code>NumericShaper</code> are typically applied 44 * as attributes to text with the 45 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute 46 * of the <code>TextAttribute</code> class. 47 * For example, this code snippet causes a <code>TextLayout</code> to 48 * shape European digits to Arabic in an Arabic context:<br> 49 * <blockquote><pre> 50 * Map map = new HashMap(); 51 * map.put(TextAttribute.NUMERIC_SHAPING, 52 * NumericShaper.getContextualShaper(NumericShaper.ARABIC)); 53 * FontRenderContext frc = ...; 54 * TextLayout layout = new TextLayout(text, map, frc); 55 * layout.draw(g2d, x, y); 56 * </pre></blockquote> 57 * <br> 58 * It is also possible to perform numeric shaping explicitly using instances 59 * of <code>NumericShaper</code>, as this code snippet demonstrates:<br> 60 * <blockquote><pre> 61 * char[] text = ...; 62 * // shape all EUROPEAN digits (except zero) to ARABIC digits 63 * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC); 64 * shaper.shape(text, start, count); 65 * 66 * // shape European digits to ARABIC digits if preceding text is Arabic, or 67 * // shape European digits to TAMIL digits if preceding text is Tamil, or 68 * // leave European digits alone if there is no preceding text, or 69 * // preceding text is neither Arabic nor Tamil 70 * NumericShaper shaper = 71 * NumericShaper.getContextualShaper(NumericShaper.ARABIC | 72 * NumericShaper.TAMIL, 73 * NumericShaper.EUROPEAN); 74 * shaper.shape(text, start, count); 75 * </pre></blockquote> 76 * 77 * <p><b>Bit mask- and enum-based Unicode ranges</b></p> 78 * 79 * <p>This class supports two different programming interfaces to 80 * represent Unicode ranges for script-specific digits: bit 81 * mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and 82 * enum-based ones, such as {@link NumericShaper.Range#ARABIC}. 
* Multiple ranges can be specified by ORing bit mask-based constants,
 * such as:
 * <blockquote><pre>
 * NumericShaper.ARABIC | NumericShaper.TAMIL
 * </pre></blockquote>
 * or creating a {@code Set} with the {@link NumericShaper.Range}
 * constants, such as:
 * <blockquote><pre>
 * EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL)
 * </pre></blockquote>
 * The enum-based ranges are a superset of the bit mask-based ones.
 *
 * <p>If the two interfaces are mixed (including serialization),
 * Unicode range values are mapped to their counterparts where such
 * mapping is possible, such as {@code NumericShaper.Range.ARABIC}
 * from/to {@code NumericShaper.ARABIC}.  If any unmappable range
 * values are specified, such as {@code NumericShaper.Range.BALINESE},
 * those ranges are ignored.
 *
 * <p><b>Decimal Digits Precedence</b></p>
 *
 * <p>A Unicode range may have more than one set of decimal digits. If
 * multiple decimal digits sets are specified for the same Unicode
 * range, one of the sets will take precedence as follows.
107 * 108 * <table border=1 cellspacing=3 cellpadding=0 summary="NumericShaper constants precedence."> 109 * <tr> 110 * <th class="TableHeadingColor">Unicode Range</th> 111 * <th class="TableHeadingColor"><code>NumericShaper</code> Constants</th> 112 * <th class="TableHeadingColor">Precedence</th> 113 * </tr> 114 * <tr> 115 * <td rowspan="2">Arabic</td> 116 * <td>{@link NumericShaper#ARABIC NumericShaper.ARABIC}<br> 117 * {@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td> 118 * <td>{@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td> 119 * </tr> 120 * <tr> 121 * <td>{@link NumericShaper.Range#ARABIC}<br> 122 * {@link NumericShaper.Range#EASTERN_ARABIC}</td> 123 * <td>{@link NumericShaper.Range#EASTERN_ARABIC}</td> 124 * </tr> 125 * <tr> 126 * <td>Tai Tham</td> 127 * <td>{@link NumericShaper.Range#TAI_THAM_HORA}<br> 128 * {@link NumericShaper.Range#TAI_THAM_THAM}</td> 129 * <td>{@link NumericShaper.Range#TAI_THAM_THAM}</td> 130 * </tr> 131 * </table> 132 * 133 * @since 1.4 134 */ 135 136public final class NumericShaper implements java.io.Serializable { 137 /** 138 * A {@code NumericShaper.Range} represents a Unicode range of a 139 * script having its own decimal digits. For example, the {@link 140 * NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT 141 * ZERO (U+0E50) to THAI DIGIT NINE (U+0E59). 142 * 143 * <p>The <code>Range</code> enum replaces the traditional bit 144 * mask-based values (e.g., {@link NumericShaper#ARABIC}), and 145 * supports more Unicode ranges than the bit mask-based ones. 
For 146 * example, the following code using the bit mask: 147 * <blockquote><pre> 148 * NumericShaper.getContextualShaper(NumericShaper.ARABIC | 149 * NumericShaper.TAMIL, 150 * NumericShaper.EUROPEAN); 151 * </pre></blockquote> 152 * can be written using this enum as: 153 * <blockquote><pre> 154 * NumericShaper.getContextualShaper(EnumSet.of( 155 * NumericShaper.Range.ARABIC, 156 * NumericShaper.Range.TAMIL), 157 * NumericShaper.Range.EUROPEAN); 158 * </pre></blockquote> 159 * 160 * @since 1.7 161 */ 162 public static enum Range { 163 // The order of EUROPEAN to MOGOLIAN must be consistent 164 // with the bitmask-based constants. 165 /** 166 * The Latin (European) range with the Latin (ASCII) digits. 167 */ 168 EUROPEAN ('\u0030', '\u0000', '\u0300'), 169 /** 170 * The Arabic range with the Arabic-Indic digits. 171 */ 172 ARABIC ('\u0660', '\u0600', '\u0780'), 173 /** 174 * The Arabic range with the Eastern Arabic-Indic digits. 175 */ 176 EASTERN_ARABIC ('\u06f0', '\u0600', '\u0780'), 177 /** 178 * The Devanagari range with the Devanagari digits. 179 */ 180 DEVANAGARI ('\u0966', '\u0900', '\u0980'), 181 /** 182 * The Bengali range with the Bengali digits. 183 */ 184 BENGALI ('\u09e6', '\u0980', '\u0a00'), 185 /** 186 * The Gurmukhi range with the Gurmukhi digits. 187 */ 188 GURMUKHI ('\u0a66', '\u0a00', '\u0a80'), 189 /** 190 * The Gujarati range with the Gujarati digits. 191 */ 192 GUJARATI ('\u0ae6', '\u0b00', '\u0b80'), 193 /** 194 * The Oriya range with the Oriya digits. 195 */ 196 ORIYA ('\u0b66', '\u0b00', '\u0b80'), 197 /** 198 * The Tamil range with the Tamil digits. 199 */ 200 TAMIL ('\u0be6', '\u0b80', '\u0c00'), 201 /** 202 * The Telugu range with the Telugu digits. 203 */ 204 TELUGU ('\u0c66', '\u0c00', '\u0c80'), 205 /** 206 * The Kannada range with the Kannada digits. 207 */ 208 KANNADA ('\u0ce6', '\u0c80', '\u0d00'), 209 /** 210 * The Malayalam range with the Malayalam digits. 
211 */ 212 MALAYALAM ('\u0d66', '\u0d00', '\u0d80'), 213 /** 214 * The Thai range with the Thai digits. 215 */ 216 THAI ('\u0e50', '\u0e00', '\u0e80'), 217 /** 218 * The Lao range with the Lao digits. 219 */ 220 LAO ('\u0ed0', '\u0e80', '\u0f00'), 221 /** 222 * The Tibetan range with the Tibetan digits. 223 */ 224 TIBETAN ('\u0f20', '\u0f00', '\u1000'), 225 /** 226 * The Myanmar range with the Myanmar digits. 227 */ 228 MYANMAR ('\u1040', '\u1000', '\u1080'), 229 /** 230 * The Ethiopic range with the Ethiopic digits. Ethiopic 231 * does not have a decimal digit 0 so Latin (European) 0 is 232 * used. 233 */ 234 ETHIOPIC ('\u1369', '\u1200', '\u1380') { 235 @Override 236 char getNumericBase() { return 1; } 237 }, 238 /** 239 * The Khmer range with the Khmer digits. 240 */ 241 KHMER ('\u17e0', '\u1780', '\u1800'), 242 /** 243 * The Mongolian range with the Mongolian digits. 244 */ 245 MONGOLIAN ('\u1810', '\u1800', '\u1900'), 246 // The order of EUROPEAN to MOGOLIAN must be consistent 247 // with the bitmask-based constants. 248 249 /** 250 * The N'Ko range with the N'Ko digits. 251 */ 252 NKO ('\u07c0', '\u07c0', '\u0800'), 253 /** 254 * The Myanmar range with the Myanmar Shan digits. 255 */ 256 MYANMAR_SHAN ('\u1090', '\u1000', '\u10a0'), 257 /** 258 * The Limbu range with the Limbu digits. 259 */ 260 LIMBU ('\u1946', '\u1900', '\u1950'), 261 /** 262 * The New Tai Lue range with the New Tai Lue digits. 263 */ 264 NEW_TAI_LUE ('\u19d0', '\u1980', '\u19e0'), 265 /** 266 * The Balinese range with the Balinese digits. 267 */ 268 BALINESE ('\u1b50', '\u1b00', '\u1b80'), 269 /** 270 * The Sundanese range with the Sundanese digits. 271 */ 272 SUNDANESE ('\u1bb0', '\u1b80', '\u1bc0'), 273 /** 274 * The Lepcha range with the Lepcha digits. 275 */ 276 LEPCHA ('\u1c40', '\u1c00', '\u1c50'), 277 /** 278 * The Ol Chiki range with the Ol Chiki digits. 279 */ 280 OL_CHIKI ('\u1c50', '\u1c50', '\u1c80'), 281 /** 282 * The Vai range with the Vai digits. 
283 */ 284 VAI ('\ua620', '\ua500', '\ua640'), 285 /** 286 * The Saurashtra range with the Saurashtra digits. 287 */ 288 SAURASHTRA ('\ua8d0', '\ua880', '\ua8e0'), 289 /** 290 * The Kayah Li range with the Kayah Li digits. 291 */ 292 KAYAH_LI ('\ua900', '\ua900', '\ua930'), 293 /** 294 * The Cham range with the Cham digits. 295 */ 296 CHAM ('\uaa50', '\uaa00', '\uaa60'), 297 /** 298 * The Tai Tham Hora range with the Tai Tham Hora digits. 299 */ 300 TAI_THAM_HORA ('\u1a80', '\u1a20', '\u1ab0'), 301 /** 302 * The Tai Tham Tham range with the Tai Tham Tham digits. 303 */ 304 TAI_THAM_THAM ('\u1a90', '\u1a20', '\u1ab0'), 305 /** 306 * The Javanese range with the Javanese digits. 307 */ 308 JAVANESE ('\ua9d0', '\ua980', '\ua9e0'), 309 /** 310 * The Meetei Mayek range with the Meetei Mayek digits. 311 */ 312 MEETEI_MAYEK ('\uabf0', '\uabc0', '\uac00'); 313 314 private static int toRangeIndex(Range script) { 315 int index = script.ordinal(); 316 return index < NUM_KEYS ? index : -1; 317 } 318 319 private static Range indexToRange(int index) { 320 return index < NUM_KEYS ? 
Range.values()[index] : null; 321 } 322 323 private static int toRangeMask(Set<Range> ranges) { 324 int m = 0; 325 for (Range range : ranges) { 326 int index = range.ordinal(); 327 if (index < NUM_KEYS) { 328 m |= 1 << index; 329 } 330 } 331 return m; 332 } 333 334 private static Set<Range> maskToRangeSet(int mask) { 335 Set<Range> set = EnumSet.noneOf(Range.class); 336 Range[] a = Range.values(); 337 for (int i = 0; i < NUM_KEYS; i++) { 338 if ((mask & (1 << i)) != 0) { 339 set.add(a[i]); 340 } 341 } 342 return set; 343 } 344 345 // base character of range digits 346 private final int base; 347 // Unicode range 348 private final int start, // inclusive 349 end; // exclusive 350 351 private Range(int base, int start, int end) { 352 this.base = base - ('0' + getNumericBase()); 353 this.start = start; 354 this.end = end; 355 } 356 357 private int getDigitBase() { 358 return base; 359 } 360 361 char getNumericBase() { 362 return 0; 363 } 364 365 private boolean inRange(int c) { 366 return start <= c && c < end; 367 } 368 } 369 370 /** index of context for contextual shaping - values range from 0 to 18 */ 371 private int key; 372 373 /** flag indicating whether to shape contextually (high bit) and which 374 * digit ranges to shape (bits 0-18) 375 */ 376 private int mask; 377 378 /** 379 * The context {@code Range} for contextual shaping or the {@code 380 * Range} for non-contextual shaping. {@code null} for the bit 381 * mask-based API. 382 * 383 * @since 1.7 384 */ 385 private Range shapingRange; 386 387 /** 388 * {@code Set<Range>} indicating which Unicode ranges to 389 * shape. {@code null} for the bit mask-based API. 390 */ 391 private transient Set<Range> rangeSet; 392 393 /** 394 * rangeSet.toArray() value. Sorted by Range.base when the number 395 * of elements is greater then BSEARCH_THRESHOLD. 396 */ 397 private transient Range[] rangeArray; 398 399 /** 400 * If more than BSEARCH_THRESHOLD ranges are specified, binary search is used. 
*/
    private static final int BSEARCH_THRESHOLD = 3;

    private static final long serialVersionUID = -8022764705923730308L;

    // Bit mask-based range constants: one bit per range, bits 0 through 18.

    /** Identifies the Latin-1 (European) and extended range, and
     *  Latin-1 (European) decimal base.
     */
    public static final int EUROPEAN = 1<<0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1<<1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1<<2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1<<3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1<<4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1<<5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1<<6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1<<7;

    /** Identifies the TAMIL range and decimal base. */
    // TAMIL DIGIT ZERO was added in Unicode 4.1
    public static final int TAMIL = 1<<8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1<<9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1<<10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1<<11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1<<12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1<<13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1<<14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1<<15;

    /** Identifies the ETHIOPIC range and decimal base. */
    public static final int ETHIOPIC = 1<<16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1<<17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1<<18;

    /** Identifies all ranges, for full contextual shaping.
     *
     * <p>This constant specifies all of the bit mask-based
     * ranges. Use {@code EnumSet.allOf(NumericShaper.Range.class)} to
     * specify all of the enum-based ranges.
     */
    public static final int ALL_RANGES = 0x0007ffff;

    // Key of each bit mask-based range: the bit position of the matching
    // public constant above (EUROPEAN == 1 << EUROPEAN_KEY, and so on).
    private static final int EUROPEAN_KEY = 0;
    private static final int ARABIC_KEY = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY = 3;
    private static final int BENGALI_KEY = 4;
    private static final int GURMUKHI_KEY = 5;
    private static final int GUJARATI_KEY = 6;
    private static final int ORIYA_KEY = 7;
    private static final int TAMIL_KEY = 8;
    private static final int TELUGU_KEY = 9;
    private static final int KANNADA_KEY = 10;
    private static final int MALAYALAM_KEY = 11;
    private static final int THAI_KEY = 12;
    private static final int LAO_KEY = 13;
    private static final int TIBETAN_KEY = 14;
    private static final int MYANMAR_KEY = 15;
    private static final int ETHIOPIC_KEY = 16;
    private static final int KHMER_KEY = 17;
    private static final int MONGOLIAN_KEY = 18;

    // Number of bit mask-based ranges (keys 0 through 18).
    private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed

    // High bit of the mask field; marks a contextual shaper.
    private static final int CONTEXTUAL_MASK = 1<<31;

    // Indexed by key: distance from the ASCII digit zero (digit one for
    // ETHIOPIC, which has no zero) to the first digit of each range.
    private static final char[] bases = {
        '\u0030' - '\u0030', // EUROPEAN
        '\u0660' - '\u0030', // ARABIC-INDIC
        '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
        '\u0966' - '\u0030', // DEVANAGARI
        '\u09e6' - '\u0030', // BENGALI
        '\u0a66' - '\u0030', // GURMUKHI
        '\u0ae6' - '\u0030', // GUJARATI
        '\u0b66' - '\u0030', // ORIYA
        '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
        '\u0c66' - '\u0030', // TELUGU
        '\u0ce6' - '\u0030', // KANNADA
        '\u0d66' - '\u0030', // MALAYALAM
        '\u0e50' - '\u0030', // THAI
        '\u0ed0' - '\u0030', // LAO
        '\u0f20' - '\u0030', // TIBETAN
        '\u1040' - '\u0030', // MYANMAR
        '\u1369' - '\u0031', // ETHIOPIC - no zero
        '\u17e0' - '\u0030', // KHMER
        '\u1810' - '\u0030', // MONGOLIAN
    };

    // some ranges adjoin or overlap, rethink if we want to do a binary search on this

    // Two entries per key: entry 2*key is the range start (inclusive) and
    // entry 2*key + 1 is the range end (exclusive). The final entry is a
    // sentinel.
    private static final char[] contexts = {
        '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
        '\u0600', '\u0780', // ARABIC
        '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
        '\u0900', '\u0980', // DEVANAGARI
        '\u0980', '\u0a00', // BENGALI
        '\u0a00', '\u0a80', // GURMUKHI
        '\u0a80', '\u0b00', // GUJARATI
        '\u0b00', '\u0b80', // ORIYA
        '\u0b80', '\u0c00', // TAMIL
        '\u0c00', '\u0c80', // TELUGU
        '\u0c80', '\u0d00', // KANNADA
        '\u0d00', '\u0d80', // MALAYALAM
        '\u0e00', '\u0e80', // THAI
        '\u0e80', '\u0f00', // LAO
        '\u0f00', '\u1000', // TIBETAN
        '\u1000', '\u1080', // MYANMAR
        '\u1200', '\u1380', // ETHIOPIC - note missing zero
        '\u1780', '\u1800', // KHMER
        '\u1800', '\u1900', // MONGOLIAN
        '\uffff',
    };

    // assume most characters are near each other so probing the cache is infrequent,
    // and a linear probe is ok.
547 548 private static int ctCache = 0; 549 private static int ctCacheLimit = contexts.length - 2; 550 551 // warning, synchronize access to this as it modifies state 552 private static int getContextKey(char c) { 553 if (c < contexts[ctCache]) { 554 while (ctCache > 0 && c < contexts[ctCache]) --ctCache; 555 } else if (c >= contexts[ctCache + 1]) { 556 while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache; 557 } 558 559 // if we're not in a known range, then return EUROPEAN as the range key 560 return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY; 561 } 562 563 // cache for the NumericShaper.Range version 564 private transient volatile Range currentRange = Range.EUROPEAN; 565 566 private Range rangeForCodePoint(final int codepoint) { 567 if (currentRange.inRange(codepoint)) { 568 return currentRange; 569 } 570 571 final Range[] ranges = rangeArray; 572 if (ranges.length > BSEARCH_THRESHOLD) { 573 int lo = 0; 574 int hi = ranges.length - 1; 575 while (lo <= hi) { 576 int mid = (lo + hi) / 2; 577 Range range = ranges[mid]; 578 if (codepoint < range.start) { 579 hi = mid - 1; 580 } else if (codepoint >= range.end) { 581 lo = mid + 1; 582 } else { 583 currentRange = range; 584 return range; 585 } 586 } 587 } else { 588 for (int i = 0; i < ranges.length; i++) { 589 if (ranges[i].inRange(codepoint)) { 590 return ranges[i]; 591 } 592 } 593 } 594 return Range.EUROPEAN; 595 } 596 597 /* 598 * A range table of strong directional characters (types L, R, AL). 599 * Even (left) indexes are starts of ranges of non-strong-directional (or undefined) 600 * characters, odd (right) indexes are starts of ranges of strong directional 601 * characters. 
602 */ 603 private static int[] strongTable = { 604 0x0000, 0x0041, 605 0x005b, 0x0061, 606 0x007b, 0x00aa, 607 0x00ab, 0x00b5, 608 0x00b6, 0x00ba, 609 0x00bb, 0x00c0, 610 0x00d7, 0x00d8, 611 0x00f7, 0x00f8, 612 0x02b9, 0x02bb, 613 0x02c2, 0x02d0, 614 0x02d2, 0x02e0, 615 0x02e5, 0x02ee, 616 0x02ef, 0x0370, 617 0x0374, 0x0376, 618 0x037e, 0x0386, 619 0x0387, 0x0388, 620 0x03f6, 0x03f7, 621 0x0483, 0x048a, 622 0x058a, 0x05be, 623 0x05bf, 0x05c0, 624 0x05c1, 0x05c3, 625 0x05c4, 0x05c6, 626 0x05c7, 0x05d0, 627 0x0600, 0x0608, 628 0x0609, 0x060b, 629 0x060c, 0x060d, 630 0x060e, 0x061b, 631 0x064b, 0x066d, 632 0x0670, 0x0671, 633 0x06d6, 0x06e5, 634 0x06e7, 0x06ee, 635 0x06f0, 0x06fa, 636 0x070f, 0x0710, 637 0x0711, 0x0712, 638 0x0730, 0x074d, 639 0x07a6, 0x07b1, 640 0x07eb, 0x07f4, 641 0x07f6, 0x07fa, 642 0x0816, 0x081a, 643 0x081b, 0x0824, 644 0x0825, 0x0828, 645 0x0829, 0x0830, 646 0x0859, 0x085e, 647 0x0900, 0x0903, 648 0x093a, 0x093b, 649 0x093c, 0x093d, 650 0x0941, 0x0949, 651 0x094d, 0x094e, 652 0x0951, 0x0958, 653 0x0962, 0x0964, 654 0x0981, 0x0982, 655 0x09bc, 0x09bd, 656 0x09c1, 0x09c7, 657 0x09cd, 0x09ce, 658 0x09e2, 0x09e6, 659 0x09f2, 0x09f4, 660 0x09fb, 0x0a03, 661 0x0a3c, 0x0a3e, 662 0x0a41, 0x0a59, 663 0x0a70, 0x0a72, 664 0x0a75, 0x0a83, 665 0x0abc, 0x0abd, 666 0x0ac1, 0x0ac9, 667 0x0acd, 0x0ad0, 668 0x0ae2, 0x0ae6, 669 0x0af1, 0x0b02, 670 0x0b3c, 0x0b3d, 671 0x0b3f, 0x0b40, 672 0x0b41, 0x0b47, 673 0x0b4d, 0x0b57, 674 0x0b62, 0x0b66, 675 0x0b82, 0x0b83, 676 0x0bc0, 0x0bc1, 677 0x0bcd, 0x0bd0, 678 0x0bf3, 0x0c01, 679 0x0c3e, 0x0c41, 680 0x0c46, 0x0c58, 681 0x0c62, 0x0c66, 682 0x0c78, 0x0c7f, 683 0x0cbc, 0x0cbd, 684 0x0ccc, 0x0cd5, 685 0x0ce2, 0x0ce6, 686 0x0d41, 0x0d46, 687 0x0d4d, 0x0d4e, 688 0x0d62, 0x0d66, 689 0x0dca, 0x0dcf, 690 0x0dd2, 0x0dd8, 691 0x0e31, 0x0e32, 692 0x0e34, 0x0e40, 693 0x0e47, 0x0e4f, 694 0x0eb1, 0x0eb2, 695 0x0eb4, 0x0ebd, 696 0x0ec8, 0x0ed0, 697 0x0f18, 0x0f1a, 698 0x0f35, 0x0f36, 699 0x0f37, 0x0f38, 700 0x0f39, 0x0f3e, 701 0x0f71, 
0x0f7f, 702 0x0f80, 0x0f85, 703 0x0f86, 0x0f88, 704 0x0f8d, 0x0fbe, 705 0x0fc6, 0x0fc7, 706 0x102d, 0x1031, 707 0x1032, 0x1038, 708 0x1039, 0x103b, 709 0x103d, 0x103f, 710 0x1058, 0x105a, 711 0x105e, 0x1061, 712 0x1071, 0x1075, 713 0x1082, 0x1083, 714 0x1085, 0x1087, 715 0x108d, 0x108e, 716 0x109d, 0x109e, 717 0x135d, 0x1360, 718 0x1390, 0x13a0, 719 0x1400, 0x1401, 720 0x1680, 0x1681, 721 0x169b, 0x16a0, 722 0x1712, 0x1720, 723 0x1732, 0x1735, 724 0x1752, 0x1760, 725 0x1772, 0x1780, 726 0x17b7, 0x17be, 727 0x17c6, 0x17c7, 728 0x17c9, 0x17d4, 729 0x17db, 0x17dc, 730 0x17dd, 0x17e0, 731 0x17f0, 0x1810, 732 0x18a9, 0x18aa, 733 0x1920, 0x1923, 734 0x1927, 0x1929, 735 0x1932, 0x1933, 736 0x1939, 0x1946, 737 0x19de, 0x1a00, 738 0x1a17, 0x1a19, 739 0x1a56, 0x1a57, 740 0x1a58, 0x1a61, 741 0x1a62, 0x1a63, 742 0x1a65, 0x1a6d, 743 0x1a73, 0x1a80, 744 0x1b00, 0x1b04, 745 0x1b34, 0x1b35, 746 0x1b36, 0x1b3b, 747 0x1b3c, 0x1b3d, 748 0x1b42, 0x1b43, 749 0x1b6b, 0x1b74, 750 0x1b80, 0x1b82, 751 0x1ba2, 0x1ba6, 752 0x1ba8, 0x1baa, 753 0x1be6, 0x1be7, 754 0x1be8, 0x1bea, 755 0x1bed, 0x1bee, 756 0x1bef, 0x1bf2, 757 0x1c2c, 0x1c34, 758 0x1c36, 0x1c3b, 759 0x1cd0, 0x1cd3, 760 0x1cd4, 0x1ce1, 761 0x1ce2, 0x1ce9, 762 0x1ced, 0x1cee, 763 0x1dc0, 0x1e00, 764 0x1fbd, 0x1fbe, 765 0x1fbf, 0x1fc2, 766 0x1fcd, 0x1fd0, 767 0x1fdd, 0x1fe0, 768 0x1fed, 0x1ff2, 769 0x1ffd, 0x200e, 770 0x2010, 0x2071, 771 0x2074, 0x207f, 772 0x2080, 0x2090, 773 0x20a0, 0x2102, 774 0x2103, 0x2107, 775 0x2108, 0x210a, 776 0x2114, 0x2115, 777 0x2116, 0x2119, 778 0x211e, 0x2124, 779 0x2125, 0x2126, 780 0x2127, 0x2128, 781 0x2129, 0x212a, 782 0x212e, 0x212f, 783 0x213a, 0x213c, 784 0x2140, 0x2145, 785 0x214a, 0x214e, 786 0x2150, 0x2160, 787 0x2189, 0x2336, 788 0x237b, 0x2395, 789 0x2396, 0x249c, 790 0x24ea, 0x26ac, 791 0x26ad, 0x2800, 792 0x2900, 0x2c00, 793 0x2ce5, 0x2ceb, 794 0x2cef, 0x2d00, 795 0x2d7f, 0x2d80, 796 0x2de0, 0x3005, 797 0x3008, 0x3021, 798 0x302a, 0x3031, 799 0x3036, 0x3038, 800 0x303d, 0x3041, 801 0x3099, 
0x309d, 802 0x30a0, 0x30a1, 803 0x30fb, 0x30fc, 804 0x31c0, 0x31f0, 805 0x321d, 0x3220, 806 0x3250, 0x3260, 807 0x327c, 0x327f, 808 0x32b1, 0x32c0, 809 0x32cc, 0x32d0, 810 0x3377, 0x337b, 811 0x33de, 0x33e0, 812 0x33ff, 0x3400, 813 0x4dc0, 0x4e00, 814 0xa490, 0xa4d0, 815 0xa60d, 0xa610, 816 0xa66f, 0xa680, 817 0xa6f0, 0xa6f2, 818 0xa700, 0xa722, 819 0xa788, 0xa789, 820 0xa802, 0xa803, 821 0xa806, 0xa807, 822 0xa80b, 0xa80c, 823 0xa825, 0xa827, 824 0xa828, 0xa830, 825 0xa838, 0xa840, 826 0xa874, 0xa880, 827 0xa8c4, 0xa8ce, 828 0xa8e0, 0xa8f2, 829 0xa926, 0xa92e, 830 0xa947, 0xa952, 831 0xa980, 0xa983, 832 0xa9b3, 0xa9b4, 833 0xa9b6, 0xa9ba, 834 0xa9bc, 0xa9bd, 835 0xaa29, 0xaa2f, 836 0xaa31, 0xaa33, 837 0xaa35, 0xaa40, 838 0xaa43, 0xaa44, 839 0xaa4c, 0xaa4d, 840 0xaab0, 0xaab1, 841 0xaab2, 0xaab5, 842 0xaab7, 0xaab9, 843 0xaabe, 0xaac0, 844 0xaac1, 0xaac2, 845 0xabe5, 0xabe6, 846 0xabe8, 0xabe9, 847 0xabed, 0xabf0, 848 0xfb1e, 0xfb1f, 849 0xfb29, 0xfb2a, 850 0xfd3e, 0xfd50, 851 0xfdfd, 0xfe70, 852 0xfeff, 0xff21, 853 0xff3b, 0xff41, 854 0xff5b, 0xff66, 855 0xffe0, 0x10000, 856 0x10101, 0x10102, 857 0x10140, 0x101d0, 858 0x101fd, 0x10280, 859 0x1091f, 0x10920, 860 0x10a01, 0x10a10, 861 0x10a38, 0x10a40, 862 0x10b39, 0x10b40, 863 0x10e60, 0x11000, 864 0x11001, 0x11002, 865 0x11038, 0x11047, 866 0x11052, 0x11066, 867 0x11080, 0x11082, 868 0x110b3, 0x110b7, 869 0x110b9, 0x110bb, 870 0x1d167, 0x1d16a, 871 0x1d173, 0x1d183, 872 0x1d185, 0x1d18c, 873 0x1d1aa, 0x1d1ae, 874 0x1d200, 0x1d360, 875 0x1d6db, 0x1d6dc, 876 0x1d715, 0x1d716, 877 0x1d74f, 0x1d750, 878 0x1d789, 0x1d78a, 879 0x1d7c3, 0x1d7c4, 880 0x1d7ce, 0x1f110, 881 0x1f300, 0x1f48c, 882 0x1f48d, 0x1f524, 883 0x1f525, 0x20000, 884 0xe0001, 0xf0000, 885 0x10fffe, 0x10ffff // sentinel 886 }; 887 888 889 // use a binary search with a cache 890 891 private transient volatile int stCache = 0; 892 893 private boolean isStrongDirectional(char c) { 894 int cachedIndex = stCache; 895 if (c < strongTable[cachedIndex]) { 896 
cachedIndex = search(c, strongTable, 0, cachedIndex); 897 } else if (c >= strongTable[cachedIndex + 1]) { 898 cachedIndex = search(c, strongTable, cachedIndex + 1, 899 strongTable.length - cachedIndex - 1); 900 } 901 boolean val = (cachedIndex & 0x1) == 1; 902 stCache = cachedIndex; 903 return val; 904 } 905 906 private static int getKeyFromMask(int mask) { 907 int key = 0; 908 while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) { 909 ++key; 910 } 911 if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) { 912 throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask)); 913 } 914 return key; 915 } 916 917 /** 918 * Returns a shaper for the provided unicode range. All 919 * Latin-1 (EUROPEAN) digits are converted 920 * to the corresponding decimal unicode digits. 921 * @param singleRange the specified Unicode range 922 * @return a non-contextual numeric shaper 923 * @throws IllegalArgumentException if the range is not a single range 924 */ 925 public static NumericShaper getShaper(int singleRange) { 926 int key = getKeyFromMask(singleRange); 927 return new NumericShaper(key, singleRange); 928 } 929 930 /** 931 * Returns a shaper for the provided Unicode 932 * range. All Latin-1 (EUROPEAN) digits are converted to the 933 * corresponding decimal digits of the specified Unicode range. 934 * 935 * @param singleRange the Unicode range given by a {@link 936 * NumericShaper.Range} constant. 937 * @return a non-contextual {@code NumericShaper}. 938 * @throws NullPointerException if {@code singleRange} is {@code null} 939 * @since 1.7 940 */ 941 public static NumericShaper getShaper(Range singleRange) { 942 return new NumericShaper(singleRange, EnumSet.of(singleRange)); 943 } 944 945 /** 946 * Returns a contextual shaper for the provided unicode range(s). 947 * Latin-1 (EUROPEAN) digits are converted to the decimal digits 948 * corresponding to the range of the preceding text, if the 949 * range is one of the provided ranges. 
Multiple ranges are 950 * represented by or-ing the values together, such as, 951 * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The 952 * shaper assumes EUROPEAN as the starting context, that is, if 953 * EUROPEAN digits are encountered before any strong directional 954 * text in the string, the context is presumed to be EUROPEAN, and 955 * so the digits will not shape. 956 * @param ranges the specified Unicode ranges 957 * @return a shaper for the specified ranges 958 */ 959 public static NumericShaper getContextualShaper(int ranges) { 960 ranges |= CONTEXTUAL_MASK; 961 return new NumericShaper(EUROPEAN_KEY, ranges); 962 } 963 964 /** 965 * Returns a contextual shaper for the provided Unicode 966 * range(s). The Latin-1 (EUROPEAN) digits are converted to the 967 * decimal digits corresponding to the range of the preceding 968 * text, if the range is one of the provided ranges. 969 * 970 * <p>The shaper assumes EUROPEAN as the starting context, that 971 * is, if EUROPEAN digits are encountered before any strong 972 * directional text in the string, the context is presumed to be 973 * EUROPEAN, and so the digits will not shape. 974 * 975 * @param ranges the specified Unicode ranges 976 * @return a contextual shaper for the specified ranges 977 * @throws NullPointerException if {@code ranges} is {@code null}. 978 * @since 1.7 979 */ 980 public static NumericShaper getContextualShaper(Set<Range> ranges) { 981 NumericShaper shaper = new NumericShaper(Range.EUROPEAN, ranges); 982 shaper.mask = CONTEXTUAL_MASK; 983 return shaper; 984 } 985 986 /** 987 * Returns a contextual shaper for the provided unicode range(s). 988 * Latin-1 (EUROPEAN) digits will be converted to the decimal digits 989 * corresponding to the range of the preceding text, if the 990 * range is one of the provided ranges. Multiple ranges are 991 * represented by or-ing the values together, for example, 992 * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. 
The 993 * shaper uses defaultContext as the starting context. 994 * @param ranges the specified Unicode ranges 995 * @param defaultContext the starting context, such as 996 * <code>NumericShaper.EUROPEAN</code> 997 * @return a shaper for the specified Unicode ranges. 998 * @throws IllegalArgumentException if the specified 999 * <code>defaultContext</code> is not a single valid range. 1000 */ 1001 public static NumericShaper getContextualShaper(int ranges, int defaultContext) { 1002 int key = getKeyFromMask(defaultContext); 1003 ranges |= CONTEXTUAL_MASK; 1004 return new NumericShaper(key, ranges); 1005 } 1006 1007 /** 1008 * Returns a contextual shaper for the provided Unicode range(s). 1009 * The Latin-1 (EUROPEAN) digits will be converted to the decimal 1010 * digits corresponding to the range of the preceding text, if the 1011 * range is one of the provided ranges. The shaper uses {@code 1012 * defaultContext} as the starting context. 1013 * 1014 * @param ranges the specified Unicode ranges 1015 * @param defaultContext the starting context, such as 1016 * {@code NumericShaper.Range.EUROPEAN} 1017 * @return a contextual shaper for the specified Unicode ranges. 1018 * @throws NullPointerException 1019 * if {@code ranges} or {@code defaultContext} is {@code null} 1020 * @since 1.7 1021 */ 1022 public static NumericShaper getContextualShaper(Set<Range> ranges, 1023 Range defaultContext) { 1024 if (defaultContext == null) { 1025 throw new NullPointerException(); 1026 } 1027 NumericShaper shaper = new NumericShaper(defaultContext, ranges); 1028 shaper.mask = CONTEXTUAL_MASK; 1029 return shaper; 1030 } 1031 1032 /** 1033 * Private constructor. 1034 */ 1035 private NumericShaper(int key, int mask) { 1036 this.key = key; 1037 this.mask = mask; 1038 } 1039 1040 private NumericShaper(Range defaultContext, Set<Range> ranges) { 1041 shapingRange = defaultContext; 1042 rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null. 
1043 1044 // Give precedance to EASTERN_ARABIC if both ARABIC and 1045 // EASTERN_ARABIC are specified. 1046 if (rangeSet.contains(Range.EASTERN_ARABIC) 1047 && rangeSet.contains(Range.ARABIC)) { 1048 rangeSet.remove(Range.ARABIC); 1049 } 1050 1051 // As well as the above case, give precedance to TAI_THAM_THAM if both 1052 // TAI_THAM_HORA and TAI_THAM_THAM are specified. 1053 if (rangeSet.contains(Range.TAI_THAM_THAM) 1054 && rangeSet.contains(Range.TAI_THAM_HORA)) { 1055 rangeSet.remove(Range.TAI_THAM_HORA); 1056 } 1057 1058 rangeArray = rangeSet.toArray(new Range[rangeSet.size()]); 1059 if (rangeArray.length > BSEARCH_THRESHOLD) { 1060 // sort rangeArray for binary search 1061 Arrays.sort(rangeArray, 1062 new Comparator<Range>() { 1063 public int compare(Range s1, Range s2) { 1064 return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1; 1065 } 1066 }); 1067 } 1068 } 1069 1070 /** 1071 * Converts the digits in the text that occur between start and 1072 * start + count. 1073 * @param text an array of characters to convert 1074 * @param start the index into <code>text</code> to start 1075 * converting 1076 * @param count the number of characters in <code>text</code> 1077 * to convert 1078 * @throws IndexOutOfBoundsException if start or start + count is 1079 * out of bounds 1080 * @throws NullPointerException if text is null 1081 */ 1082 public void shape(char[] text, int start, int count) { 1083 checkParams(text, start, count); 1084 if (isContextual()) { 1085 if (rangeSet == null) { 1086 shapeContextually(text, start, count, key); 1087 } else { 1088 shapeContextually(text, start, count, shapingRange); 1089 } 1090 } else { 1091 shapeNonContextually(text, start, count); 1092 } 1093 } 1094 1095 /** 1096 * Converts the digits in the text that occur between start and 1097 * start + count, using the provided context. 1098 * Context is ignored if the shaper is not a contextual shaper. 
1099 * @param text an array of characters 1100 * @param start the index into <code>text</code> to start 1101 * converting 1102 * @param count the number of characters in <code>text</code> 1103 * to convert 1104 * @param context the context to which to convert the 1105 * characters, such as <code>NumericShaper.EUROPEAN</code> 1106 * @throws IndexOutOfBoundsException if start or start + count is 1107 * out of bounds 1108 * @throws NullPointerException if text is null 1109 * @throws IllegalArgumentException if this is a contextual shaper 1110 * and the specified <code>context</code> is not a single valid 1111 * range. 1112 */ 1113 public void shape(char[] text, int start, int count, int context) { 1114 checkParams(text, start, count); 1115 if (isContextual()) { 1116 int ctxKey = getKeyFromMask(context); 1117 if (rangeSet == null) { 1118 shapeContextually(text, start, count, ctxKey); 1119 } else { 1120 shapeContextually(text, start, count, Range.values()[ctxKey]); 1121 } 1122 } else { 1123 shapeNonContextually(text, start, count); 1124 } 1125 } 1126 1127 /** 1128 * Converts the digits in the text that occur between {@code 1129 * start} and {@code start + count}, using the provided {@code 1130 * context}. {@code Context} is ignored if the shaper is not a 1131 * contextual shaper. 
1132 * 1133 * @param text a {@code char} array 1134 * @param start the index into {@code text} to start converting 1135 * @param count the number of {@code char}s in {@code text} 1136 * to convert 1137 * @param context the context to which to convert the characters, 1138 * such as {@code NumericShaper.Range.EUROPEAN} 1139 * @throws IndexOutOfBoundsException 1140 * if {@code start} or {@code start + count} is out of bounds 1141 * @throws NullPointerException 1142 * if {@code text} or {@code context} is null 1143 * @since 1.7 1144 */ 1145 public void shape(char[] text, int start, int count, Range context) { 1146 checkParams(text, start, count); 1147 if (context == null) { 1148 throw new NullPointerException("context is null"); 1149 } 1150 1151 if (isContextual()) { 1152 if (rangeSet != null) { 1153 shapeContextually(text, start, count, context); 1154 } else { 1155 int key = Range.toRangeIndex(context); 1156 if (key >= 0) { 1157 shapeContextually(text, start, count, key); 1158 } else { 1159 shapeContextually(text, start, count, shapingRange); 1160 } 1161 } 1162 } else { 1163 shapeNonContextually(text, start, count); 1164 } 1165 } 1166 1167 private void checkParams(char[] text, int start, int count) { 1168 if (text == null) { 1169 throw new NullPointerException("text is null"); 1170 } 1171 if ((start < 0) 1172 || (start > text.length) 1173 || ((start + count) < 0) 1174 || ((start + count) > text.length)) { 1175 throw new IndexOutOfBoundsException( 1176 "bad start or count for text of length " + text.length); 1177 } 1178 } 1179 1180 /** 1181 * Returns a <code>boolean</code> indicating whether or not 1182 * this shaper shapes contextually. 1183 * @return <code>true</code> if this shaper is contextual; 1184 * <code>false</code> otherwise. 1185 */ 1186 public boolean isContextual() { 1187 return (mask & CONTEXTUAL_MASK) != 0; 1188 } 1189 1190 /** 1191 * Returns an <code>int</code> that ORs together the values for 1192 * all the ranges that will be shaped. 
1193 * <p> 1194 * For example, to check if a shaper shapes to Arabic, you would use the 1195 * following: 1196 * <blockquote> 1197 * <code>if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... </code> 1198 * </blockquote> 1199 * 1200 * <p>Note that this method supports only the bit mask-based 1201 * ranges. Call {@link #getRangeSet()} for the enum-based ranges. 1202 * 1203 * @return the values for all the ranges to be shaped. 1204 */ 1205 public int getRanges() { 1206 return mask & ~CONTEXTUAL_MASK; 1207 } 1208 1209 /** 1210 * Returns a {@code Set} representing all the Unicode ranges in 1211 * this {@code NumericShaper} that will be shaped. 1212 * 1213 * @return all the Unicode ranges to be shaped. 1214 * @since 1.7 1215 */ 1216 public Set<Range> getRangeSet() { 1217 if (rangeSet != null) { 1218 return EnumSet.copyOf(rangeSet); 1219 } 1220 return Range.maskToRangeSet(mask); 1221 } 1222 1223 /** 1224 * Perform non-contextual shaping. 1225 */ 1226 private void shapeNonContextually(char[] text, int start, int count) { 1227 int base; 1228 char minDigit = '0'; 1229 if (shapingRange != null) { 1230 base = shapingRange.getDigitBase(); 1231 minDigit += shapingRange.getNumericBase(); 1232 } else { 1233 base = bases[key]; 1234 if (key == ETHIOPIC_KEY) { 1235 minDigit++; // Ethiopic doesn't use decimal zero 1236 } 1237 } 1238 for (int i = start, e = start + count; i < e; ++i) { 1239 char c = text[i]; 1240 if (c >= minDigit && c <= '\u0039') { 1241 text[i] = (char)(c + base); 1242 } 1243 } 1244 } 1245 1246 /** 1247 * Perform contextual shaping. 1248 * Synchronized to protect caches used in getContextKey. 1249 */ 1250 private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) { 1251 1252 // if we don't support this context, then don't shape 1253 if ((mask & (1<<ctxKey)) == 0) { 1254 ctxKey = EUROPEAN_KEY; 1255 } 1256 int lastkey = ctxKey; 1257 1258 int base = bases[ctxKey]; 1259 char minDigit = ctxKey == ETHIOPIC_KEY ? 
'1' : '0'; // Ethiopic doesn't use decimal zero 1260 1261 synchronized (NumericShaper.class) { 1262 for (int i = start, e = start + count; i < e; ++i) { 1263 char c = text[i]; 1264 if (c >= minDigit && c <= '\u0039') { 1265 text[i] = (char)(c + base); 1266 } 1267 1268 if (isStrongDirectional(c)) { 1269 int newkey = getContextKey(c); 1270 if (newkey != lastkey) { 1271 lastkey = newkey; 1272 1273 ctxKey = newkey; 1274 if (((mask & EASTERN_ARABIC) != 0) && 1275 (ctxKey == ARABIC_KEY || 1276 ctxKey == EASTERN_ARABIC_KEY)) { 1277 ctxKey = EASTERN_ARABIC_KEY; 1278 } else if (((mask & ARABIC) != 0) && 1279 (ctxKey == ARABIC_KEY || 1280 ctxKey == EASTERN_ARABIC_KEY)) { 1281 ctxKey = ARABIC_KEY; 1282 } else if ((mask & (1<<ctxKey)) == 0) { 1283 ctxKey = EUROPEAN_KEY; 1284 } 1285 1286 base = bases[ctxKey]; 1287 1288 minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero 1289 } 1290 } 1291 } 1292 } 1293 } 1294 1295 private void shapeContextually(char[] text, int start, int count, Range ctxKey) { 1296 // if we don't support the specified context, then don't shape. 1297 if (ctxKey == null || !rangeSet.contains(ctxKey)) { 1298 ctxKey = Range.EUROPEAN; 1299 } 1300 1301 Range lastKey = ctxKey; 1302 int base = ctxKey.getDigitBase(); 1303 char minDigit = (char)('0' + ctxKey.getNumericBase()); 1304 final int end = start + count; 1305 for (int i = start; i < end; ++i) { 1306 char c = text[i]; 1307 if (c >= minDigit && c <= '9') { 1308 text[i] = (char)(c + base); 1309 continue; 1310 } 1311 if (isStrongDirectional(c)) { 1312 ctxKey = rangeForCodePoint(c); 1313 if (ctxKey != lastKey) { 1314 lastKey = ctxKey; 1315 base = ctxKey.getDigitBase(); 1316 minDigit = (char)('0' + ctxKey.getNumericBase()); 1317 } 1318 } 1319 } 1320 } 1321 1322 /** 1323 * Returns a hash code for this shaper. 1324 * @return this shaper's hash code. 
1325 * @see java.lang.Object#hashCode 1326 */ 1327 public int hashCode() { 1328 int hash = mask; 1329 if (rangeSet != null) { 1330 // Use the CONTEXTUAL_MASK bit only for the enum-based 1331 // NumericShaper. A deserialized NumericShaper might have 1332 // bit masks. 1333 hash &= CONTEXTUAL_MASK; 1334 hash ^= rangeSet.hashCode(); 1335 } 1336 return hash; 1337 } 1338 1339 /** 1340 * Returns {@code true} if the specified object is an instance of 1341 * <code>NumericShaper</code> and shapes identically to this one, 1342 * regardless of the range representations, the bit mask or the 1343 * enum. For example, the following code produces {@code "true"}. 1344 * <blockquote><pre> 1345 * NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC); 1346 * NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC); 1347 * System.out.println(ns1.equals(ns2)); 1348 * </pre></blockquote> 1349 * 1350 * @param o the specified object to compare to this 1351 * <code>NumericShaper</code> 1352 * @return <code>true</code> if <code>o</code> is an instance 1353 * of <code>NumericShaper</code> and shapes in the same way; 1354 * <code>false</code> otherwise. 
1355 * @see java.lang.Object#equals(java.lang.Object) 1356 */ 1357 public boolean equals(Object o) { 1358 if (o != null) { 1359 try { 1360 NumericShaper rhs = (NumericShaper)o; 1361 if (rangeSet != null) { 1362 if (rhs.rangeSet != null) { 1363 return isContextual() == rhs.isContextual() 1364 && rangeSet.equals(rhs.rangeSet) 1365 && shapingRange == rhs.shapingRange; 1366 } 1367 return isContextual() == rhs.isContextual() 1368 && rangeSet.equals(Range.maskToRangeSet(rhs.mask)) 1369 && shapingRange == Range.indexToRange(rhs.key); 1370 } else if (rhs.rangeSet != null) { 1371 Set<Range> rset = Range.maskToRangeSet(mask); 1372 Range srange = Range.indexToRange(key); 1373 return isContextual() == rhs.isContextual() 1374 && rset.equals(rhs.rangeSet) 1375 && srange == rhs.shapingRange; 1376 } 1377 return rhs.mask == mask && rhs.key == key; 1378 } 1379 catch (ClassCastException e) { 1380 } 1381 } 1382 return false; 1383 } 1384 1385 /** 1386 * Returns a <code>String</code> that describes this shaper. This method 1387 * is used for debugging purposes only. 1388 * @return a <code>String</code> describing this shaper. 1389 */ 1390 public String toString() { 1391 StringBuilder buf = new StringBuilder(super.toString()); 1392 1393 buf.append("[contextual:").append(isContextual()); 1394 1395 String[] keyNames = null; 1396 if (isContextual()) { 1397 buf.append(", context:"); 1398 buf.append(shapingRange == null ? 
Range.values()[key] : shapingRange); 1399 } 1400 1401 if (rangeSet == null) { 1402 buf.append(", range(s): "); 1403 boolean first = true; 1404 for (int i = 0; i < NUM_KEYS; ++i) { 1405 if ((mask & (1 << i)) != 0) { 1406 if (first) { 1407 first = false; 1408 } else { 1409 buf.append(", "); 1410 } 1411 buf.append(Range.values()[i]); 1412 } 1413 } 1414 } else { 1415 buf.append(", range set: ").append(rangeSet); 1416 } 1417 buf.append(']'); 1418 1419 return buf.toString(); 1420 } 1421 1422 /** 1423 * Returns the index of the high bit in value (assuming le, actually 1424 * power of 2 >= value). value must be positive. 1425 */ 1426 private static int getHighBit(int value) { 1427 if (value <= 0) { 1428 return -32; 1429 } 1430 1431 int bit = 0; 1432 1433 if (value >= 1 << 16) { 1434 value >>= 16; 1435 bit += 16; 1436 } 1437 1438 if (value >= 1 << 8) { 1439 value >>= 8; 1440 bit += 8; 1441 } 1442 1443 if (value >= 1 << 4) { 1444 value >>= 4; 1445 bit += 4; 1446 } 1447 1448 if (value >= 1 << 2) { 1449 value >>= 2; 1450 bit += 2; 1451 } 1452 1453 if (value >= 1 << 1) { 1454 bit += 1; 1455 } 1456 1457 return bit; 1458 } 1459 1460 /** 1461 * fast binary search over subrange of array. 1462 */ 1463 private static int search(int value, int[] array, int start, int length) 1464 { 1465 int power = 1 << getHighBit(length); 1466 int extra = length - power; 1467 int probe = power; 1468 int index = start; 1469 1470 if (value >= array[index + extra]) { 1471 index += extra; 1472 } 1473 1474 while (probe > 1) { 1475 probe >>= 1; 1476 1477 if (value >= array[index + probe]) { 1478 index += probe; 1479 } 1480 } 1481 1482 return index; 1483 } 1484 1485 /** 1486 * Converts the {@code NumericShaper.Range} enum-based parameters, 1487 * if any, to the bit mask-based counterparts and writes this 1488 * object to the {@code stream}. Any enum constants that have no 1489 * bit mask-based counterparts are ignored in the conversion. 
1490 * 1491 * @param stream the output stream to write to 1492 * @throws IOException if an I/O error occurs while writing to {@code stream} 1493 * @since 1.7 1494 */ 1495 private void writeObject(ObjectOutputStream stream) throws IOException { 1496 if (shapingRange != null) { 1497 int index = Range.toRangeIndex(shapingRange); 1498 if (index >= 0) { 1499 key = index; 1500 } 1501 } 1502 if (rangeSet != null) { 1503 mask |= Range.toRangeMask(rangeSet); 1504 } 1505 stream.defaultWriteObject(); 1506 } 1507} 1508