1/* GENERATED SOURCE. DO NOT MODIFY. */ 2/* 3 ******************************************************************************* 4 * Copyright (C) 1996-2015, International Business Machines Corporation and * 5 * others. All Rights Reserved. * 6 ******************************************************************************* 7 */ 8package android.icu.impl; 9 10import java.io.IOException; 11import java.util.ArrayList; 12import java.util.Locale; 13import java.util.regex.Pattern; 14 15import android.icu.lang.UCharacter; 16import android.icu.text.Replaceable; 17import android.icu.text.UTF16; 18import android.icu.text.UnicodeMatcher; 19 20/** 21 * @hide Only a subset of ICU is exposed in Android 22 */ 23public final class Utility { 24 25 private static final char APOSTROPHE = '\''; 26 private static final char BACKSLASH = '\\'; 27 private static final int MAGIC_UNSIGNED = 0x80000000; 28 29 /** 30 * Convenience utility to compare two Object[]s. 31 * Ought to be in System 32 */ 33 public final static boolean arrayEquals(Object[] source, Object target) { 34 if (source == null) return (target == null); 35 if (!(target instanceof Object[])) return false; 36 Object[] targ = (Object[]) target; 37 return (source.length == targ.length 38 && arrayRegionMatches(source, 0, targ, 0, source.length)); 39 } 40 41 /** 42 * Convenience utility to compare two int[]s 43 * Ought to be in System 44 */ 45 public final static boolean arrayEquals(int[] source, Object target) { 46 if (source == null) return (target == null); 47 if (!(target instanceof int[])) return false; 48 int[] targ = (int[]) target; 49 return (source.length == targ.length 50 && arrayRegionMatches(source, 0, targ, 0, source.length)); 51 } 52 53 /** 54 * Convenience utility to compare two double[]s 55 * Ought to be in System 56 */ 57 public final static boolean arrayEquals(double[] source, Object target) { 58 if (source == null) return (target == null); 59 if (!(target instanceof double[])) return false; 60 double[] targ = 
(double[]) target; 61 return (source.length == targ.length 62 && arrayRegionMatches(source, 0, targ, 0, source.length)); 63 } 64 public final static boolean arrayEquals(byte[] source, Object target) { 65 if (source == null) return (target == null); 66 if (!(target instanceof byte[])) return false; 67 byte[] targ = (byte[]) target; 68 return (source.length == targ.length 69 && arrayRegionMatches(source, 0, targ, 0, source.length)); 70 } 71 72 /** 73 * Convenience utility to compare two Object[]s 74 * Ought to be in System 75 */ 76 public final static boolean arrayEquals(Object source, Object target) { 77 if (source == null) return (target == null); 78 // for some reason, the correct arrayEquals is not being called 79 // so do it by hand for now. 80 if (source instanceof Object[]) 81 return(arrayEquals((Object[]) source,target)); 82 if (source instanceof int[]) 83 return(arrayEquals((int[]) source,target)); 84 if (source instanceof double[]) 85 return(arrayEquals((double[]) source, target)); 86 if (source instanceof byte[]) 87 return(arrayEquals((byte[]) source,target)); 88 return source.equals(target); 89 } 90 91 /** 92 * Convenience utility to compare two Object[]s 93 * Ought to be in System. 94 * @param len the length to compare. 95 * The start indices and start+len must be valid. 96 */ 97 public final static boolean arrayRegionMatches(Object[] source, int sourceStart, 98 Object[] target, int targetStart, 99 int len) 100 { 101 int sourceEnd = sourceStart + len; 102 int delta = targetStart - sourceStart; 103 for (int i = sourceStart; i < sourceEnd; i++) { 104 if (!arrayEquals(source[i],target[i + delta])) 105 return false; 106 } 107 return true; 108 } 109 110 /** 111 * Convenience utility to compare two Object[]s 112 * Ought to be in System. 113 * @param len the length to compare. 114 * The start indices and start+len must be valid. 
115 */ 116 public final static boolean arrayRegionMatches(char[] source, int sourceStart, 117 char[] target, int targetStart, 118 int len) 119 { 120 int sourceEnd = sourceStart + len; 121 int delta = targetStart - sourceStart; 122 for (int i = sourceStart; i < sourceEnd; i++) { 123 if (source[i]!=target[i + delta]) 124 return false; 125 } 126 return true; 127 } 128 129 /** 130 * Convenience utility to compare two int[]s. 131 * @param len the length to compare. 132 * The start indices and start+len must be valid. 133 * Ought to be in System 134 */ 135 public final static boolean arrayRegionMatches(int[] source, int sourceStart, 136 int[] target, int targetStart, 137 int len) 138 { 139 int sourceEnd = sourceStart + len; 140 int delta = targetStart - sourceStart; 141 for (int i = sourceStart; i < sourceEnd; i++) { 142 if (source[i] != target[i + delta]) 143 return false; 144 } 145 return true; 146 } 147 148 /** 149 * Convenience utility to compare two arrays of doubles. 150 * @param len the length to compare. 151 * The start indices and start+len must be valid. 152 * Ought to be in System 153 */ 154 public final static boolean arrayRegionMatches(double[] source, int sourceStart, 155 double[] target, int targetStart, 156 int len) 157 { 158 int sourceEnd = sourceStart + len; 159 int delta = targetStart - sourceStart; 160 for (int i = sourceStart; i < sourceEnd; i++) { 161 if (source[i] != target[i + delta]) 162 return false; 163 } 164 return true; 165 } 166 public final static boolean arrayRegionMatches(byte[] source, int sourceStart, 167 byte[] target, int targetStart, int len){ 168 int sourceEnd = sourceStart + len; 169 int delta = targetStart - sourceStart; 170 for (int i = sourceStart; i < sourceEnd; i++) { 171 if (source[i] != target[i + delta]) 172 return false; 173 } 174 return true; 175 } 176 177 /** 178 * Convenience utility. Does null checks on objects, then calls equals. 
179 */ 180 public final static boolean objectEquals(Object a, Object b) { 181 return a == null ? 182 b == null ? true : false : 183 b == null ? false : a.equals(b); 184 } 185 186 /** 187 * Convenience utility. Does null checks on objects, then calls compare. 188 */ 189 public static <T extends Comparable<T>> int checkCompare(T a, T b) { 190 return a == null ? 191 b == null ? 0 : -1 : 192 b == null ? 1 : a.compareTo(b); 193 } 194 195 /** 196 * Convenience utility. Does null checks on object, then calls hashCode. 197 */ 198 public static int checkHash(Object a) { 199 return a == null ? 0 : a.hashCode(); 200 } 201 202 /** 203 * The ESCAPE character is used during run-length encoding. It signals 204 * a run of identical chars. 205 */ 206 private static final char ESCAPE = '\uA5A5'; 207 208 /** 209 * The ESCAPE_BYTE character is used during run-length encoding. It signals 210 * a run of identical bytes. 211 */ 212 static final byte ESCAPE_BYTE = (byte)0xA5; 213 214 /** 215 * Construct a string representing an int array. Use run-length encoding. 216 * A character represents itself, unless it is the ESCAPE character. Then 217 * the following notations are possible: 218 * ESCAPE ESCAPE ESCAPE literal 219 * ESCAPE n c n instances of character c 220 * Since an encoded run occupies 3 characters, we only encode runs of 4 or 221 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 222 * If we encounter a run where n == ESCAPE, we represent this as: 223 * c ESCAPE n-1 c 224 * The ESCAPE value is chosen so as not to collide with commonly 225 * seen values. 
226 */ 227 static public final String arrayToRLEString(int[] a) { 228 StringBuilder buffer = new StringBuilder(); 229 230 appendInt(buffer, a.length); 231 int runValue = a[0]; 232 int runLength = 1; 233 for (int i=1; i<a.length; ++i) { 234 int s = a[i]; 235 if (s == runValue && runLength < 0xFFFF) { 236 ++runLength; 237 } else { 238 encodeRun(buffer, runValue, runLength); 239 runValue = s; 240 runLength = 1; 241 } 242 } 243 encodeRun(buffer, runValue, runLength); 244 return buffer.toString(); 245 } 246 247 /** 248 * Construct a string representing a short array. Use run-length encoding. 249 * A character represents itself, unless it is the ESCAPE character. Then 250 * the following notations are possible: 251 * ESCAPE ESCAPE ESCAPE literal 252 * ESCAPE n c n instances of character c 253 * Since an encoded run occupies 3 characters, we only encode runs of 4 or 254 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 255 * If we encounter a run where n == ESCAPE, we represent this as: 256 * c ESCAPE n-1 c 257 * The ESCAPE value is chosen so as not to collide with commonly 258 * seen values. 259 */ 260 static public final String arrayToRLEString(short[] a) { 261 StringBuilder buffer = new StringBuilder(); 262 // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]); 263 buffer.append((char) (a.length >> 16)); 264 buffer.append((char) a.length); 265 short runValue = a[0]; 266 int runLength = 1; 267 for (int i=1; i<a.length; ++i) { 268 short s = a[i]; 269 if (s == runValue && runLength < 0xFFFF) ++runLength; 270 else { 271 encodeRun(buffer, runValue, runLength); 272 runValue = s; 273 runLength = 1; 274 } 275 } 276 encodeRun(buffer, runValue, runLength); 277 return buffer.toString(); 278 } 279 280 /** 281 * Construct a string representing a char array. Use run-length encoding. 282 * A character represents itself, unless it is the ESCAPE character. 
Then 283 * the following notations are possible: 284 * ESCAPE ESCAPE ESCAPE literal 285 * ESCAPE n c n instances of character c 286 * Since an encoded run occupies 3 characters, we only encode runs of 4 or 287 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 288 * If we encounter a run where n == ESCAPE, we represent this as: 289 * c ESCAPE n-1 c 290 * The ESCAPE value is chosen so as not to collide with commonly 291 * seen values. 292 */ 293 static public final String arrayToRLEString(char[] a) { 294 StringBuilder buffer = new StringBuilder(); 295 buffer.append((char) (a.length >> 16)); 296 buffer.append((char) a.length); 297 char runValue = a[0]; 298 int runLength = 1; 299 for (int i=1; i<a.length; ++i) { 300 char s = a[i]; 301 if (s == runValue && runLength < 0xFFFF) ++runLength; 302 else { 303 encodeRun(buffer, (short)runValue, runLength); 304 runValue = s; 305 runLength = 1; 306 } 307 } 308 encodeRun(buffer, (short)runValue, runLength); 309 return buffer.toString(); 310 } 311 312 /** 313 * Construct a string representing a byte array. Use run-length encoding. 314 * Two bytes are packed into a single char, with a single extra zero byte at 315 * the end if needed. A byte represents itself, unless it is the 316 * ESCAPE_BYTE. Then the following notations are possible: 317 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal 318 * ESCAPE_BYTE n b n instances of byte b 319 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or 320 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. 321 * If we encounter a run where n == ESCAPE_BYTE, we represent this as: 322 * b ESCAPE_BYTE n-1 b 323 * The ESCAPE_BYTE value is chosen so as not to collide with commonly 324 * seen values. 
325 */ 326 static public final String arrayToRLEString(byte[] a) { 327 StringBuilder buffer = new StringBuilder(); 328 buffer.append((char) (a.length >> 16)); 329 buffer.append((char) a.length); 330 byte runValue = a[0]; 331 int runLength = 1; 332 byte[] state = new byte[2]; 333 for (int i=1; i<a.length; ++i) { 334 byte b = a[i]; 335 if (b == runValue && runLength < 0xFF) ++runLength; 336 else { 337 encodeRun(buffer, runValue, runLength, state); 338 runValue = b; 339 runLength = 1; 340 } 341 } 342 encodeRun(buffer, runValue, runLength, state); 343 344 // We must save the final byte, if there is one, by padding 345 // an extra zero. 346 if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state); 347 348 return buffer.toString(); 349 } 350 351 /** 352 * Encode a run, possibly a degenerate run (of < 4 values). 353 * @param length The length of the run; must be > 0 && <= 0xFFFF. 354 */ 355 private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) { 356 if (length < 4) { 357 for (int j=0; j<length; ++j) { 358 if (value == ESCAPE) { 359 appendInt(buffer, value); 360 } 361 appendInt(buffer, value); 362 } 363 } 364 else { 365 if (length == (int) ESCAPE) { 366 if (value == (int) ESCAPE) { 367 appendInt(buffer, ESCAPE); 368 } 369 appendInt(buffer, value); 370 --length; 371 } 372 appendInt(buffer, ESCAPE); 373 appendInt(buffer, length); 374 appendInt(buffer, value); // Don't need to escape this value 375 } 376 } 377 378 private static final <T extends Appendable> void appendInt(T buffer, int value) { 379 try { 380 buffer.append((char)(value >>> 16)); 381 buffer.append((char)(value & 0xFFFF)); 382 } catch (IOException e) { 383 throw new IllegalIcuArgumentException(e); 384 } 385 } 386 387 /** 388 * Encode a run, possibly a degenerate run (of < 4 values). 389 * @param length The length of the run; must be > 0 && <= 0xFFFF. 
390 */ 391 private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) { 392 try { 393 if (length < 4) { 394 for (int j=0; j<length; ++j) { 395 if (value == (int) ESCAPE) 396 buffer.append(ESCAPE); 397 buffer.append((char) value); 398 } 399 } 400 else { 401 if (length == (int) ESCAPE) { 402 if (value == (int) ESCAPE) buffer.append(ESCAPE); 403 buffer.append((char) value); 404 --length; 405 } 406 buffer.append(ESCAPE); 407 buffer.append((char) length); 408 buffer.append((char) value); // Don't need to escape this value 409 } 410 } catch (IOException e) { 411 throw new IllegalIcuArgumentException(e); 412 } 413 } 414 415 /** 416 * Encode a run, possibly a degenerate run (of < 4 values). 417 * @param length The length of the run; must be > 0 && <= 0xFF. 418 */ 419 private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length, 420 byte[] state) { 421 if (length < 4) { 422 for (int j=0; j<length; ++j) { 423 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state); 424 appendEncodedByte(buffer, value, state); 425 } 426 } 427 else { 428 if (length == ESCAPE_BYTE) { 429 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state); 430 appendEncodedByte(buffer, value, state); 431 --length; 432 } 433 appendEncodedByte(buffer, ESCAPE_BYTE, state); 434 appendEncodedByte(buffer, (byte)length, state); 435 appendEncodedByte(buffer, value, state); // Don't need to escape this value 436 } 437 } 438 439 /** 440 * Append a byte to the given Appendable, packing two bytes into each 441 * character. The state parameter maintains intermediary data between 442 * calls. 443 * @param state A two-element array, with state[0] == 0 if this is the 444 * first byte of a pair, or state[0] != 0 if this is the second byte 445 * of a pair, in which case state[1] is the first byte. 
446 */ 447 private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value, 448 byte[] state) { 449 try { 450 if (state[0] != 0) { 451 char c = (char) ((state[1] << 8) | (((int) value) & 0xFF)); 452 buffer.append(c); 453 state[0] = 0; 454 } 455 else { 456 state[0] = 1; 457 state[1] = value; 458 } 459 } catch (IOException e) { 460 throw new IllegalIcuArgumentException(e); 461 } 462 } 463 464 /** 465 * Construct an array of ints from a run-length encoded string. 466 */ 467 static public final int[] RLEStringToIntArray(String s) { 468 int length = getInt(s, 0); 469 int[] array = new int[length]; 470 int ai = 0, i = 1; 471 472 int maxI = s.length() / 2; 473 while (ai < length && i < maxI) { 474 int c = getInt(s, i++); 475 476 if (c == ESCAPE) { 477 c = getInt(s, i++); 478 if (c == ESCAPE) { 479 array[ai++] = c; 480 } else { 481 int runLength = c; 482 int runValue = getInt(s, i++); 483 for (int j=0; j<runLength; ++j) { 484 array[ai++] = runValue; 485 } 486 } 487 } 488 else { 489 array[ai++] = c; 490 } 491 } 492 493 if (ai != length || i != maxI) { 494 throw new IllegalStateException("Bad run-length encoded int array"); 495 } 496 497 return array; 498 } 499 static final int getInt(String s, int i) { 500 return (((int) s.charAt(2*i)) << 16) | (int) s.charAt(2*i+1); 501 } 502 503 /** 504 * Construct an array of shorts from a run-length encoded string. 
505 */ 506 static public final short[] RLEStringToShortArray(String s) { 507 int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1)); 508 short[] array = new short[length]; 509 int ai = 0; 510 for (int i=2; i<s.length(); ++i) { 511 char c = s.charAt(i); 512 if (c == ESCAPE) { 513 c = s.charAt(++i); 514 if (c == ESCAPE) { 515 array[ai++] = (short) c; 516 } else { 517 int runLength = (int) c; 518 short runValue = (short) s.charAt(++i); 519 for (int j=0; j<runLength; ++j) array[ai++] = runValue; 520 } 521 } 522 else { 523 array[ai++] = (short) c; 524 } 525 } 526 527 if (ai != length) 528 throw new IllegalStateException("Bad run-length encoded short array"); 529 530 return array; 531 } 532 533 /** 534 * Construct an array of shorts from a run-length encoded string. 535 */ 536 static public final char[] RLEStringToCharArray(String s) { 537 int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1)); 538 char[] array = new char[length]; 539 int ai = 0; 540 for (int i=2; i<s.length(); ++i) { 541 char c = s.charAt(i); 542 if (c == ESCAPE) { 543 c = s.charAt(++i); 544 if (c == ESCAPE) { 545 array[ai++] = c; 546 } else { 547 int runLength = (int) c; 548 char runValue = s.charAt(++i); 549 for (int j=0; j<runLength; ++j) array[ai++] = runValue; 550 } 551 } 552 else { 553 array[ai++] = c; 554 } 555 } 556 557 if (ai != length) 558 throw new IllegalStateException("Bad run-length encoded short array"); 559 560 return array; 561 } 562 563 /** 564 * Construct an array of bytes from a run-length encoded string. 565 */ 566 static public final byte[] RLEStringToByteArray(String s) { 567 int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1)); 568 byte[] array = new byte[length]; 569 boolean nextChar = true; 570 char c = 0; 571 int node = 0; 572 int runLength = 0; 573 int i = 2; 574 for (int ai=0; ai<length; ) { 575 // This part of the loop places the next byte into the local 576 // variable 'b' each time through the loop. 
It keeps the 577 // current character in 'c' and uses the boolean 'nextChar' 578 // to see if we've taken both bytes out of 'c' yet. 579 byte b; 580 if (nextChar) { 581 c = s.charAt(i++); 582 b = (byte) (c >> 8); 583 nextChar = false; 584 } 585 else { 586 b = (byte) (c & 0xFF); 587 nextChar = true; 588 } 589 590 // This part of the loop is a tiny state machine which handles 591 // the parsing of the run-length encoding. This would be simpler 592 // if we could look ahead, but we can't, so we use 'node' to 593 // move between three nodes in the state machine. 594 switch (node) { 595 case 0: 596 // Normal idle node 597 if (b == ESCAPE_BYTE) { 598 node = 1; 599 } 600 else { 601 array[ai++] = b; 602 } 603 break; 604 case 1: 605 // We have seen one ESCAPE_BYTE; we expect either a second 606 // one, or a run length and value. 607 if (b == ESCAPE_BYTE) { 608 array[ai++] = ESCAPE_BYTE; 609 node = 0; 610 } 611 else { 612 runLength = b; 613 // Interpret signed byte as unsigned 614 if (runLength < 0) runLength += 0x100; 615 node = 2; 616 } 617 break; 618 case 2: 619 // We have seen an ESCAPE_BYTE and length byte. We interpret 620 // the next byte as the value to be repeated. 621 for (int j=0; j<runLength; ++j) array[ai++] = b; 622 node = 0; 623 break; 624 } 625 } 626 627 if (node != 0) 628 throw new IllegalStateException("Bad run-length encoded byte array"); 629 630 if (i != s.length()) 631 throw new IllegalStateException("Excess data in RLE byte array string"); 632 633 return array; 634 } 635 636 static public String LINE_SEPARATOR = System.getProperty("line.separator"); 637 638 /** 639 * Format a String for representation in a source file. This includes 640 * breaking it into lines and escaping characters using octal notation 641 * when necessary (control characters and double quotes). 
642 */ 643 static public final String formatForSource(String s) { 644 StringBuilder buffer = new StringBuilder(); 645 for (int i=0; i<s.length();) { 646 if (i > 0) buffer.append('+').append(LINE_SEPARATOR); 647 buffer.append(" \""); 648 int count = 11; 649 while (i<s.length() && count<80) { 650 char c = s.charAt(i++); 651 if (c < '\u0020' || c == '"' || c == '\\') { 652 if (c == '\n') { 653 buffer.append("\\n"); 654 count += 2; 655 } else if (c == '\t') { 656 buffer.append("\\t"); 657 count += 2; 658 } else if (c == '\r') { 659 buffer.append("\\r"); 660 count += 2; 661 } else { 662 // Represent control characters, backslash and double quote 663 // using octal notation; otherwise the string we form 664 // won't compile, since Unicode escape sequences are 665 // processed before tokenization. 666 buffer.append('\\'); 667 buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal 668 buffer.append(HEX_DIGIT[(c & 0070) >> 3]); 669 buffer.append(HEX_DIGIT[(c & 0007)]); 670 count += 4; 671 } 672 } 673 else if (c <= '\u007E') { 674 buffer.append(c); 675 count += 1; 676 } 677 else { 678 buffer.append("\\u"); 679 buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]); 680 buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]); 681 buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]); 682 buffer.append(HEX_DIGIT[(c & 0x000F)]); 683 count += 6; 684 } 685 } 686 buffer.append('"'); 687 } 688 return buffer.toString(); 689 } 690 691 static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7', 692 '8','9','A','B','C','D','E','F'}; 693 694 /** 695 * Format a String for representation in a source file. Like 696 * formatForSource but does not do line breaking. 
/**
 * Format a String for representation in a source file. Like
 * formatForSource but does not do line breaking: the whole input is
 * emitted as a single double-quoted literal.
 *
 * @param s the text to format
 * @return the quoted, escaped representation
 */
static public final String format1ForSource(String s) {
    StringBuilder buffer = new StringBuilder();
    buffer.append("\"");
    for (int i=0; i<s.length();) {
        char c = s.charAt(i++);
        if (c < '\u0020' || c == '"' || c == '\\') {
            if (c == '\n') {
                buffer.append("\\n");
            } else if (c == '\t') {
                buffer.append("\\t");
            } else if (c == '\r') {
                buffer.append("\\r");
            } else {
                // Represent control characters, backslash and double quote
                // using octal notation; otherwise the string we form
                // won't compile, since Unicode escape sequences are
                // processed before tokenization.
                buffer.append('\\');
                buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
                buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
                buffer.append(HEX_DIGIT[(c & 0007)]);
            }
        }
        else if (c <= '\u007E') {
            // Printable ASCIIafter the escapes above: copy through unchanged.
            buffer.append(c);
        }
        else {
            // Everything above U+007E becomes a four-digit Unicode escape.
            buffer.append("\\u");
            buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
            buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
            buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
            buffer.append(HEX_DIGIT[(c & 0x000F)]);
        }
    }
    buffer.append('"');
    return buffer.toString();
}

/**
 * Convert characters outside the range U+0020 to U+007F to
 * Unicode escapes, and convert backslash to a double backslash.
 * The input is walked by code point: code points up to U+FFFF are
 * written as backslash, lowercase u and 4 hex digits; larger ones as
 * backslash, uppercase U and 8 hex digits.
 *
 * @param s the text to escape
 * @return the escaped text
 */
public static final String escape(String s) {
    StringBuilder buf = new StringBuilder();
    for (int i=0; i<s.length(); ) {
        int c = Character.codePointAt(s, i);
        i += UTF16.getCharCount(c);
        if (c >= ' ' && c <= 0x007F) {
            if (c == '\\') {
                buf.append("\\\\"); // That is, "\\"
            } else {
                buf.append((char)c);
            }
        } else {
            boolean four = c <= 0xFFFF;
            buf.append(four ? "\\u" : "\\U");
            buf.append(hex(c, four ? 4 : 8));
        }
    }
    return buf.toString();
}

/*
 * Pairs of (escape letter, resulting character) for the C-style escapes
 * recognized by unescapeAt. The lookup loop there stops as soon as it
 * passes the letter it is looking for, so:
 * This map must be in ASCENDING ORDER OF THE ESCAPE CODE
 */
static private final char[] UNESCAPE_MAP = {
    /*"   0x22, 0x22 */
    /*'   0x27, 0x27 */
    /*?   0x3F, 0x3F */
    /*\   0x5C, 0x5C */
    /*a*/ 0x61, 0x07,
    /*b*/ 0x62, 0x08,
    /*e*/ 0x65, 0x1b,
    /*f*/ 0x66, 0x0c,
    /*n*/ 0x6E, 0x0a,
    /*r*/ 0x72, 0x0d,
    /*t*/ 0x74, 0x09,
    /*v*/ 0x76, 0x0b
};

/**
 * Convert an escape to a 32-bit code point value.  We attempt
 * to parallel the icu4c unescapeAt() function.
 *
 * Forms handled, keyed on the character after the backslash:
 * lowercase u with exactly 4 hex digits; uppercase U with exactly 8 hex
 * digits; x with 1-2 hex digits, or x followed by a braced group of hex
 * digits; 1-3 octal digits; the letters listed in UNESCAPE_MAP; c followed
 * by a character, giving that character masked with 0x1F. Any other
 * character is returned as itself.
 *
 * @param s the string containing the escape
 * @param offset16 an array containing offset to the character
 * <em>after</em> the backslash.  Upon return offset16[0] will
 * be updated to point after the escape sequence. It is left
 * unchanged when -1 is returned.
 * @return character value from 0 to 10FFFF, or -1 on error.
 */
public static int unescapeAt(String s, int[] offset16) {
    int c;
    int result = 0;
    int n = 0;              // number of digits consumed so far
    int minDig = 0;         // stays 0 when the escape is not numeric
    int maxDig = 0;
    int bitsPerDigit = 4;   // 4 for hex, 3 for octal
    int dig;
    int i;
    boolean braces = false;

    /* Check that offset is in range */
    int offset = offset16[0];
    int length = s.length();
    if (offset < 0 || offset >= length) {
        return -1;
    }

    /* Fetch first UChar after '\\' */
    c = Character.codePointAt(s, offset);
    offset += UTF16.getCharCount(c);

    /* Convert hexadecimal and octal escapes */
    switch (c) {
    case 'u':
        minDig = maxDig = 4;
        break;
    case 'U':
        minDig = maxDig = 8;
        break;
    case 'x':
        minDig = 1;
        if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
            ++offset;
            braces = true;
            maxDig = 8;
        } else {
            maxDig = 2;
        }
        break;
    default:
        dig = UCharacter.digit(c, 8);
        if (dig >= 0) {
            minDig = 1;
            maxDig = 3;
            n = 1; /* Already have first octal digit */
            bitsPerDigit = 3;
            result = dig;
        }
        break;
    }
    if (minDig != 0) {
        // Numeric escape: accumulate digits until a non-digit, the end of
        // the string, or maxDig digits have been read.
        while (offset < length && n < maxDig) {
            c = UTF16.charAt(s, offset);
            dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
            if (dig < 0) {
                break;
            }
            result = (result << bitsPerDigit) | dig;
            offset += UTF16.getCharCount(c);
            ++n;
        }
        if (n < minDig) {
            return -1;
        }
        if (braces) {
            // c is the last character the digit loop looked at, so it is
            // the closing brace only when the loop stopped on a non-digit.
            // NOTE(review): if all 8 digits are consumed, c is still the
            // last digit and this check fails even when '}' follows --
            // confirm whether that is intended.
            if (c != 0x7D /*}*/) {
                return -1;
            }
            ++offset;
        }
        // A negative result means the shifts overflowed the int.
        if (result < 0 || result >= 0x110000) {
            return -1;
        }
        // If an escape sequence specifies a lead surrogate, see
        // if there is a trail surrogate after it, either as an
        // escape or as a literal.  If so, join them up into a
        // supplementary.
        if (offset < length &&
                UTF16.isLeadSurrogate((char) result)) {
            int ahead = offset+1;
            c = s.charAt(offset); // [sic] get 16-bit code unit
            if (c == '\\' && ahead < length) {
                // Recursively decode the escape that follows.
                int o[] = new int[] { ahead };
                c = unescapeAt(s, o);
                ahead = o[0];
            }
            if (UTF16.isTrailSurrogate((char) c)) {
                offset = ahead;
                result = Character.toCodePoint((char) result, (char) c);
            }
        }
        offset16[0] = offset;
        return result;
    }

    /* Convert C-style escapes in table */
    for (i=0; i<UNESCAPE_MAP.length; i+=2) {
        if (c == UNESCAPE_MAP[i]) {
            offset16[0] = offset;
            return UNESCAPE_MAP[i+1];
        } else if (c < UNESCAPE_MAP[i]) {
            // The table is sorted, so c cannot appear further on.
            break;
        }
    }

    /* Map \cX to control-X: X & 0x1F */
    if (c == 'c' && offset < length) {
        c = UTF16.charAt(s, offset);
        offset16[0] = offset + UTF16.getCharCount(c);
        return 0x1F & c;
    }

    /* If no special forms are recognized, then consider
     * the backslash to generically escape the next character. */
    offset16[0] = offset;
    return c;
}
907 */ 908 public static String unescape(String s) { 909 StringBuilder buf = new StringBuilder(); 910 int[] pos = new int[1]; 911 for (int i=0; i<s.length(); ) { 912 char c = s.charAt(i++); 913 if (c == '\\') { 914 pos[0] = i; 915 int e = unescapeAt(s, pos); 916 if (e < 0) { 917 throw new IllegalArgumentException("Invalid escape sequence " + 918 s.substring(i-1, Math.min(i+8, s.length()))); 919 } 920 buf.appendCodePoint(e); 921 i = pos[0]; 922 } else { 923 buf.append(c); 924 } 925 } 926 return buf.toString(); 927 } 928 929 /** 930 * Convert all escapes in a given string using unescapeAt(). 931 * Leave invalid escape sequences unchanged. 932 */ 933 public static String unescapeLeniently(String s) { 934 StringBuilder buf = new StringBuilder(); 935 int[] pos = new int[1]; 936 for (int i=0; i<s.length(); ) { 937 char c = s.charAt(i++); 938 if (c == '\\') { 939 pos[0] = i; 940 int e = unescapeAt(s, pos); 941 if (e < 0) { 942 buf.append(c); 943 } else { 944 buf.appendCodePoint(e); 945 i = pos[0]; 946 } 947 } else { 948 buf.append(c); 949 } 950 } 951 return buf.toString(); 952 } 953 954 /** 955 * Convert a char to 4 hex uppercase digits. E.g., hex('a') => 956 * "0041". 957 */ 958 public static String hex(long ch) { 959 return hex(ch, 4); 960 } 961 962 /** 963 * Supplies a zero-padded hex representation of an integer (without 0x) 964 */ 965 static public String hex(long i, int places) { 966 if (i == Long.MIN_VALUE) return "-8000000000000000"; 967 boolean negative = i < 0; 968 if (negative) { 969 i = -i; 970 } 971 String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH); 972 if (result.length() < places) { 973 result = "0000000000000000".substring(result.length(),places) + result; 974 } 975 if (negative) { 976 return '-' + result; 977 } 978 return result; 979 } 980 981 /** 982 * Convert a string to comma-separated groups of 4 hex uppercase 983 * digits. E.g., hex('ab') => "0041,0042". 
984 */ 985 public static String hex(CharSequence s) { 986 return hex(s, 4, ",", true, new StringBuilder()).toString(); 987 } 988 989 /** 990 * Convert a string to separated groups of hex uppercase 991 * digits. E.g., hex('ab'...) => "0041,0042". Append the output 992 * to the given Appendable. 993 */ 994 public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) { 995 try { 996 if (useCodePoints) { 997 int cp; 998 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 999 cp = Character.codePointAt(s, i); 1000 if (i != 0) { 1001 result.append(separator); 1002 } 1003 result.append(hex(cp,width)); 1004 } 1005 } else { 1006 for (int i = 0; i < s.length(); ++i) { 1007 if (i != 0) { 1008 result.append(separator); 1009 } 1010 result.append(hex(s.charAt(i),width)); 1011 } 1012 } 1013 return result; 1014 } catch (IOException e) { 1015 throw new IllegalIcuArgumentException(e); 1016 } 1017 } 1018 1019 public static String hex(byte[] o, int start, int end, String separator) { 1020 StringBuilder result = new StringBuilder(); 1021 //int ch; 1022 for (int i = start; i < end; ++i) { 1023 if (i != 0) result.append(separator); 1024 result.append(hex(o[i])); 1025 } 1026 return result.toString(); 1027 } 1028 1029 /** 1030 * Convert a string to comma-separated groups of 4 hex uppercase 1031 * digits. E.g., hex('ab') => "0041,0042". 1032 */ 1033 public static <S extends CharSequence> String hex(S s, int width, S separator) { 1034 return hex(s, width, separator, true, new StringBuilder()).toString(); 1035 } 1036 1037 /** 1038 * Split a string into pieces based on the given divider character 1039 * @param s the string to split 1040 * @param divider the character on which to split. Occurrences of 1041 * this character are not included in the output 1042 * @param output an array to receive the substrings between 1043 * instances of divider. 
It must be large enough on entry to 1044 * accomodate all output. Adjacent instances of the divider 1045 * character will place empty strings into output. Before 1046 * returning, output is padded out with empty strings. 1047 */ 1048 public static void split(String s, char divider, String[] output) { 1049 int last = 0; 1050 int current = 0; 1051 int i; 1052 for (i = 0; i < s.length(); ++i) { 1053 if (s.charAt(i) == divider) { 1054 output[current++] = s.substring(last,i); 1055 last = i+1; 1056 } 1057 } 1058 output[current++] = s.substring(last,i); 1059 while (current < output.length) { 1060 output[current++] = ""; 1061 } 1062 } 1063 1064 /** 1065 * Split a string into pieces based on the given divider character 1066 * @param s the string to split 1067 * @param divider the character on which to split. Occurrences of 1068 * this character are not included in the output 1069 * @return output an array to receive the substrings between 1070 * instances of divider. Adjacent instances of the divider 1071 * character will place empty strings into output. 1072 */ 1073 public static String[] split(String s, char divider) { 1074 int last = 0; 1075 int i; 1076 ArrayList<String> output = new ArrayList<String>(); 1077 for (i = 0; i < s.length(); ++i) { 1078 if (s.charAt(i) == divider) { 1079 output.add(s.substring(last,i)); 1080 last = i+1; 1081 } 1082 } 1083 output.add( s.substring(last,i)); 1084 return output.toArray(new String[output.size()]); 1085 } 1086 1087 /** 1088 * Look up a given string in a string array. Returns the index at 1089 * which the first occurrence of the string was found in the 1090 * array, or -1 if it was not found. 
1091 * @param source the string to search for 1092 * @param target the array of zero or more strings in which to 1093 * look for source 1094 * @return the index of target at which source first occurs, or -1 1095 * if not found 1096 */ 1097 public static int lookup(String source, String[] target) { 1098 for (int i = 0; i < target.length; ++i) { 1099 if (source.equals(target[i])) return i; 1100 } 1101 return -1; 1102 } 1103 1104 /** 1105 * Parse a single non-whitespace character 'ch', optionally 1106 * preceded by whitespace. 1107 * @param id the string to be parsed 1108 * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the 1109 * offset of the first character to be parsed. On output, pos[0] 1110 * is the index after the last parsed character. If the parse 1111 * fails, pos[0] will be unchanged. 1112 * @param ch the non-whitespace character to be parsed. 1113 * @return true if 'ch' is seen preceded by zero or more 1114 * whitespace characters. 1115 */ 1116 public static boolean parseChar(String id, int[] pos, char ch) { 1117 int start = pos[0]; 1118 pos[0] = PatternProps.skipWhiteSpace(id, pos[0]); 1119 if (pos[0] == id.length() || 1120 id.charAt(pos[0]) != ch) { 1121 pos[0] = start; 1122 return false; 1123 } 1124 ++pos[0]; 1125 return true; 1126 } 1127 1128 /** 1129 * Parse a pattern string starting at offset pos. Keywords are 1130 * matched case-insensitively. Spaces may be skipped and may be 1131 * optional or required. Integer values may be parsed, and if 1132 * they are, they will be returned in the given array. If 1133 * successful, the offset of the next non-space character is 1134 * returned. On failure, -1 is returned. 1135 * @param pattern must only contain lowercase characters, which 1136 * will match their uppercase equivalents as well. A space 1137 * character matches one or more required spaces. A '~' character 1138 * matches zero or more optional spaces. 
A '#' character matches 1139 * an integer and stores it in parsedInts, which the caller must 1140 * ensure has enough capacity. 1141 * @param parsedInts array to receive parsed integers. Caller 1142 * must ensure that parsedInts.length is >= the number of '#' 1143 * signs in 'pattern'. 1144 * @return the position after the last character parsed, or -1 if 1145 * the parse failed 1146 */ 1147 @SuppressWarnings("fallthrough") 1148 public static int parsePattern(String rule, int pos, int limit, 1149 String pattern, int[] parsedInts) { 1150 // TODO Update this to handle surrogates 1151 int[] p = new int[1]; 1152 int intCount = 0; // number of integers parsed 1153 for (int i=0; i<pattern.length(); ++i) { 1154 char cpat = pattern.charAt(i); 1155 char c; 1156 switch (cpat) { 1157 case ' ': 1158 if (pos >= limit) { 1159 return -1; 1160 } 1161 c = rule.charAt(pos++); 1162 if (!PatternProps.isWhiteSpace(c)) { 1163 return -1; 1164 } 1165 // FALL THROUGH to skipWhitespace 1166 case '~': 1167 pos = PatternProps.skipWhiteSpace(rule, pos); 1168 break; 1169 case '#': 1170 p[0] = pos; 1171 parsedInts[intCount++] = parseInteger(rule, p, limit); 1172 if (p[0] == pos) { 1173 // Syntax error; failed to parse integer 1174 return -1; 1175 } 1176 pos = p[0]; 1177 break; 1178 default: 1179 if (pos >= limit) { 1180 return -1; 1181 } 1182 c = (char) UCharacter.toLowerCase(rule.charAt(pos++)); 1183 if (c != cpat) { 1184 return -1; 1185 } 1186 break; 1187 } 1188 } 1189 return pos; 1190 } 1191 1192 /** 1193 * Parse a pattern string within the given Replaceable and a parsing 1194 * pattern. Characters are matched literally and case-sensitively 1195 * except for the following special characters: 1196 * 1197 * ~ zero or more Pattern_White_Space chars 1198 * 1199 * If end of pattern is reached with all matches along the way, 1200 * pos is advanced to the first unparsed index and returned. 1201 * Otherwise -1 is returned. 
1202 * @param pat pattern that controls parsing 1203 * @param text text to be parsed, starting at index 1204 * @param index offset to first character to parse 1205 * @param limit offset after last character to parse 1206 * @return index after last parsed character, or -1 on parse failure. 1207 */ 1208 public static int parsePattern(String pat, 1209 Replaceable text, 1210 int index, 1211 int limit) { 1212 int ipat = 0; 1213 1214 // empty pattern matches immediately 1215 if (ipat == pat.length()) { 1216 return index; 1217 } 1218 1219 int cpat = Character.codePointAt(pat, ipat); 1220 1221 while (index < limit) { 1222 int c = text.char32At(index); 1223 1224 // parse \s* 1225 if (cpat == '~') { 1226 if (PatternProps.isWhiteSpace(c)) { 1227 index += UTF16.getCharCount(c); 1228 continue; 1229 } else { 1230 if (++ipat == pat.length()) { 1231 return index; // success; c unparsed 1232 } 1233 // fall thru; process c again with next cpat 1234 } 1235 } 1236 1237 // parse literal 1238 else if (c == cpat) { 1239 int n = UTF16.getCharCount(c); 1240 index += n; 1241 ipat += n; 1242 if (ipat == pat.length()) { 1243 return index; // success; c parsed 1244 } 1245 // fall thru; get next cpat 1246 } 1247 1248 // match failure of literal 1249 else { 1250 return -1; 1251 } 1252 1253 cpat = UTF16.charAt(pat, ipat); 1254 } 1255 1256 return -1; // text ended before end of pat 1257 } 1258 1259 /** 1260 * Parse an integer at pos, either of the form \d+ or of the form 1261 * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, 1262 * or octal format. 1263 * @param pos INPUT-OUTPUT parameter. On input, the first 1264 * character to parse. On output, the character after the last 1265 * parsed character. 
1266 */ 1267 public static int parseInteger(String rule, int[] pos, int limit) { 1268 int count = 0; 1269 int value = 0; 1270 int p = pos[0]; 1271 int radix = 10; 1272 1273 if (rule.regionMatches(true, p, "0x", 0, 2)) { 1274 p += 2; 1275 radix = 16; 1276 } else if (p < limit && rule.charAt(p) == '0') { 1277 p++; 1278 count = 1; 1279 radix = 8; 1280 } 1281 1282 while (p < limit) { 1283 int d = UCharacter.digit(rule.charAt(p++), radix); 1284 if (d < 0) { 1285 --p; 1286 break; 1287 } 1288 ++count; 1289 int v = (value * radix) + d; 1290 if (v <= value) { 1291 // If there are too many input digits, at some point 1292 // the value will go negative, e.g., if we have seen 1293 // "0x8000000" already and there is another '0', when 1294 // we parse the next 0 the value will go negative. 1295 return 0; 1296 } 1297 value = v; 1298 } 1299 if (count > 0) { 1300 pos[0] = p; 1301 } 1302 return value; 1303 } 1304 1305 /** 1306 * Parse a Unicode identifier from the given string at the given 1307 * position. Return the identifier, or null if there is no 1308 * identifier. 1309 * @param str the string to parse 1310 * @param pos INPUT-OUPUT parameter. On INPUT, pos[0] is the 1311 * first character to examine. It must be less than str.length(), 1312 * and it must not point to a whitespace character. That is, must 1313 * have pos[0] < str.length(). On 1314 * OUTPUT, the position after the last parsed character. 1315 * @return the Unicode identifier, or null if there is no valid 1316 * identifier at pos[0]. 
1317 */ 1318 public static String parseUnicodeIdentifier(String str, int[] pos) { 1319 // assert(pos[0] < str.length()); 1320 StringBuilder buf = new StringBuilder(); 1321 int p = pos[0]; 1322 while (p < str.length()) { 1323 int ch = Character.codePointAt(str, p); 1324 if (buf.length() == 0) { 1325 if (UCharacter.isUnicodeIdentifierStart(ch)) { 1326 buf.appendCodePoint(ch); 1327 } else { 1328 return null; 1329 } 1330 } else { 1331 if (UCharacter.isUnicodeIdentifierPart(ch)) { 1332 buf.appendCodePoint(ch); 1333 } else { 1334 break; 1335 } 1336 } 1337 p += UTF16.getCharCount(ch); 1338 } 1339 pos[0] = p; 1340 return buf.toString(); 1341 } 1342 1343 static final char DIGITS[] = { 1344 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 1345 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 1346 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 1347 'U', 'V', 'W', 'X', 'Y', 'Z' 1348 }; 1349 1350 /** 1351 * Append the digits of a positive integer to the given 1352 * <code>Appendable</code> in the given radix. This is 1353 * done recursively since it is easiest to generate the low- 1354 * order digit first, but it must be appended last. 1355 * 1356 * @param result is the <code>Appendable</code> to append to 1357 * @param n is the positive integer 1358 * @param radix is the radix, from 2 to 36 inclusive 1359 * @param minDigits is the minimum number of digits to append. 1360 */ 1361 private static <T extends Appendable> void recursiveAppendNumber(T result, int n, 1362 int radix, int minDigits) 1363 { 1364 try { 1365 int digit = n % radix; 1366 1367 if (n >= radix || minDigits > 1) { 1368 recursiveAppendNumber(result, n / radix, radix, minDigits - 1); 1369 } 1370 result.append(DIGITS[digit]); 1371 } catch (IOException e) { 1372 throw new IllegalIcuArgumentException(e); 1373 } 1374 } 1375 1376 /** 1377 * Append a number to the given Appendable in the given radix. 1378 * Standard digits '0'-'9' are used and letters 'A'-'Z' for 1379 * radices 11 through 36. 
1380 * @param result the digits of the number are appended here 1381 * @param n the number to be converted to digits; may be negative. 1382 * If negative, a '-' is prepended to the digits. 1383 * @param radix a radix from 2 to 36 inclusive. 1384 * @param minDigits the minimum number of digits, not including 1385 * any '-', to produce. Values less than 2 have no effect. One 1386 * digit is always emitted regardless of this parameter. 1387 * @return a reference to result 1388 */ 1389 public static <T extends Appendable> T appendNumber(T result, int n, 1390 int radix, int minDigits) 1391 { 1392 try { 1393 if (radix < 2 || radix > 36) { 1394 throw new IllegalArgumentException("Illegal radix " + radix); 1395 } 1396 1397 1398 int abs = n; 1399 1400 if (n < 0) { 1401 abs = -n; 1402 result.append("-"); 1403 } 1404 1405 recursiveAppendNumber(result, abs, radix, minDigits); 1406 1407 return result; 1408 } catch (IOException e) { 1409 throw new IllegalIcuArgumentException(e); 1410 } 1411 1412 } 1413 1414 /** 1415 * Parse an unsigned 31-bit integer at the given offset. Use 1416 * UCharacter.digit() to parse individual characters into digits. 1417 * @param text the text to be parsed 1418 * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the 1419 * offset within text at which to start parsing; it should point 1420 * to a valid digit. On exit, pos[0] is the offset after the last 1421 * parsed character. If the parse failed, it will be unchanged on 1422 * exit. Must be >= 0 on entry. 1423 * @param radix the radix in which to parse; must be >= 2 and <= 1424 * 36. 1425 * @return a non-negative parsed number, or -1 upon parse failure. 1426 * Parse fails if there are no digits, that is, if pos[0] does not 1427 * point to a valid digit on entry, or if the number to be parsed 1428 * does not fit into a 31-bit unsigned integer. 
1429 */ 1430 public static int parseNumber(String text, int[] pos, int radix) { 1431 // assert(pos[0] >= 0); 1432 // assert(radix >= 2); 1433 // assert(radix <= 36); 1434 int n = 0; 1435 int p = pos[0]; 1436 while (p < text.length()) { 1437 int ch = Character.codePointAt(text, p); 1438 int d = UCharacter.digit(ch, radix); 1439 if (d < 0) { 1440 break; 1441 } 1442 n = radix*n + d; 1443 // ASSUME that when a 32-bit integer overflows it becomes 1444 // negative. E.g., 214748364 * 10 + 8 => negative value. 1445 if (n < 0) { 1446 return -1; 1447 } 1448 ++p; 1449 } 1450 if (p == pos[0]) { 1451 return -1; 1452 } 1453 pos[0] = p; 1454 return n; 1455 } 1456 1457 /** 1458 * Return true if the character is NOT printable ASCII. The tab, 1459 * newline and linefeed characters are considered unprintable. 1460 */ 1461 public static boolean isUnprintable(int c) { 1462 //0x20 = 32 and 0x7E = 126 1463 return !(c >= 0x20 && c <= 0x7E); 1464 } 1465 1466 /** 1467 * Escape unprintable characters using <backslash>uxxxx notation 1468 * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and 1469 * above. If the character is printable ASCII, then do nothing 1470 * and return FALSE. Otherwise, append the escaped notation and 1471 * return TRUE. 
1472 */ 1473 public static <T extends Appendable> boolean escapeUnprintable(T result, int c) { 1474 try { 1475 if (isUnprintable(c)) { 1476 result.append('\\'); 1477 if ((c & ~0xFFFF) != 0) { 1478 result.append('U'); 1479 result.append(DIGITS[0xF&(c>>28)]); 1480 result.append(DIGITS[0xF&(c>>24)]); 1481 result.append(DIGITS[0xF&(c>>20)]); 1482 result.append(DIGITS[0xF&(c>>16)]); 1483 } else { 1484 result.append('u'); 1485 } 1486 result.append(DIGITS[0xF&(c>>12)]); 1487 result.append(DIGITS[0xF&(c>>8)]); 1488 result.append(DIGITS[0xF&(c>>4)]); 1489 result.append(DIGITS[0xF&c]); 1490 return true; 1491 } 1492 return false; 1493 } catch (IOException e) { 1494 throw new IllegalIcuArgumentException(e); 1495 } 1496 } 1497 1498 /** 1499 * Returns the index of the first character in a set, ignoring quoted text. 1500 * For example, in the string "abc'hide'h", the 'h' in "hide" will not be 1501 * found by a search for "h". Unlike String.indexOf(), this method searches 1502 * not for a single character, but for any character of the string 1503 * <code>setOfChars</code>. 1504 * @param text text to be searched 1505 * @param start the beginning index, inclusive; <code>0 <= start 1506 * <= limit</code>. 1507 * @param limit the ending index, exclusive; <code>start <= limit 1508 * <= text.length()</code>. 1509 * @param setOfChars string with one or more distinct characters 1510 * @return Offset of the first character in <code>setOfChars</code> 1511 * found, or -1 if not found. 1512 * @see String#indexOf 1513 */ 1514 public static int quotedIndexOf(String text, int start, int limit, 1515 String setOfChars) { 1516 for (int i=start; i<limit; ++i) { 1517 char c = text.charAt(i); 1518 if (c == BACKSLASH) { 1519 ++i; 1520 } else if (c == APOSTROPHE) { 1521 while (++i < limit 1522 && text.charAt(i) != APOSTROPHE) {} 1523 } else if (setOfChars.indexOf(c) >= 0) { 1524 return i; 1525 } 1526 } 1527 return -1; 1528 } 1529 1530 /** 1531 * Append a character to a rule that is being built up. 
To flush 1532 * the quoteBuf to rule, make one final call with isLiteral == true. 1533 * If there is no final character, pass in (int)-1 as c. 1534 * @param rule the string to append the character to 1535 * @param c the character to append, or (int)-1 if none. 1536 * @param isLiteral if true, then the given character should not be 1537 * quoted or escaped. Usually this means it is a syntactic element 1538 * such as > or $ 1539 * @param escapeUnprintable if true, then unprintable characters 1540 * should be escaped using escapeUnprintable(). These escapes will 1541 * appear outside of quotes. 1542 * @param quoteBuf a buffer which is used to build up quoted 1543 * substrings. The caller should initially supply an empty buffer, 1544 * and thereafter should not modify the buffer. The buffer should be 1545 * cleared out by, at the end, calling this method with a literal 1546 * character (which may be -1). 1547 */ 1548 public static void appendToRule(StringBuffer rule, 1549 int c, 1550 boolean isLiteral, 1551 boolean escapeUnprintable, 1552 StringBuffer quoteBuf) { 1553 // If we are escaping unprintables, then escape them outside 1554 // quotes. \\u and \\U are not recognized within quotes. The same 1555 // logic applies to literals, but literals are never escaped. 1556 if (isLiteral || 1557 (escapeUnprintable && Utility.isUnprintable(c))) { 1558 if (quoteBuf.length() > 0) { 1559 // We prefer backslash APOSTROPHE to double APOSTROPHE 1560 // (more readable, less similar to ") so if there are 1561 // double APOSTROPHEs at the ends, we pull them outside 1562 // of the quote. 1563 1564 // If the first thing in the quoteBuf is APOSTROPHE 1565 // (doubled) then pull it out. 
1566 while (quoteBuf.length() >= 2 && 1567 quoteBuf.charAt(0) == APOSTROPHE && 1568 quoteBuf.charAt(1) == APOSTROPHE) { 1569 rule.append(BACKSLASH).append(APOSTROPHE); 1570 quoteBuf.delete(0, 2); 1571 } 1572 // If the last thing in the quoteBuf is APOSTROPHE 1573 // (doubled) then remove and count it and add it after. 1574 int trailingCount = 0; 1575 while (quoteBuf.length() >= 2 && 1576 quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE && 1577 quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) { 1578 quoteBuf.setLength(quoteBuf.length()-2); 1579 ++trailingCount; 1580 } 1581 if (quoteBuf.length() > 0) { 1582 rule.append(APOSTROPHE); 1583 rule.append(quoteBuf); 1584 rule.append(APOSTROPHE); 1585 quoteBuf.setLength(0); 1586 } 1587 while (trailingCount-- > 0) { 1588 rule.append(BACKSLASH).append(APOSTROPHE); 1589 } 1590 } 1591 if (c != -1) { 1592 /* Since spaces are ignored during parsing, they are 1593 * emitted only for readability. We emit one here 1594 * only if there isn't already one at the end of the 1595 * rule. 1596 */ 1597 if (c == ' ') { 1598 int len = rule.length(); 1599 if (len > 0 && rule.charAt(len-1) != ' ') { 1600 rule.append(' '); 1601 } 1602 } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) { 1603 rule.appendCodePoint(c); 1604 } 1605 } 1606 } 1607 1608 // Escape ' and '\' and don't begin a quote just for them 1609 else if (quoteBuf.length() == 0 && 1610 (c == APOSTROPHE || c == BACKSLASH)) { 1611 rule.append(BACKSLASH).append((char)c); 1612 } 1613 1614 // Specials (printable ascii that isn't [0-9a-zA-Z]) and 1615 // whitespace need quoting. Also append stuff to quotes if we are 1616 // building up a quoted substring already. 
1617 else if (quoteBuf.length() > 0 || 1618 (c >= 0x0021 && c <= 0x007E && 1619 !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || 1620 (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || 1621 (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) || 1622 PatternProps.isWhiteSpace(c)) { 1623 quoteBuf.appendCodePoint(c); 1624 // Double ' within a quote 1625 if (c == APOSTROPHE) { 1626 quoteBuf.append((char)c); 1627 } 1628 } 1629 1630 // Otherwise just append 1631 else { 1632 rule.appendCodePoint(c); 1633 } 1634 } 1635 1636 /** 1637 * Append the given string to the rule. Calls the single-character 1638 * version of appendToRule for each character. 1639 */ 1640 public static void appendToRule(StringBuffer rule, 1641 String text, 1642 boolean isLiteral, 1643 boolean escapeUnprintable, 1644 StringBuffer quoteBuf) { 1645 for (int i=0; i<text.length(); ++i) { 1646 // Okay to process in 16-bit code units here 1647 appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf); 1648 } 1649 } 1650 1651 /** 1652 * Given a matcher reference, which may be null, append its 1653 * pattern as a literal to the given rule. 1654 */ 1655 public static void appendToRule(StringBuffer rule, 1656 UnicodeMatcher matcher, 1657 boolean escapeUnprintable, 1658 StringBuffer quoteBuf) { 1659 if (matcher != null) { 1660 appendToRule(rule, matcher.toPattern(escapeUnprintable), 1661 true, escapeUnprintable, quoteBuf); 1662 } 1663 } 1664 1665 /** 1666 * Compares 2 unsigned integers 1667 * @param source 32 bit unsigned integer 1668 * @param target 32 bit unsigned integer 1669 * @return 0 if equals, 1 if source is greater than target and -1 1670 * otherwise 1671 */ 1672 public static final int compareUnsigned(int source, int target) 1673 { 1674 source += MAGIC_UNSIGNED; 1675 target += MAGIC_UNSIGNED; 1676 if (source < target) { 1677 return -1; 1678 } 1679 else if (source > target) { 1680 return 1; 1681 } 1682 return 0; 1683 } 1684 1685 /** 1686 * Find the highest bit in a positive integer. 
This is done 1687 * by doing a binary search through the bits. 1688 * 1689 * @param n is the integer 1690 * 1691 * @return the bit number of the highest bit, with 0 being 1692 * the low order bit, or -1 if <code>n</code> is not positive 1693 */ 1694 public static final byte highBit(int n) 1695 { 1696 if (n <= 0) { 1697 return -1; 1698 } 1699 1700 byte bit = 0; 1701 1702 if (n >= 1 << 16) { 1703 n >>= 16; 1704 bit += 16; 1705 } 1706 1707 if (n >= 1 << 8) { 1708 n >>= 8; 1709 bit += 8; 1710 } 1711 1712 if (n >= 1 << 4) { 1713 n >>= 4; 1714 bit += 4; 1715 } 1716 1717 if (n >= 1 << 2) { 1718 n >>= 2; 1719 bit += 2; 1720 } 1721 1722 if (n >= 1 << 1) { 1723 n >>= 1; 1724 bit += 1; 1725 } 1726 1727 return bit; 1728 } 1729 /** 1730 * Utility method to take a int[] containing codepoints and return 1731 * a string representation with code units. 1732 */ 1733 public static String valueOf(int[]source){ 1734 // TODO: Investigate why this method is not on UTF16 class 1735 StringBuilder result = new StringBuilder(source.length); 1736 for(int i=0; i<source.length; i++){ 1737 result.appendCodePoint(source[i]); 1738 } 1739 return result.toString(); 1740 } 1741 1742 1743 /** 1744 * Utility to duplicate a string count times 1745 * @param s String to be duplicated. 1746 * @param count Number of times to duplicate a string. 1747 */ 1748 public static String repeat(String s, int count) { 1749 if (count <= 0) return ""; 1750 if (count == 1) return s; 1751 StringBuilder result = new StringBuilder(); 1752 for (int i = 0; i < count; ++i) { 1753 result.append(s); 1754 } 1755 return result.toString(); 1756 } 1757 1758 public static String[] splitString(String src, String target) { 1759 return src.split("\\Q" + target + "\\E"); 1760 } 1761 1762 /** 1763 * Split the string at runs of ascii whitespace characters. 
*/
    public static String[] splitWhitespace(String src) {
        return Pattern.compile("\\s+").split(src);
    }

    /**
     * Parse a list of hex numbers and return a string
     * @param string String of hex numbers.
     * @param minLength Minimal length.
     * @param separator Separator, as a regular expression; if null,
     * runs of whitespace separate the numbers.
     * @return A string from hex numbers.
     */
    public static String fromHex(String string, int minLength, String separator) {
        final String regex = (separator == null) ? "\\s+" : separator;
        return fromHex(string, minLength, Pattern.compile(regex));
    }

    /**
     * Parse a list of hex numbers and return a string
     * @param string String of hex numbers.
     * @param minLength Minimal length of each hex number; a shorter
     * one is rejected with an IllegalArgumentException.
     * @param separator Separator.
     * @return A string from hex numbers, one code point per number.
     */
    public static String fromHex(String string, int minLength, Pattern separator) {
        final StringBuilder decoded = new StringBuilder();
        for (String hexDigits : separator.split(string)) {
            if (hexDigits.length() < minLength) {
                throw new IllegalArgumentException("code point too short: " + hexDigits);
            }
            decoded.appendCodePoint(Integer.parseInt(hexDigits, 16));
        }
        return decoded.toString();
    }
}