1/* GENERATED SOURCE. DO NOT MODIFY. */ 2/* 3 ******************************************************************************* 4 * Copyright (C) 1996-2015, International Business Machines Corporation and * 5 * others. All Rights Reserved. * 6 ******************************************************************************* 7 */ 8package android.icu.dev.util; 9 10import java.util.ArrayList; 11import java.util.Collection; 12import java.util.Collections; 13import java.util.Comparator; 14import java.util.HashSet; 15import java.util.Iterator; 16import java.util.LinkedHashSet; 17import java.util.Map; 18import java.util.Map.Entry; 19import java.util.Set; 20import java.util.TreeMap; 21import java.util.TreeSet; 22 23import android.icu.impl.Utility; 24import android.icu.text.StringTransform; 25import android.icu.text.UTF16; 26import android.icu.text.UnicodeSet; 27import android.icu.text.UnicodeSetIterator; 28import android.icu.util.Freezable; 29 30/** 31 * Class for mapping Unicode characters and strings to values, optimized for single code points, 32 * where ranges of code points have the same value. 33 * Much smaller storage than using HashMap, and much faster and more compact than 34 * a list of UnicodeSets. The API design mimics Map<String,T> but can't extend it due to some 35 * necessary changes (much as UnicodeSet mimics Set<String>). Note that nulls are not permitted as values; 36 * that is, a put(x,null) is the same as remove(x).<br> 37 * At this point "" is also not allowed as a key, although that may change. 38 * @author markdavis 39 */ 40 41public final class UnicodeMap<T> implements Cloneable, Freezable<UnicodeMap<T>>, StringTransform, Iterable<String> { 42 /** 43 * For serialization 44 */ 45 //private static final long serialVersionUID = -6540936876295804105L; 46 static final boolean ASSERTIONS = false; 47 static final long GROWTH_PERCENT = 200; // 100 is no growth! 48 static final long GROWTH_GAP = 10; // extra bump! 49 50 private int length; 51 // two parallel arrays to save memory. Wish Java had structs. 52 private int[] transitions; 53 /* package private */ T[] values; 54 55 private LinkedHashSet<T> availableValues = new LinkedHashSet<T>(); 56 private transient boolean staleAvailableValues; 57 58 private transient boolean errorOnReset; 59 private volatile transient boolean locked; 60 private int lastIndex; 61 private TreeMap<String,T> stringMap; 62 63 { clear(); } 64 65 public UnicodeMap<T> clear() { 66 if (locked) { 67 throw new UnsupportedOperationException("Attempt to modify locked object"); 68 } 69 length = 2; 70 transitions = new int[] {0,0x110000,0,0,0,0,0,0,0,0}; 71 values = (T[]) new Object[10]; 72 73 availableValues.clear(); 74 staleAvailableValues = false; 75 76 errorOnReset = false; 77 lastIndex = 0; 78 stringMap = null; 79 return this; 80 } 81 82 /* Boilerplate */ 83 public boolean equals(Object other) { 84 if (other == null) return false; 85 try { 86 UnicodeMap that = (UnicodeMap) other; 87 if (length != that.length) return false; 88 for (int i = 0; i < length-1; ++i) { 89 if (transitions[i] != that.transitions[i]) return false; 90 if (!areEqual(values[i], that.values[i])) return false; 91 } 92 return true; 93 } catch (ClassCastException e) { 94 return false; 95 } 96 } 97 98 public static boolean areEqual(Object a , Object b) { 99 if (a == b) return true; 100 if (a == null || b == null) return false; 101 return a.equals(b); 102 } 103 104 public int hashCode() { 105 int result = length; 106 // TODO might want to abbreviate this for speed. 107 for (int i = 0; i < length-1; ++i) { 108 result = 37*result + transitions[i]; 109 result = 37*result + values[i].hashCode(); 110 } 111 return result; 112 } 113 114 /** 115 * Standard clone. Warning, as with Collections, does not do deep clone. 116 */ 117 public UnicodeMap<T> cloneAsThawed() { 118 UnicodeMap<T> that = new UnicodeMap<T>(); 119 that.length = length; 120 that.transitions = (int[]) transitions.clone(); 121 that.values = (T[]) values.clone(); 122 that.availableValues = new LinkedHashSet<T>(availableValues); 123 that.locked = false; 124 that.stringMap = stringMap == null ? null : (TreeMap<String, T>) stringMap.clone(); 125 return that; 126 } 127 128 /* for internal consistency checking */ 129 130 void _checkInvariants() { 131 if (length < 2 132 || length > transitions.length 133 || transitions.length != values.length) { 134 throw new IllegalArgumentException("Invariant failed: Lengths bad"); 135 } 136 for (int i = 1; i < length-1; ++i) { 137 if (areEqual(values[i-1], values[i])) { 138 throw new IllegalArgumentException("Invariant failed: values shared at " 139 + "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">" 140 + "\t" + Utility.hex(i) + ": <" + values[i] + ">" 141 ); 142 } 143 } 144 if (transitions[0] != 0 || transitions[length-1] != 0x110000) { 145 throw new IllegalArgumentException("Invariant failed: bounds set wrong"); 146 } 147 for (int i = 1; i < length-1; ++i) { 148 if (transitions[i-1] >= transitions[i]) { 149 throw new IllegalArgumentException("Invariant failed: not monotonic" 150 + "\t" + Utility.hex(i-1) + ": " + transitions[i-1] 151 + "\t" + Utility.hex(i) + ": " + transitions[i] 152 ); 153 } 154 } 155 } 156 157 /** 158 * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1] 159 * Assumes that 0 <= codepoint <= 0x10FFFF 160 * @param codepoint 161 * @return the index 162 */ 163 private int _findIndex(int c) { 164 int lo = 0; 165 int hi = length - 1; 166 int i = (lo + hi) >>> 1; 167 // invariant: c >= list[lo] 168 // invariant: c < list[hi] 169 while (i != lo) { 170 if (c < transitions[i]) { 171 hi = i; 172 } else { 173 lo = i; 174 } 175 i = (lo + hi) >>> 1; 176 } 177 if (ASSERTIONS) _checkFind(c, lo); 178 return lo; 179 } 180 181 private void _checkFind(int codepoint, int value) { 182 int other = __findIndex(codepoint); 183 if (other != value) { 184 throw new IllegalArgumentException("Invariant failed: binary search" 185 + "\t" + Utility.hex(codepoint) + ": " + value 186 + "\tshould be: " + other); 187 } 188 } 189 190 private int __findIndex(int codepoint) { 191 for (int i = length-1; i > 0; --i) { 192 if (transitions[i] <= codepoint) return i; 193 } 194 return 0; 195 } 196 197 /* 198 * Try indexed lookup 199 200 static final int SHIFT = 8; 201 int[] starts = new int[0x10FFFF>>SHIFT]; // lowest transition index where codepoint>>x can be found 202 boolean startsValid = false; 203 private int findIndex(int codepoint) { 204 if (!startsValid) { 205 int start = 0; 206 for (int i = 1; i < length; ++i) { 207 208 } 209 } 210 for (int i = length-1; i > 0; --i) { 211 if (transitions[i] <= codepoint) return i; 212 } 213 return 0; 214 } 215 */ 216 217 /** 218 * Remove the items from index through index+count-1. 219 * Logically reduces the size of the internal arrays. 220 * @param index 221 * @param count 222 */ 223 private void _removeAt(int index, int count) { 224 for (int i = index + count; i < length; ++i) { 225 transitions[i-count] = transitions[i]; 226 values[i-count] = values[i]; 227 } 228 length -= count; 229 } 230 /** 231 * Add a gap from index to index+count-1. 232 * The values there are undefined, and must be set. 233 * Logically grows arrays to accomodate. Actual growth is limited 234 * @param index 235 * @param count 236 */ 237 private void _insertGapAt(int index, int count) { 238 int newLength = length + count; 239 int[] oldtransitions = transitions; 240 T[] oldvalues = values; 241 if (newLength > transitions.length) { 242 int allocation = (int) (GROWTH_GAP + (newLength * GROWTH_PERCENT) / 100); 243 transitions = new int[allocation]; 244 values = (T[]) new Object[allocation]; 245 for (int i = 0; i < index; ++i) { 246 transitions[i] = oldtransitions[i]; 247 values[i] = oldvalues[i]; 248 } 249 } 250 for (int i = length - 1; i >= index; --i) { 251 transitions[i+count] = oldtransitions[i]; 252 values[i+count] = oldvalues[i]; 253 } 254 length = newLength; 255 } 256 257 /** 258 * Associates code point with value. Removes any previous association. 259 * All code that calls this MUST check for frozen first! 260 * @param codepoint 261 * @param value 262 * @return this, for chaining 263 */ 264 private UnicodeMap _put(int codepoint, T value) { 265 // Warning: baseIndex is an invariant; must 266 // be defined such that transitions[baseIndex] < codepoint 267 // at end of this routine. 268 int baseIndex; 269 if (transitions[lastIndex] <= codepoint 270 && codepoint < transitions[lastIndex+1]) { 271 baseIndex = lastIndex; 272 } else { 273 baseIndex = _findIndex(codepoint); 274 } 275 int limitIndex = baseIndex + 1; 276 // cases are (a) value is already set 277 if (areEqual(values[baseIndex], value)) return this; 278 if (locked) { 279 throw new UnsupportedOperationException("Attempt to modify locked object"); 280 } 281 if (errorOnReset && values[baseIndex] != null) { 282 throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint) 283 + " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value); 284 } 285 286 // adjust the available values 287 staleAvailableValues = true; 288 availableValues.add(value); // add if not there already 289 290 int baseCP = transitions[baseIndex]; 291 int limitCP = transitions[limitIndex]; 292 // we now start walking through the difference case, 293 // based on whether we are at the start or end of range 294 // and whether the range is a single character or multiple 295 296 if (baseCP == codepoint) { 297 // CASE: At very start of range 298 boolean connectsWithPrevious = 299 baseIndex != 0 && areEqual(value, values[baseIndex-1]); 300 301 if (limitCP == codepoint + 1) { 302 // CASE: Single codepoint range 303 boolean connectsWithFollowing = 304 baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1 305 306 if (connectsWithPrevious) { 307 // A1a connects with previous & following, so remove index 308 if (connectsWithFollowing) { 309 _removeAt(baseIndex, 2); 310 } else { 311 _removeAt(baseIndex, 1); // extend previous 312 } 313 --baseIndex; // fix up 314 } else if (connectsWithFollowing) { 315 _removeAt(baseIndex, 1); // extend following backwards 316 transitions[baseIndex] = codepoint; 317 } else { 318 // doesn't connect on either side, just reset 319 values[baseIndex] = value; 320 } 321 } else if (connectsWithPrevious) { 322 // A.1: start of multi codepoint range 323 // if connects 324 ++transitions[baseIndex]; // extend previous 325 } else { 326 // otherwise insert new transition 327 transitions[baseIndex] = codepoint+1; // fix following range 328 _insertGapAt(baseIndex, 1); 329 values[baseIndex] = value; 330 transitions[baseIndex] = codepoint; 331 } 332 } else if (limitCP == codepoint + 1) { 333 // CASE: at end of range 334 // if connects, just back up range 335 boolean connectsWithFollowing = 336 baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1 337 338 if (connectsWithFollowing) { 339 --transitions[limitIndex]; 340 return this; 341 } else { 342 _insertGapAt(limitIndex, 1); 343 transitions[limitIndex] = codepoint; 344 values[limitIndex] = value; 345 } 346 } else { 347 // CASE: in middle of range 348 // insert gap, then set the new range 349 _insertGapAt(++baseIndex,2); 350 transitions[baseIndex] = codepoint; 351 values[baseIndex] = value; 352 transitions[baseIndex+1] = codepoint + 1; 353 values[baseIndex+1] = values[baseIndex-1]; // copy lower range values 354 } 355 lastIndex = baseIndex; // store for next time 356 return this; 357 } 358 359 private UnicodeMap _putAll(int startCodePoint, int endCodePoint, T value) { 360 // TODO optimize 361 for (int i = startCodePoint; i <= endCodePoint; ++i) { 362 _put(i, value); 363 if (ASSERTIONS) _checkInvariants(); 364 } 365 return this; 366 } 367 368 /** 369 * Sets the codepoint value. 370 * @param codepoint 371 * @param value 372 * @return this (for chaining) 373 */ 374 public UnicodeMap<T> put(int codepoint, T value) { 375 if (codepoint < 0 || codepoint > 0x10FFFF) { 376 throw new IllegalArgumentException("Codepoint out of range: " + codepoint); 377 } 378 _put(codepoint, value); 379 if (ASSERTIONS) _checkInvariants(); 380 return this; 381 } 382 383 /** 384 * Sets the codepoint value. 385 * @param codepoint 386 * @param value 387 * @return this (for chaining) 388 */ 389 public UnicodeMap<T> put(String string, T value) { 390 int v = UnicodeSet.getSingleCodePoint(string); 391 if (v == Integer.MAX_VALUE) { 392 if (locked) { 393 throw new UnsupportedOperationException("Attempt to modify locked object"); 394 } 395 if (value != null) { 396 if (stringMap == null) { 397 stringMap = new TreeMap<String,T>(); 398 } 399 stringMap.put(string, value); 400 staleAvailableValues = true; 401 } else if (stringMap != null) { 402 if (stringMap.remove(string) != null) { 403 staleAvailableValues = true; 404 } 405 } 406 return this; 407 } 408 return put(v, value); 409 } 410 411 /** 412 * Adds bunch o' codepoints; otherwise like put. 413 * @param codepoints 414 * @param value 415 * @return this (for chaining) 416 */ 417 public UnicodeMap<T> putAll(UnicodeSet codepoints, T value) { 418 UnicodeSetIterator it = new UnicodeSetIterator(codepoints); 419 while (it.nextRange()) { 420 if (it.string == null) { 421 _putAll(it.codepoint, it.codepointEnd, value); 422 } else { 423 put(it.string, value); 424 } 425 } 426 return this; 427 } 428 429 /** 430 * Adds bunch o' codepoints; otherwise like add. 431 * @param startCodePoint 432 * @param endCodePoint 433 * @param value 434 * @return this (for chaining) 435 */ 436 public UnicodeMap<T> putAll(int startCodePoint, int endCodePoint, T value) { 437 if (locked) { 438 throw new UnsupportedOperationException("Attempt to modify locked object"); 439 } 440 if (startCodePoint < 0 || endCodePoint > 0x10FFFF) { 441 throw new IllegalArgumentException("Codepoint out of range: " 442 + Utility.hex(startCodePoint) + ".." + Utility.hex(endCodePoint)); 443 } 444 return _putAll(startCodePoint, endCodePoint, value); 445 } 446 447 /** 448 * Add all the (main) values from a UnicodeMap 449 * @param unicodeMap the property to add to the map 450 * @return this (for chaining) 451 */ 452 public UnicodeMap<T> putAll(UnicodeMap<T> unicodeMap) { 453 for (int i = 0; i < unicodeMap.length; ++i) { 454 T value = unicodeMap.values[i]; 455 if (value != null) { 456 _putAll(unicodeMap.transitions[i], unicodeMap.transitions[i+1]-1, value); 457 } 458 if (ASSERTIONS) _checkInvariants(); 459 } 460 if (unicodeMap.stringMap != null && !unicodeMap.stringMap.isEmpty()) { 461 if (stringMap == null) { 462 stringMap = new TreeMap<String,T>(); 463 } 464 stringMap.putAll(unicodeMap.stringMap); 465 } 466 return this; 467 } 468 469 /** 470 * Add all the (main) values from a Unicode property 471 * @param prop the property to add to the map 472 * @return this (for chaining) 473 */ 474 public UnicodeMap<T> putAllFiltered(UnicodeMap<T> prop, UnicodeSet filter) { 475 // TODO optimize 476 for (UnicodeSetIterator it = new UnicodeSetIterator(filter); it.next();) { 477 if (it.codepoint != UnicodeSetIterator.IS_STRING) { 478 T value = prop.getValue(it.codepoint); 479 if (value != null) { 480 _put(it.codepoint, value); 481 } 482 } 483 } 484 // now do the strings 485 for (String key : filter.strings()) { 486 T value = prop.get(key); 487 if (value != null) { 488 put(key, value); 489 } 490 } 491 return this; 492 } 493 494 /** 495 * Set the currently unmapped Unicode code points to the given value. 496 * @param value the value to set 497 * @return this (for chaining) 498 */ 499 public UnicodeMap<T> setMissing(T value) { 500 // fast path, if value not yet present 501 if (!getAvailableValues().contains(value)) { 502 staleAvailableValues = true; 503 availableValues.add(value); 504 for (int i = 0; i < length; ++i) { 505 if (values[i] == null) values[i] = value; 506 } 507 return this; 508 } else { 509 return putAll(keySet(null), value); 510 } 511 } 512 /** 513 * Returns the keyset consisting of all the keys that would produce the given value. Deposits into 514 * result if it is not null. Remember to clear if you just want 515 * the new values. 516 */ 517 public UnicodeSet keySet(T value, UnicodeSet result) { 518 if (result == null) result = new UnicodeSet(); 519 for (int i = 0; i < length - 1; ++i) { 520 if (areEqual(value, values[i])) { 521 result.add(transitions[i], transitions[i+1]-1); 522 } 523 } 524 if (value != null && stringMap != null) { 525 for (String key : stringMap.keySet()) { 526 T newValue = stringMap.get(key); 527 if (value.equals(newValue)) { 528 result.add((String)key); 529 } 530 } 531 } 532 return result; 533 } 534 535 /** 536 * Returns the keyset consisting of all the keys that would produce the given value. 537 * the new values. 538 */ 539 public UnicodeSet keySet(T value) { 540 return keySet(value,null); 541 } 542 543 /** 544 * Returns the keyset consisting of all the keys that would produce (non-null) values. 545 */ 546 public UnicodeSet keySet() { 547 UnicodeSet result = new UnicodeSet(); 548 for (int i = 0; i < length - 1; ++i) { 549 if (values[i] != null) { 550 result.add(transitions[i], transitions[i+1]-1); 551 } 552 } 553 if (stringMap != null) { 554 result.addAll(stringMap.keySet()); 555 } 556 return result; 557 } 558 559 /** 560 * Returns the list of possible values. Deposits each non-null value into 561 * result. Creates result if it is null. Remember to clear result if 562 * you are not appending to existing collection. 563 * @param result 564 * @return result 565 */ 566 public <U extends Collection<T>> U values(U result) { 567 if (staleAvailableValues) { 568 // collect all the current values 569 // retain them in the availableValues 570 Set<T> temp = new HashSet<T>(); 571 for (int i = 0; i < length - 1; ++i) { 572 if (values[i] != null) temp.add(values[i]); 573 } 574 availableValues.retainAll(temp); 575 if (stringMap != null) { 576 availableValues.addAll(stringMap.values()); 577 } 578 staleAvailableValues = false; 579 } 580 if (result == null) { 581 result = (U) new ArrayList<T>(availableValues.size()); 582 } 583 result.addAll(availableValues); 584 return result; 585 } 586 587 /** 588 * Convenience method 589 */ 590 public Collection<T> values() { 591 return getAvailableValues(null); 592 } 593 /** 594 * Gets the value associated with a given code point. 595 * Returns null, if there is no such value. 596 * @param codepoint 597 * @return the value 598 */ 599 public T get(int codepoint) { 600 if (codepoint < 0 || codepoint > 0x10FFFF) { 601 throw new IllegalArgumentException("Codepoint out of range: " + codepoint); 602 } 603 return values[_findIndex(codepoint)]; 604 } 605 606 /** 607 * Gets the value associated with a given code point. 608 * Returns null, if there is no such value. 609 * @param codepoint 610 * @return the value 611 */ 612 public T get(String value) { 613 if (UTF16.hasMoreCodePointsThan(value, 1)) { 614 if (stringMap == null) { 615 return null; 616 } 617 return stringMap.get(value); 618 } 619 return getValue(UTF16.charAt(value, 0)); 620 } 621 622 623 /** 624 * Change a new string from the source string according to the mappings. 625 * For each code point cp, if getValue(cp) is null, append the character, otherwise append getValue(cp).toString() 626 * TODO: extend to strings 627 * @param source 628 * @return 629 */ 630 public String transform(String source) { 631 StringBuffer result = new StringBuffer(); 632 int cp; 633 for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) { 634 cp = UTF16.charAt(source, i); 635 T mResult = getValue(cp); 636 if (mResult != null) { 637 result.append(mResult); 638 } else { 639 UTF16.append(result, cp); 640 } 641 } 642 return result.toString(); 643 } 644 645 /** 646 * Used to add complex values, where the value isn't replaced but in some sense composed 647 * @author markdavis 648 */ 649 public abstract static class Composer<T> { 650 /** 651 * This will be called with either a string or a code point. The result is the new value for that item. 652 * If the codepoint is used, the string is null; if the string is used, the codepoint is -1. 653 * @param a 654 * @param b 655 */ 656 public abstract T compose(int codePoint, String string, T a, T b); 657 } 658 659 public UnicodeMap<T> composeWith(UnicodeMap<T> other, Composer<T> composer) { 660 for (T value : other.getAvailableValues()) { 661 UnicodeSet set = other.keySet(value); 662 composeWith(set, value, composer); 663 } 664 return this; 665 } 666 667 public UnicodeMap<T> composeWith(UnicodeSet set, T value, Composer<T> composer) { 668 for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { 669 int i = it.codepoint; 670 if (i == UnicodeSetIterator.IS_STRING) { 671 String s = it.string; 672 T v1 = getValue(s); 673 T v3 = composer.compose(-1, s, v1, value); 674 if (v1 != v3 && (v1 == null || !v1.equals(v3))) { 675 put(s, v3); 676 } 677 } else { 678 T v1 = getValue(i); 679 T v3 = composer.compose(i, null, v1, value); 680 if (v1 != v3 && (v1 == null || !v1.equals(v3))) { 681 put(i, v3); 682 } 683 } 684 } 685 return this; 686 } 687 688 public String toString() { 689 return toString(null); 690 } 691 692 public String toString(Comparator<T> collected) { 693 StringBuffer result = new StringBuffer(); 694 if (collected == null) { 695 for (int i = 0; i < length-1; ++i) { 696 T value = values[i]; 697 if (value == null) continue; 698 int start = transitions[i]; 699 int end = transitions[i+1]-1; 700 result.append(Utility.hex(start)); 701 if (start != end) result.append("-").append(Utility.hex(end)); 702 result.append("=").append(value.toString()).append("\n"); 703 } 704 if (stringMap != null) { 705 for (String s : stringMap.keySet()) { 706 result.append(Utility.hex(s)).append("=").append(stringMap.get(s).toString()).append("\n"); 707 } 708 } 709 } else { 710 Set<T> set = values(new TreeSet<T>(collected)); 711 for (Iterator<T> it = set.iterator(); it.hasNext();) { 712 T value = it.next(); 713 UnicodeSet s = keySet(value); 714 result.append(value).append("=").append(s.toString()).append("\n"); 715 } 716 } 717 return result.toString(); 718 } 719 /** 720 * @return Returns the errorOnReset value. 721 */ 722 public boolean getErrorOnReset() { 723 return errorOnReset; 724 } 725 /** 726 * Puts the UnicodeMap into a state whereby new mappings are accepted, but changes to old mappings cause an exception. 727 * @param errorOnReset The errorOnReset to set. 728 */ 729 public UnicodeMap<T> setErrorOnReset(boolean errorOnReset) { 730 this.errorOnReset = errorOnReset; 731 return this; 732 } 733 734 /* (non-Javadoc) 735 * @see android.icu.dev.test.util.Freezable#isFrozen() 736 */ 737 public boolean isFrozen() { 738 // TODO Auto-generated method stub 739 return locked; 740 } 741 742 /* (non-Javadoc) 743 * @see android.icu.dev.test.util.Freezable#lock() 744 */ 745 public UnicodeMap<T> freeze() { 746 locked = true; 747 return this; 748 } 749 750 /** 751 * Utility to find the maximal common prefix of two strings. 752 * TODO: fix supplemental support 753 */ 754 static public int findCommonPrefix(String last, String s) { 755 int minLen = Math.min(last.length(), s.length()); 756 for (int i = 0; i < minLen; ++i) { 757 if (last.charAt(i) != s.charAt(i)) return i; 758 } 759 return minLen; 760 } 761 762 /** 763 * Get the number of ranges; used for getRangeStart/End. The ranges together cover all of the single-codepoint keys in the UnicodeMap. Other keys can be gotten with getStrings(). 764 */ 765 public int getRangeCount() { 766 return length-1; 767 } 768 769 /** 770 * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset. 771 */ 772 public int getRangeStart(int range) { 773 return transitions[range]; 774 } 775 776 /** 777 * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset. 778 */ 779 public int getRangeEnd(int range) { 780 return transitions[range+1] - 1; 781 } 782 783 /** 784 * Get the value for the range. 785 */ 786 public T getRangeValue(int range) { 787 return values[range]; 788 } 789 790 /** 791 * Get the strings that are not in the ranges. Returns null if there are none. 792 * @return 793 */ 794 public Set<String> getNonRangeStrings() { 795 if (stringMap == null || stringMap.isEmpty()) { 796 return null; 797 } 798 return Collections.unmodifiableSet(stringMap.keySet()); 799 } 800 801 static final boolean DEBUG_WRITE = false; 802 803 /* (non-Javadoc) 804 * @see java.util.Map#containsKey(java.lang.Object) 805 */ 806 public boolean containsKey(String key) { 807 return getValue(key) != null; 808 } 809 810 /* (non-Javadoc) 811 * @see java.util.Map#containsKey(java.lang.Object) 812 */ 813 public boolean containsKey(int key) { 814 return getValue(key) != null; 815 } 816 817 /* (non-Javadoc) 818 * @see java.util.Map#containsValue(java.lang.Object) 819 */ 820 public boolean containsValue(T value) { 821 // TODO Optimize 822 return getAvailableValues().contains(value); 823 } 824 825 /* (non-Javadoc) 826 * @see java.util.Map#isEmpty() 827 */ 828 public boolean isEmpty() { 829 return size() == 0; 830 } 831 832 /* (non-Javadoc) 833 * @see java.util.Map#putAll(java.util.Map) 834 */ 835 public UnicodeMap<T> putAll(Map<? extends String, ? extends T> map) { 836 for (String key : map.keySet()) { 837 put(key,map.get(key)); 838 } 839 return this; 840 } 841 842 /** 843 * Utility for extracting map 844 */ 845 public UnicodeMap<T> putAllIn(Map<? super String, ? super T> map) { 846 for (String key : keySet()) { 847 map.put(key, get(key)); 848 } 849 return this; 850 } 851 852 /* (non-Javadoc) 853 * @see java.util.Map#remove(java.lang.Object) 854 */ 855 public UnicodeMap<T> remove(String key) { 856 return put(key, null); 857 } 858 859 /* (non-Javadoc) 860 * @see java.util.Map#remove(java.lang.Object) 861 */ 862 public UnicodeMap<T> remove(int key) { 863 return put(key, null); 864 } 865 866 /* (non-Javadoc) 867 * @see java.util.Map#size() 868 */ 869 public int size() { 870 int result = stringMap == null ? 0 : stringMap.size(); 871 for (int i = 0; i < length-1; ++i) { 872 T value = values[i]; 873 if (value == null) continue; 874 result += transitions[i+1] - transitions[i]; 875 } 876 return result; 877 } 878 879 /* (non-Javadoc) 880 * @see java.util.Map#entrySet() 881 */ 882 public Iterable<Entry<String,T>> entrySet() { 883 return new EntrySetX(); 884 } 885 886 private class EntrySetX implements Iterable<Entry<String, T>> { 887 public Iterator<Entry<String, T>> iterator() { 888 return new IteratorX(); 889 } 890 public String toString() { 891 StringBuffer b = new StringBuffer(); 892 for (Iterator it = iterator(); it.hasNext();) { 893 Object item = it.next(); 894 b.append(item.toString()).append(' '); 895 } 896 return b.toString(); 897 } 898 } 899 900 private class IteratorX implements Iterator<Entry<String, T>> { 901 Iterator<String> iterator = keySet().iterator(); 902 903 /* (non-Javadoc) 904 * @see java.util.Iterator#hasNext() 905 */ 906 public boolean hasNext() { 907 return iterator.hasNext(); 908 } 909 910 /* (non-Javadoc) 911 * @see java.util.Iterator#next() 912 */ 913 public Entry<String, T> next() { 914 String key = iterator.next(); 915 return new ImmutableEntry(key, get(key)); 916 } 917 918 /* (non-Javadoc) 919 * @see java.util.Iterator#remove() 920 */ 921 public void remove() { 922 throw new UnsupportedOperationException(); 923 } 924 925 } 926 927 /** 928 * Struct-like class used to iterate over a UnicodeMap in a for loop. 929 * If the value is a string, then codepoint == codepointEnd == -1. Otherwise the string is null; 930 * Caution: The contents may change during the iteration! 931 */ 932 public static class EntryRange<T> { 933 public int codepoint; 934 public int codepointEnd; 935 public String string; 936 public T value; 937 @Override 938 public String toString() { 939 return (string != null ? Utility.hex(string) 940 : Utility.hex(codepoint) + (codepoint == codepointEnd ? "" : ".." + Utility.hex(codepointEnd))) 941 + "=" + value; 942 } 943 } 944 945 /** 946 * Returns an Iterable over EntryRange, designed for efficient for loops over UnicodeMaps. 947 * Caution: For efficiency, the EntryRange may be reused, so the EntryRange may change on each iteration! 948 * The value is guaranteed never to be null. 949 * @return entry range, for for loops 950 */ 951 public Iterable<EntryRange<T>> entryRanges() { 952 return new EntryRanges(); 953 } 954 955 private class EntryRanges implements Iterable<EntryRange<T>>, Iterator<EntryRange<T>> { 956 private int pos; 957 private EntryRange<T> result = new EntryRange<T>(); 958 private int lastRealRange = values[length-2] == null ? length - 2 : length - 1; 959 private Iterator<Entry<String, T>> stringIterator = stringMap == null ? null : stringMap.entrySet().iterator(); 960 961 public Iterator<EntryRange<T>> iterator() { 962 return this; 963 } 964 public boolean hasNext() { 965 return pos < lastRealRange || (stringIterator != null && stringIterator.hasNext()); 966 } 967 public EntryRange<T> next() { 968 // a range may be null, but then the next one must not be (except the final range) 969 if (pos < lastRealRange) { 970 T temp = values[pos]; 971 if (temp == null) { 972 temp = values[++pos]; 973 } 974 result.codepoint = transitions[pos]; 975 result.codepointEnd = transitions[pos+1]-1; 976 result.string = null; 977 result.value = temp; 978 ++pos; 979 } else { 980 Entry<String, T> entry = stringIterator.next(); 981 result.codepoint = result.codepointEnd = -1; 982 result.string = entry.getKey(); 983 result.value = entry.getValue(); 984 } 985 return result; 986 } 987 public void remove() { 988 throw new UnsupportedOperationException(); 989 } 990 } 991 992 /* (non-Javadoc) 993 * @see java.lang.Iterable#iterator() 994 */ 995 public Iterator<String> iterator() { 996 return keySet().iterator(); 997 } 998 999 /** 1000 * Old form for compatibility 1001 */ 1002 public T getValue(String key) { 1003 return get(key); 1004 } 1005 1006 /** 1007 * Old form for compatibility 1008 */ 1009 public T getValue(int key) { 1010 // TODO Auto-generated method stub 1011 return get(key); 1012 } 1013 1014 /** 1015 * Old form for compatibility 1016 */ 1017 public Collection<T> getAvailableValues() { 1018 return values(); 1019 } 1020 1021 /** 1022 * Old form for compatibility 1023 */ 1024 public <U extends Collection<T>> U getAvailableValues(U result) { 1025 return values(result); 1026 } 1027 1028 /** 1029 * Old form for compatibility 1030 */ 1031 public UnicodeSet getSet(T value) { 1032 return keySet(value); 1033 } 1034 1035 /** 1036 * Old form for compatibility 1037 */ 1038 public UnicodeSet getSet(T value, UnicodeSet result) { 1039 return keySet(value, result); 1040 } 1041 1042 // This is to support compressed serialization. It works; just commented out for now as we shift to Generics 1043 // TODO Fix once generics are cleaned up. 1044 // // TODO Fix to serialize more than just strings. 1045 // // Only if all the items are strings will we do the following compression 1046 // // Otherwise we'll just use Java Serialization, bulky as it is 1047 // public void writeExternal(ObjectOutput out1) throws IOException { 1048 // DataOutputCompressor sc = new DataOutputCompressor(out1); 1049 // // if all objects are strings 1050 // Collection<T> availableVals = getAvailableValues(); 1051 // boolean allStrings = allAreString(availableVals); 1052 // sc.writeBoolean(allStrings); 1053 // Map object_index = new LinkedHashMap(); 1054 // if (allAreString(availableVals)) { 1055 // sc.writeStringSet(new TreeSet(availableVals), object_index); 1056 // } else { 1057 // sc.writeCollection(availableVals, object_index); 1058 // } 1059 // sc.writeUInt(length); 1060 // int lastTransition = -1; 1061 // int lastValueNumber = 0; 1062 // if (DEBUG_WRITE) System.out.println("Trans count: " + length); 1063 // for (int i = 0; i < length; ++i) { 1064 // int valueNumber = ((Integer)object_index.get(values[i])).intValue(); 1065 // if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + valueNumber); 1066 // 1067 // int deltaTransition = transitions[i] - lastTransition; 1068 // lastTransition = transitions[i]; 1069 // int deltaValueNumber = valueNumber - lastValueNumber; 1070 // lastValueNumber = valueNumber; 1071 // 1072 // deltaValueNumber <<= 1; // make room for one bit 1073 // boolean canCombine = deltaTransition == 1; 1074 // if (canCombine) deltaValueNumber |= 1; 1075 // sc.writeInt(deltaValueNumber); 1076 // if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + deltaValueNumber); 1077 // if (!canCombine) { 1078 // sc.writeUInt(deltaTransition); 1079 // if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition); 1080 // } 1081 // } 1082 // sc.flush(); 1083 // } 1084 // 1085 // /** 1086 // * 1087 // */ 1088 // private boolean allAreString(Collection<T> availableValues2) { 1089 // //if (true) return false; 1090 // for (Iterator<T> it = availableValues2.iterator(); it.hasNext();) { 1091 // if (!(it.next() instanceof String)) return false; 1092 // } 1093 // return true; 1094 // } 1095 // 1096 // public void readExternal(ObjectInput in1) throws IOException, ClassNotFoundException { 1097 // DataInputCompressor sc = new DataInputCompressor(in1); 1098 // boolean allStrings = sc.readBoolean(); 1099 // T[] valuesList; 1100 // availableValues = new LinkedHashSet(); 1101 // if (allStrings) { 1102 // valuesList = sc.readStringSet(availableValues); 1103 // } else { 1104 // valuesList = sc.readCollection(availableValues); 1105 // } 1106 // length = sc.readUInt(); 1107 // transitions = new int[length]; 1108 // if (DEBUG_WRITE) System.out.println("Trans count: " + length); 1109 // values = (T[]) new Object[length]; 1110 // int currentTransition = -1; 1111 // int currentValue = 0; 1112 // int deltaTransition; 1113 // for (int i = 0; i < length; ++i) { 1114 // int temp = sc.readInt(); 1115 // if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + temp); 1116 // boolean combined = (temp & 1) != 0; 1117 // temp >>= 1; 1118 // values[i] = valuesList[currentValue += temp]; 1119 // if (!combined) { 1120 // deltaTransition = sc.readUInt(); 1121 // if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition); 1122 // } else { 1123 // deltaTransition = 1; 1124 // } 1125 // transitions[i] = currentTransition += deltaTransition; // delta value 1126 // if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + currentValue); 1127 // } 1128 // } 1129 1130 public final UnicodeMap<T> removeAll(UnicodeSet set) { 1131 return putAll(set, null); 1132 } 1133 1134 public final UnicodeMap<T> removeAll(UnicodeMap<T> reference) { 1135 return removeRetainAll(reference, true); 1136 } 1137 1138 public final UnicodeMap<T> retainAll(UnicodeSet set) { 1139 UnicodeSet toNuke = new UnicodeSet(); 1140 // TODO Optimize 1141 for (EntryRange<T> ae : entryRanges()) { 1142 if (ae.string != null) { 1143 if (!set.contains(ae.string)) { 1144 toNuke.add(ae.string); 1145 } 1146 } else { 1147 for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) { 1148 if (!set.contains(i)) { 1149 toNuke.add(i); 1150 } 1151 } 1152 } 1153 } 1154 return putAll(toNuke, null); 1155 } 1156 1157 public final UnicodeMap<T> retainAll(UnicodeMap<T> reference) { 1158 return removeRetainAll(reference, false); 1159 } 1160 1161 private final UnicodeMap<T> removeRetainAll(UnicodeMap<T> reference, boolean remove) { 1162 UnicodeSet toNuke = new UnicodeSet(); 1163 // TODO Optimize 1164 for (EntryRange<T> ae : entryRanges()) { 1165 if (ae.string != null) { 1166 if (ae.value.equals(reference.get(ae.string)) == remove) { 1167 toNuke.add(ae.string); 1168 } 1169 } else { 1170 for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) { 1171 if (ae.value.equals(reference.get(i)) == remove) { 1172 toNuke.add(i); 1173 } 1174 } 1175 } 1176 } 1177 return putAll(toNuke, null); 1178 } 1179 1180 /** 1181 * Returns the keys that consist of multiple code points. 1182 * @return 1183 */ 1184 public Set<String> stringKeys() { 1185 return stringMap.keySet(); 1186 } 1187} 1188