1/* GENERATED SOURCE. DO NOT MODIFY. */
2/*
3 *******************************************************************************
4 * Copyright (C) 1996-2015, International Business Machines Corporation and    *
5 * others. All Rights Reserved.                                                *
6 *******************************************************************************
7 */
8package android.icu.dev.util;
9
10import java.util.ArrayList;
11import java.util.Collection;
12import java.util.Collections;
13import java.util.Comparator;
14import java.util.HashSet;
15import java.util.Iterator;
16import java.util.LinkedHashSet;
17import java.util.Map;
18import java.util.Map.Entry;
19import java.util.Set;
20import java.util.TreeMap;
21import java.util.TreeSet;
22
23import android.icu.impl.Utility;
24import android.icu.text.StringTransform;
25import android.icu.text.UTF16;
26import android.icu.text.UnicodeSet;
27import android.icu.text.UnicodeSetIterator;
28import android.icu.util.Freezable;
29
30/**
31 * Class for mapping Unicode characters and strings to values, optimized for single code points,
32 * where ranges of code points have the same value.
33 * Much smaller storage than using HashMap, and much faster and more compact than
34 * a list of UnicodeSets. The API design mimics Map<String,T> but can't extend it due to some
35 * necessary changes (much as UnicodeSet mimics Set<String>). Note that nulls are not permitted as values;
36 * that is, a put(x,null) is the same as remove(x).<br>
37 * At this point "" is also not allowed as a key, although that may change.
38 * @author markdavis
39 */
40
41public final class UnicodeMap<T> implements Cloneable, Freezable<UnicodeMap<T>>, StringTransform, Iterable<String> {
42    /**
43     * For serialization
44     */
45    //private static final long serialVersionUID = -6540936876295804105L;
46    static final boolean ASSERTIONS = false;
47    static final long GROWTH_PERCENT = 200; // 100 is no growth!
48    static final long GROWTH_GAP = 10; // extra bump!
49
50    private int length;
51    // two parallel arrays to save memory. Wish Java had structs.
52    private int[] transitions;
53    /* package private */ T[] values;
54
55    private LinkedHashSet<T> availableValues = new LinkedHashSet<T>();
56    private transient boolean staleAvailableValues;
57
58    private transient boolean errorOnReset;
59    private volatile transient boolean locked;
60    private int lastIndex;
61    private TreeMap<String,T> stringMap;
62
63    { clear(); }
64
65    public UnicodeMap<T> clear() {
66        if (locked) {
67            throw new UnsupportedOperationException("Attempt to modify locked object");
68        }
69        length = 2;
70        transitions = new int[] {0,0x110000,0,0,0,0,0,0,0,0};
71        values = (T[]) new Object[10];
72
73        availableValues.clear();
74        staleAvailableValues = false;
75
76        errorOnReset = false;
77        lastIndex = 0;
78        stringMap = null;
79        return this;
80    }
81
82    /* Boilerplate */
83    public boolean equals(Object other) {
84        if (other == null) return false;
85        try {
86            UnicodeMap that = (UnicodeMap) other;
87            if (length != that.length) return false;
88            for (int i = 0; i < length-1; ++i) {
89                if (transitions[i] != that.transitions[i]) return false;
90                if (!areEqual(values[i], that.values[i])) return false;
91            }
92            return true;
93        } catch (ClassCastException e) {
94            return false;
95        }
96    }
97
98    public static boolean areEqual(Object a , Object b) {
99        if (a == b) return true;
100        if (a == null || b == null) return false;
101        return a.equals(b);
102    }
103
104    public int hashCode() {
105        int result = length;
106        // TODO might want to abbreviate this for speed.
107        for (int i = 0; i < length-1; ++i) {
108            result = 37*result + transitions[i];
109            result = 37*result + values[i].hashCode();
110        }
111        return result;
112    }
113
114    /**
115     * Standard clone. Warning, as with Collections, does not do deep clone.
116     */
117    public UnicodeMap<T> cloneAsThawed() {
118        UnicodeMap<T> that = new UnicodeMap<T>();
119        that.length = length;
120        that.transitions = (int[]) transitions.clone();
121        that.values = (T[]) values.clone();
122        that.availableValues = new LinkedHashSet<T>(availableValues);
123        that.locked = false;
124        that.stringMap = stringMap == null ? null : (TreeMap<String, T>) stringMap.clone();
125        return that;
126    }
127
128    /* for internal consistency checking */
129
130    void _checkInvariants() {
131        if (length < 2
132                || length > transitions.length
133                || transitions.length != values.length) {
134            throw new IllegalArgumentException("Invariant failed: Lengths bad");
135        }
136        for (int i = 1; i < length-1; ++i) {
137            if (areEqual(values[i-1], values[i])) {
138                throw new IllegalArgumentException("Invariant failed: values shared at "
139                        + "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">"
140                        + "\t" + Utility.hex(i) + ": <" + values[i] + ">"
141                );
142            }
143        }
144        if (transitions[0] != 0 || transitions[length-1] != 0x110000) {
145            throw new IllegalArgumentException("Invariant failed: bounds set wrong");
146        }
147        for (int i = 1; i < length-1; ++i) {
148            if (transitions[i-1] >= transitions[i]) {
149                throw new IllegalArgumentException("Invariant failed: not monotonic"
150                        + "\t" + Utility.hex(i-1) + ": " + transitions[i-1]
151                                                                       + "\t" + Utility.hex(i) + ": " + transitions[i]
152                );
153            }
154        }
155    }
156
157    /**
158     * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1]
159     * Assumes that 0 <= codepoint <= 0x10FFFF
160     * @param codepoint
161     * @return the index
162     */
163    private int _findIndex(int c) {
164        int lo = 0;
165        int hi = length - 1;
166        int i = (lo + hi) >>> 1;
167        // invariant: c >= list[lo]
168        // invariant: c < list[hi]
169        while (i != lo) {
170            if (c < transitions[i]) {
171                hi = i;
172            } else {
173                lo = i;
174            }
175            i = (lo + hi) >>> 1;
176        }
177        if (ASSERTIONS) _checkFind(c, lo);
178        return lo;
179    }
180
181    private void _checkFind(int codepoint, int value) {
182        int other = __findIndex(codepoint);
183        if (other != value) {
184            throw new IllegalArgumentException("Invariant failed: binary search"
185                    + "\t" + Utility.hex(codepoint) + ": " + value
186                    + "\tshould be: " + other);
187        }
188    }
189
190    private int __findIndex(int codepoint) {
191        for (int i = length-1; i > 0; --i) {
192            if (transitions[i] <= codepoint) return i;
193        }
194        return 0;
195    }
196
197    /*
198     * Try indexed lookup
199
200    static final int SHIFT = 8;
201    int[] starts = new int[0x10FFFF>>SHIFT]; // lowest transition index where codepoint>>x can be found
202    boolean startsValid = false;
203    private int findIndex(int codepoint) {
204        if (!startsValid) {
205            int start = 0;
206            for (int i = 1; i < length; ++i) {
207
208            }
209        }
210        for (int i = length-1; i > 0; --i) {
211           if (transitions[i] <= codepoint) return i;
212       }
213       return 0;
214   }
215     */
216
217    /**
218     * Remove the items from index through index+count-1.
219     * Logically reduces the size of the internal arrays.
220     * @param index
221     * @param count
222     */
223    private void _removeAt(int index, int count) {
224        for (int i = index + count; i < length; ++i) {
225            transitions[i-count] = transitions[i];
226            values[i-count] = values[i];
227        }
228        length -= count;
229    }
230    /**
231     * Add a gap from index to index+count-1.
232     * The values there are undefined, and must be set.
233     * Logically grows arrays to accomodate. Actual growth is limited
234     * @param index
235     * @param count
236     */
237    private void _insertGapAt(int index, int count) {
238        int newLength = length + count;
239        int[] oldtransitions = transitions;
240        T[] oldvalues = values;
241        if (newLength > transitions.length) {
242            int allocation = (int) (GROWTH_GAP + (newLength * GROWTH_PERCENT) / 100);
243            transitions = new int[allocation];
244            values = (T[]) new Object[allocation];
245            for (int i = 0; i < index; ++i) {
246                transitions[i] = oldtransitions[i];
247                values[i] = oldvalues[i];
248            }
249        }
250        for (int i = length - 1; i >= index; --i) {
251            transitions[i+count] = oldtransitions[i];
252            values[i+count] = oldvalues[i];
253        }
254        length = newLength;
255    }
256
257    /**
258     * Associates code point with value. Removes any previous association.
259     * All code that calls this MUST check for frozen first!
260     * @param codepoint
261     * @param value
262     * @return this, for chaining
263     */
264    private UnicodeMap _put(int codepoint, T value) {
265        // Warning: baseIndex is an invariant; must
266        // be defined such that transitions[baseIndex] < codepoint
267        // at end of this routine.
268        int baseIndex;
269        if (transitions[lastIndex] <= codepoint
270                && codepoint < transitions[lastIndex+1]) {
271            baseIndex = lastIndex;
272        } else {
273            baseIndex = _findIndex(codepoint);
274        }
275        int limitIndex = baseIndex + 1;
276        // cases are (a) value is already set
277        if (areEqual(values[baseIndex], value)) return this;
278        if (locked) {
279            throw new UnsupportedOperationException("Attempt to modify locked object");
280        }
281        if (errorOnReset && values[baseIndex] != null) {
282            throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint)
283                    + " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value);
284        }
285
286        // adjust the available values
287        staleAvailableValues = true;
288        availableValues.add(value); // add if not there already
289
290        int baseCP = transitions[baseIndex];
291        int limitCP = transitions[limitIndex];
292        // we now start walking through the difference case,
293        // based on whether we are at the start or end of range
294        // and whether the range is a single character or multiple
295
296        if (baseCP == codepoint) {
297            // CASE: At very start of range
298            boolean connectsWithPrevious =
299                baseIndex != 0 && areEqual(value, values[baseIndex-1]);
300
301            if (limitCP == codepoint + 1) {
302                // CASE: Single codepoint range
303                boolean connectsWithFollowing =
304                    baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1
305
306                if (connectsWithPrevious) {
307                    // A1a connects with previous & following, so remove index
308                    if (connectsWithFollowing) {
309                        _removeAt(baseIndex, 2);
310                    } else {
311                        _removeAt(baseIndex, 1); // extend previous
312                    }
313                    --baseIndex; // fix up
314                } else if (connectsWithFollowing) {
315                    _removeAt(baseIndex, 1); // extend following backwards
316                    transitions[baseIndex] = codepoint;
317                } else {
318                    // doesn't connect on either side, just reset
319                    values[baseIndex] = value;
320                }
321            } else if (connectsWithPrevious) {
322                // A.1: start of multi codepoint range
323                // if connects
324                ++transitions[baseIndex]; // extend previous
325            } else {
326                // otherwise insert new transition
327                transitions[baseIndex] = codepoint+1; // fix following range
328                _insertGapAt(baseIndex, 1);
329                values[baseIndex] = value;
330                transitions[baseIndex] = codepoint;
331            }
332        } else if (limitCP == codepoint + 1) {
333            // CASE: at end of range
334            // if connects, just back up range
335            boolean connectsWithFollowing =
336                baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1
337
338            if (connectsWithFollowing) {
339                --transitions[limitIndex];
340                return this;
341            } else {
342                _insertGapAt(limitIndex, 1);
343                transitions[limitIndex] = codepoint;
344                values[limitIndex] = value;
345            }
346        } else {
347            // CASE: in middle of range
348            // insert gap, then set the new range
349            _insertGapAt(++baseIndex,2);
350            transitions[baseIndex] = codepoint;
351            values[baseIndex] = value;
352            transitions[baseIndex+1] = codepoint + 1;
353            values[baseIndex+1] = values[baseIndex-1]; // copy lower range values
354        }
355        lastIndex = baseIndex; // store for next time
356        return this;
357    }
358
359    private UnicodeMap _putAll(int startCodePoint, int endCodePoint, T value) {
360        // TODO optimize
361        for (int i = startCodePoint; i <= endCodePoint; ++i) {
362            _put(i, value);
363            if (ASSERTIONS) _checkInvariants();
364        }
365        return this;
366    }
367
368    /**
369     * Sets the codepoint value.
370     * @param codepoint
371     * @param value
372     * @return this (for chaining)
373     */
374    public UnicodeMap<T> put(int codepoint, T value) {
375        if (codepoint < 0 || codepoint > 0x10FFFF) {
376            throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
377        }
378        _put(codepoint, value);
379        if (ASSERTIONS) _checkInvariants();
380        return this;
381    }
382
383    /**
384     * Sets the codepoint value.
385     * @param codepoint
386     * @param value
387     * @return this (for chaining)
388     */
389    public UnicodeMap<T> put(String string, T value) {
390        int v = UnicodeSet.getSingleCodePoint(string);
391        if (v == Integer.MAX_VALUE) {
392            if (locked) {
393                throw new UnsupportedOperationException("Attempt to modify locked object");
394            }
395            if (value != null) {
396                if (stringMap == null) {
397                    stringMap = new TreeMap<String,T>();
398                }
399                stringMap.put(string, value);
400                staleAvailableValues = true;
401            } else if (stringMap != null) {
402                if (stringMap.remove(string) != null) {
403                    staleAvailableValues = true;
404                }
405            }
406            return this;
407        }
408        return put(v, value);
409    }
410
411    /**
412     * Adds bunch o' codepoints; otherwise like put.
413     * @param codepoints
414     * @param value
415     * @return this (for chaining)
416     */
417    public UnicodeMap<T> putAll(UnicodeSet codepoints, T value) {
418        UnicodeSetIterator it = new UnicodeSetIterator(codepoints);
419        while (it.nextRange()) {
420            if (it.string == null) {
421                _putAll(it.codepoint, it.codepointEnd, value);
422            } else {
423                put(it.string, value);
424            }
425        }
426        return this;
427    }
428
429    /**
430     * Adds bunch o' codepoints; otherwise like add.
431     * @param startCodePoint
432     * @param endCodePoint
433     * @param value
434     * @return this (for chaining)
435     */
436    public UnicodeMap<T> putAll(int startCodePoint, int endCodePoint, T value) {
437        if (locked) {
438            throw new UnsupportedOperationException("Attempt to modify locked object");
439        }
440        if (startCodePoint < 0 || endCodePoint > 0x10FFFF) {
441            throw new IllegalArgumentException("Codepoint out of range: "
442                    + Utility.hex(startCodePoint) + ".." + Utility.hex(endCodePoint));
443        }
444        return _putAll(startCodePoint, endCodePoint, value);
445    }
446
447    /**
448     * Add all the (main) values from a UnicodeMap
449     * @param unicodeMap the property to add to the map
450     * @return this (for chaining)
451     */
452    public UnicodeMap<T> putAll(UnicodeMap<T> unicodeMap) {
453        for (int i = 0; i < unicodeMap.length; ++i) {
454            T value = unicodeMap.values[i];
455            if (value != null) {
456                _putAll(unicodeMap.transitions[i], unicodeMap.transitions[i+1]-1, value);
457            }
458            if (ASSERTIONS) _checkInvariants();
459        }
460        if (unicodeMap.stringMap != null && !unicodeMap.stringMap.isEmpty()) {
461            if (stringMap == null) {
462                stringMap = new TreeMap<String,T>();
463            }
464            stringMap.putAll(unicodeMap.stringMap);
465        }
466        return this;
467    }
468
469    /**
470     * Add all the (main) values from a Unicode property
471     * @param prop the property to add to the map
472     * @return this (for chaining)
473     */
474    public UnicodeMap<T> putAllFiltered(UnicodeMap<T> prop, UnicodeSet filter) {
475        // TODO optimize
476        for (UnicodeSetIterator it = new UnicodeSetIterator(filter); it.next();) {
477            if (it.codepoint != UnicodeSetIterator.IS_STRING) {
478                T value = prop.getValue(it.codepoint);
479                if (value != null) {
480                    _put(it.codepoint, value);
481                }
482            }
483        }
484        // now do the strings
485        for (String key : filter.strings()) {
486            T value = prop.get(key);
487            if (value != null) {
488                put(key, value);
489            }
490        }
491        return this;
492    }
493
494    /**
495     * Set the currently unmapped Unicode code points to the given value.
496     * @param value the value to set
497     * @return this (for chaining)
498     */
499    public UnicodeMap<T> setMissing(T value) {
500        // fast path, if value not yet present
501        if (!getAvailableValues().contains(value)) {
502            staleAvailableValues = true;
503            availableValues.add(value);
504            for (int i = 0; i < length; ++i) {
505                if (values[i] == null) values[i] = value;
506            }
507            return this;
508        } else {
509            return putAll(keySet(null), value);
510        }
511    }
512    /**
513     * Returns the keyset consisting of all the keys that would produce the given value. Deposits into
514     * result if it is not null. Remember to clear if you just want
515     * the new values.
516     */
517    public UnicodeSet keySet(T value, UnicodeSet result) {
518        if (result == null) result = new UnicodeSet();
519        for (int i = 0; i < length - 1; ++i) {
520            if (areEqual(value, values[i])) {
521                result.add(transitions[i], transitions[i+1]-1);
522            }
523        }
524        if (value != null && stringMap != null) {
525            for (String key : stringMap.keySet()) {
526                T newValue = stringMap.get(key);
527                if (value.equals(newValue)) {
528                    result.add((String)key);
529                }
530            }
531        }
532        return result;
533    }
534
535    /**
536     * Returns the keyset consisting of all the keys that would produce the given value.
537     * the new values.
538     */
539    public UnicodeSet keySet(T value) {
540        return keySet(value,null);
541    }
542
543    /**
544     * Returns the keyset consisting of all the keys that would produce (non-null) values.
545     */
546    public UnicodeSet keySet() {
547        UnicodeSet result = new UnicodeSet();
548        for (int i = 0; i < length - 1; ++i) {
549            if (values[i] != null) {
550                result.add(transitions[i], transitions[i+1]-1);
551            }
552        }
553        if (stringMap != null) {
554            result.addAll(stringMap.keySet());
555        }
556        return result;
557    }
558
559    /**
560     * Returns the list of possible values. Deposits each non-null value into
561     * result. Creates result if it is null. Remember to clear result if
562     * you are not appending to existing collection.
563     * @param result
564     * @return result
565     */
566    public <U extends Collection<T>> U values(U result) {
567        if (staleAvailableValues) {
568            // collect all the current values
569            // retain them in the availableValues
570            Set<T> temp = new HashSet<T>();
571            for (int i = 0; i < length - 1; ++i) {
572                if (values[i] != null) temp.add(values[i]);
573            }
574            availableValues.retainAll(temp);
575            if (stringMap != null) {
576                availableValues.addAll(stringMap.values());
577            }
578            staleAvailableValues = false;
579        }
580        if (result == null) {
581            result = (U) new ArrayList<T>(availableValues.size());
582        }
583        result.addAll(availableValues);
584        return result;
585    }
586
587    /**
588     * Convenience method
589     */
590    public Collection<T> values() {
591        return getAvailableValues(null);
592    }
593    /**
594     * Gets the value associated with a given code point.
595     * Returns null, if there is no such value.
596     * @param codepoint
597     * @return the value
598     */
599    public T get(int codepoint) {
600        if (codepoint < 0 || codepoint > 0x10FFFF) {
601            throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
602        }
603        return values[_findIndex(codepoint)];
604    }
605
606    /**
607     * Gets the value associated with a given code point.
608     * Returns null, if there is no such value.
609     * @param codepoint
610     * @return the value
611     */
612    public T get(String value) {
613        if (UTF16.hasMoreCodePointsThan(value, 1)) {
614            if (stringMap == null) {
615                return null;
616            }
617            return stringMap.get(value);
618        }
619        return getValue(UTF16.charAt(value, 0));
620    }
621
622
623    /**
624     * Change a new string from the source string according to the mappings.
625     * For each code point cp, if getValue(cp) is null, append the character, otherwise append getValue(cp).toString()
626     * TODO: extend to strings
627     * @param source
628     * @return
629     */
630    public String transform(String source) {
631        StringBuffer result = new StringBuffer();
632        int cp;
633        for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
634            cp = UTF16.charAt(source, i);
635            T mResult = getValue(cp);
636            if (mResult != null) {
637                result.append(mResult);
638            } else {
639                UTF16.append(result, cp);
640            }
641        }
642        return result.toString();
643    }
644
645    /**
646     * Used to add complex values, where the value isn't replaced but in some sense composed
647     * @author markdavis
648     */
649    public abstract static class Composer<T> {
650        /**
651         * This will be called with either a string or a code point. The result is the new value for that item.
652         * If the codepoint is used, the string is null; if the string is used, the codepoint is -1.
653         * @param a
654         * @param b
655         */
656        public abstract T compose(int codePoint, String string, T a, T b);
657    }
658
659    public UnicodeMap<T> composeWith(UnicodeMap<T> other, Composer<T> composer) {
660        for (T value : other.getAvailableValues()) {
661            UnicodeSet set = other.keySet(value);
662            composeWith(set, value, composer);
663        }
664        return this;
665    }
666
667    public UnicodeMap<T> composeWith(UnicodeSet set, T value, Composer<T> composer) {
668        for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
669            int i = it.codepoint;
670            if (i == UnicodeSetIterator.IS_STRING) {
671                String s = it.string;
672                T v1 = getValue(s);
673                T v3 = composer.compose(-1, s, v1, value);
674                if (v1 != v3 && (v1 == null || !v1.equals(v3))) {
675                    put(s, v3);
676                }
677            } else {
678                T v1 = getValue(i);
679                T v3 = composer.compose(i, null, v1, value);
680                if (v1 != v3 && (v1 == null || !v1.equals(v3))) {
681                    put(i, v3);
682                }
683            }
684        }
685        return this;
686    }
687
688    public String toString() {
689        return toString(null);
690    }
691
692    public String toString(Comparator<T> collected) {
693        StringBuffer result = new StringBuffer();
694        if (collected == null) {
695            for (int i = 0; i < length-1; ++i) {
696                T value = values[i];
697                if (value == null) continue;
698                int start = transitions[i];
699                int end = transitions[i+1]-1;
700                result.append(Utility.hex(start));
701                if (start != end) result.append("-").append(Utility.hex(end));
702                result.append("=").append(value.toString()).append("\n");
703            }
704            if (stringMap != null) {
705                for (String s : stringMap.keySet()) {
706                    result.append(Utility.hex(s)).append("=").append(stringMap.get(s).toString()).append("\n");
707                }
708            }
709        } else {
710            Set<T> set = values(new TreeSet<T>(collected));
711            for (Iterator<T> it = set.iterator(); it.hasNext();) {
712                T value = it.next();
713                UnicodeSet s = keySet(value);
714                result.append(value).append("=").append(s.toString()).append("\n");
715            }
716        }
717        return result.toString();
718    }
719    /**
720     * @return Returns the errorOnReset value.
721     */
722    public boolean getErrorOnReset() {
723        return errorOnReset;
724    }
725    /**
726     * Puts the UnicodeMap into a state whereby new mappings are accepted, but changes to old mappings cause an exception.
727     * @param errorOnReset The errorOnReset to set.
728     */
729    public UnicodeMap<T> setErrorOnReset(boolean errorOnReset) {
730        this.errorOnReset = errorOnReset;
731        return this;
732    }
733
734    /* (non-Javadoc)
735     * @see android.icu.dev.test.util.Freezable#isFrozen()
736     */
737    public boolean isFrozen() {
738        // TODO Auto-generated method stub
739        return locked;
740    }
741
742    /* (non-Javadoc)
743     * @see android.icu.dev.test.util.Freezable#lock()
744     */
745    public UnicodeMap<T> freeze() {
746        locked = true;
747        return this;
748    }
749
750    /**
751     * Utility to find the maximal common prefix of two strings.
752     * TODO: fix supplemental support
753     */
754    static public int findCommonPrefix(String last, String s) {
755        int minLen = Math.min(last.length(), s.length());
756        for (int i = 0; i < minLen; ++i) {
757            if (last.charAt(i) != s.charAt(i)) return i;
758        }
759        return minLen;
760    }
761
762    /**
763     * Get the number of ranges; used for getRangeStart/End. The ranges together cover all of the single-codepoint keys in the UnicodeMap. Other keys can be gotten with getStrings().
764     */
765    public int getRangeCount() {
766        return length-1;
767    }
768
769    /**
770     * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset.
771     */
772    public int getRangeStart(int range) {
773        return transitions[range];
774    }
775
776    /**
777     * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset.
778     */
779    public int getRangeEnd(int range) {
780        return transitions[range+1] - 1;
781    }
782
783    /**
784     * Get the value for the range.
785     */
786    public T getRangeValue(int range) {
787        return values[range];
788    }
789
790    /**
791     * Get the strings that are not in the ranges. Returns null if there are none.
792     * @return
793     */
794    public Set<String> getNonRangeStrings() {
795        if (stringMap == null || stringMap.isEmpty()) {
796            return null;
797        }
798        return Collections.unmodifiableSet(stringMap.keySet());
799    }
800
801    static final boolean DEBUG_WRITE = false;
802
803    /* (non-Javadoc)
804     * @see java.util.Map#containsKey(java.lang.Object)
805     */
806    public boolean containsKey(String key) {
807        return getValue(key) != null;
808    }
809
810    /* (non-Javadoc)
811     * @see java.util.Map#containsKey(java.lang.Object)
812     */
813    public boolean containsKey(int key) {
814        return getValue(key) != null;
815    }
816
817    /* (non-Javadoc)
818     * @see java.util.Map#containsValue(java.lang.Object)
819     */
820    public boolean containsValue(T value) {
821        // TODO Optimize
822        return getAvailableValues().contains(value);
823    }
824
825    /* (non-Javadoc)
826     * @see java.util.Map#isEmpty()
827     */
828    public boolean isEmpty() {
829        return size() == 0;
830    }
831
832    /* (non-Javadoc)
833     * @see java.util.Map#putAll(java.util.Map)
834     */
835    public UnicodeMap<T> putAll(Map<? extends String, ? extends T> map) {
836        for (String key : map.keySet()) {
837            put(key,map.get(key));
838        }
839        return this;
840    }
841
842    /**
843     * Utility for extracting map
844     */
845    public UnicodeMap<T> putAllIn(Map<? super String, ? super T> map) {
846        for (String key : keySet()) {
847            map.put(key, get(key));
848        }
849        return this;
850    }
851
852    /* (non-Javadoc)
853     * @see java.util.Map#remove(java.lang.Object)
854     */
855    public UnicodeMap<T> remove(String key) {
856        return put(key, null);
857    }
858
859    /* (non-Javadoc)
860     * @see java.util.Map#remove(java.lang.Object)
861     */
862    public UnicodeMap<T> remove(int key) {
863        return put(key, null);
864    }
865
866    /* (non-Javadoc)
867     * @see java.util.Map#size()
868     */
869    public int size() {
870        int result = stringMap == null ? 0 : stringMap.size();
871        for (int i = 0; i < length-1; ++i) {
872            T value = values[i];
873            if (value == null) continue;
874            result += transitions[i+1] - transitions[i];
875        }
876        return result;
877    }
878
879    /* (non-Javadoc)
880     * @see java.util.Map#entrySet()
881     */
882    public Iterable<Entry<String,T>> entrySet() {
883        return new EntrySetX();
884    }
885
886    private class EntrySetX implements Iterable<Entry<String, T>> {
887        public Iterator<Entry<String, T>> iterator() {
888            return new IteratorX();
889        }
890        public String toString() {
891            StringBuffer b = new StringBuffer();
892            for (Iterator it = iterator(); it.hasNext();) {
893                Object item = it.next();
894                b.append(item.toString()).append(' ');
895            }
896            return b.toString();
897        }
898    }
899
900    private class IteratorX implements Iterator<Entry<String, T>> {
901        Iterator<String> iterator = keySet().iterator();
902
903        /* (non-Javadoc)
904         * @see java.util.Iterator#hasNext()
905         */
906        public boolean hasNext() {
907            return iterator.hasNext();
908        }
909
910        /* (non-Javadoc)
911         * @see java.util.Iterator#next()
912         */
913        public Entry<String, T> next() {
914            String key = iterator.next();
915            return new ImmutableEntry(key, get(key));
916        }
917
918        /* (non-Javadoc)
919         * @see java.util.Iterator#remove()
920         */
921        public void remove() {
922            throw new UnsupportedOperationException();
923        }
924
925    }
926
927    /**
928     * Struct-like class used to iterate over a UnicodeMap in a for loop.
929     * If the value is a string, then codepoint == codepointEnd == -1. Otherwise the string is null;
930     * Caution: The contents may change during the iteration!
931     */
932    public static class EntryRange<T> {
933        public int codepoint;
934        public int codepointEnd;
935        public String string;
936        public T value;
937        @Override
938        public String toString() {
939            return (string != null ? Utility.hex(string)
940                    : Utility.hex(codepoint) + (codepoint == codepointEnd ? "" : ".." + Utility.hex(codepointEnd)))
941                    + "=" + value;
942        }
943    }
944
945    /**
946     * Returns an Iterable over EntryRange, designed for efficient for loops over UnicodeMaps.
947     * Caution: For efficiency, the EntryRange may be reused, so the EntryRange may change on each iteration!
948     * The value is guaranteed never to be null.
949     * @return entry range, for for loops
950     */
951    public Iterable<EntryRange<T>> entryRanges() {
952        return new EntryRanges();
953    }
954
955    private class EntryRanges implements Iterable<EntryRange<T>>, Iterator<EntryRange<T>> {
956        private int pos;
957        private EntryRange<T> result = new EntryRange<T>();
958        private int lastRealRange = values[length-2] == null ? length - 2 : length - 1;
959        private Iterator<Entry<String, T>> stringIterator = stringMap == null ? null : stringMap.entrySet().iterator();
960
961        public Iterator<EntryRange<T>> iterator() {
962            return this;
963        }
964        public boolean hasNext() {
965            return pos < lastRealRange || (stringIterator != null && stringIterator.hasNext());
966        }
967        public EntryRange<T> next() {
968            // a range may be null, but then the next one must not be (except the final range)
969            if (pos < lastRealRange) {
970                T temp = values[pos];
971                if (temp == null) {
972                    temp = values[++pos];
973                }
974                result.codepoint = transitions[pos];
975                result.codepointEnd = transitions[pos+1]-1;
976                result.string = null;
977                result.value = temp;
978                ++pos;
979            } else {
980                Entry<String, T> entry = stringIterator.next();
981                result.codepoint = result.codepointEnd = -1;
982                result.string = entry.getKey();
983                result.value = entry.getValue();
984            }
985            return result;
986        }
987        public void remove() {
988            throw new UnsupportedOperationException();
989        }
990    }
991
992    /* (non-Javadoc)
993     * @see java.lang.Iterable#iterator()
994     */
995    public Iterator<String> iterator() {
996        return keySet().iterator();
997    }
998
999    /**
1000     * Old form for compatibility
1001     */
1002    public T getValue(String key) {
1003        return get(key);
1004    }
1005
1006    /**
1007     * Old form for compatibility
1008     */
1009    public T getValue(int key) {
1010        // TODO Auto-generated method stub
1011        return get(key);
1012    }
1013
1014    /**
1015     * Old form for compatibility
1016     */
1017    public Collection<T> getAvailableValues() {
1018        return values();
1019    }
1020
1021    /**
1022     * Old form for compatibility
1023     */
1024    public <U extends Collection<T>> U getAvailableValues(U result) {
1025        return values(result);
1026    }
1027
1028    /**
1029     * Old form for compatibility
1030     */
1031    public UnicodeSet getSet(T value) {
1032        return keySet(value);
1033    }
1034
1035    /**
1036     * Old form for compatibility
1037     */
1038    public UnicodeSet getSet(T value, UnicodeSet result) {
1039        return keySet(value, result);
1040    }
1041
1042    // This is to support compressed serialization. It works; just commented out for now as we shift to Generics
1043    // TODO Fix once generics are cleaned up.
1044    //    // TODO Fix to serialize more than just strings.
1045    //    // Only if all the items are strings will we do the following compression
1046    //    // Otherwise we'll just use Java Serialization, bulky as it is
1047    //    public void writeExternal(ObjectOutput out1) throws IOException {
1048    //        DataOutputCompressor sc = new DataOutputCompressor(out1);
1049    //        // if all objects are strings
1050    //        Collection<T> availableVals = getAvailableValues();
1051    //        boolean allStrings = allAreString(availableVals);
1052    //        sc.writeBoolean(allStrings);
1053    //        Map object_index = new LinkedHashMap();
1054    //        if (allAreString(availableVals)) {
1055    //            sc.writeStringSet(new TreeSet(availableVals), object_index);
1056    //        } else {
1057    //            sc.writeCollection(availableVals, object_index);
1058    //        }
1059    //        sc.writeUInt(length);
1060    //        int lastTransition = -1;
1061    //        int lastValueNumber = 0;
1062    //        if (DEBUG_WRITE) System.out.println("Trans count: " + length);
1063    //        for (int i = 0; i < length; ++i) {
1064    //            int valueNumber = ((Integer)object_index.get(values[i])).intValue();
1065    //            if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + valueNumber);
1066    //
1067    //            int deltaTransition = transitions[i] - lastTransition;
1068    //            lastTransition = transitions[i];
1069    //            int deltaValueNumber = valueNumber - lastValueNumber;
1070    //            lastValueNumber = valueNumber;
1071    //
1072    //            deltaValueNumber <<= 1; // make room for one bit
1073    //            boolean canCombine = deltaTransition == 1;
1074    //            if (canCombine) deltaValueNumber |= 1;
1075    //            sc.writeInt(deltaValueNumber);
1076    //            if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + deltaValueNumber);
1077    //            if (!canCombine) {
1078    //                sc.writeUInt(deltaTransition);
1079    //                if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
1080    //            }
1081    //        }
1082    //        sc.flush();
1083    //    }
1084    //
1085    //    /**
1086    //     *
1087    //     */
1088    //    private boolean allAreString(Collection<T> availableValues2) {
1089    //        //if (true) return false;
1090    //        for (Iterator<T> it = availableValues2.iterator(); it.hasNext();) {
1091    //            if (!(it.next() instanceof String)) return false;
1092    //        }
1093    //        return true;
1094    //    }
1095    //
1096    //    public void readExternal(ObjectInput in1) throws IOException, ClassNotFoundException {
1097    //        DataInputCompressor sc = new DataInputCompressor(in1);
1098    //        boolean allStrings = sc.readBoolean();
1099    //        T[] valuesList;
1100    //        availableValues = new LinkedHashSet();
1101    //        if (allStrings) {
1102    //            valuesList = sc.readStringSet(availableValues);
1103    //        } else {
1104    //            valuesList = sc.readCollection(availableValues);
1105    //        }
1106    //        length = sc.readUInt();
1107    //        transitions = new int[length];
1108    //        if (DEBUG_WRITE) System.out.println("Trans count: " + length);
1109    //        values = (T[]) new Object[length];
1110    //        int currentTransition = -1;
1111    //        int currentValue = 0;
1112    //        int deltaTransition;
1113    //        for (int i = 0; i < length; ++i) {
1114    //            int temp = sc.readInt();
1115    //            if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + temp);
1116    //            boolean combined = (temp & 1) != 0;
1117    //            temp >>= 1;
1118    //        values[i] = valuesList[currentValue += temp];
1119    //        if (!combined) {
1120    //            deltaTransition = sc.readUInt();
1121    //            if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
1122    //        } else {
1123    //            deltaTransition = 1;
1124    //        }
1125    //        transitions[i] = currentTransition += deltaTransition; // delta value
1126    //        if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + currentValue);
1127    //        }
1128    //    }
1129
1130    public final UnicodeMap<T> removeAll(UnicodeSet set) {
1131        return putAll(set, null);
1132    }
1133
1134    public final UnicodeMap<T> removeAll(UnicodeMap<T> reference) {
1135        return removeRetainAll(reference, true);
1136    }
1137
1138    public final UnicodeMap<T> retainAll(UnicodeSet set) {
1139        UnicodeSet toNuke = new UnicodeSet();
1140        // TODO Optimize
1141        for (EntryRange<T> ae : entryRanges()) {
1142            if (ae.string != null) {
1143                if (!set.contains(ae.string)) {
1144                    toNuke.add(ae.string);
1145                }
1146            } else {
1147                for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) {
1148                    if (!set.contains(i)) {
1149                        toNuke.add(i);
1150                    }
1151                }
1152            }
1153        }
1154        return putAll(toNuke, null);
1155    }
1156
1157    public final UnicodeMap<T> retainAll(UnicodeMap<T> reference) {
1158        return removeRetainAll(reference, false);
1159    }
1160
1161    private final UnicodeMap<T> removeRetainAll(UnicodeMap<T> reference, boolean remove) {
1162        UnicodeSet toNuke = new UnicodeSet();
1163        // TODO Optimize
1164        for (EntryRange<T> ae : entryRanges()) {
1165            if (ae.string != null) {
1166                if (ae.value.equals(reference.get(ae.string)) == remove) {
1167                    toNuke.add(ae.string);
1168                }
1169            } else {
1170                for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) {
1171                    if (ae.value.equals(reference.get(i)) == remove) {
1172                        toNuke.add(i);
1173                    }
1174                }
1175            }
1176        }
1177        return putAll(toNuke, null);
1178    }
1179
1180    /**
1181     * Returns the keys that consist of multiple code points.
1182     * @return
1183     */
1184    public Set<String> stringKeys() {
1185        return stringMap.keySet();
1186    }
1187}
1188