// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html#License /* ******************************************************************************* * Copyright (C) 2011, Google, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ package com.ibm.icu.dev.test.util; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import com.ibm.icu.impl.Utility; import com.ibm.icu.util.BytesTrie; import com.ibm.icu.util.BytesTrie.Result; import com.ibm.icu.util.BytesTrieBuilder; import com.ibm.icu.util.CharsTrie; import com.ibm.icu.util.CharsTrieBuilder; import com.ibm.icu.util.StringTrieBuilder.Option; // would be nice to have a BytesTrieBuilder.add(aByte); // question: can bytetrie store <"",x>? // can you store the same string twice, eg add(bytes1, value), add(bytes1, value)? What happens? If an error, // should happen on add, not on build. // the BytesTrieBuilder.build should create a BytesTrie, not a raw array. For the latter, use buildArray or something. // need class description; examples of usage; which method can/should be called after which others. public abstract class TrieMap implements Iterable> { public enum Style { BYTES, CHARS } private static final boolean DEBUG = true; protected final V[] intToValue; protected final int size; protected TrieMap(V[] intToValue, int size) { this.intToValue = intToValue; this.size = size; } public int keyByteSize() { return size; } public static abstract class Builder { Option option; protected List intToValueTemp = new ArrayList(); protected Map valueToIntegerTemp = new HashMap(); static public Builder with(Style style, Map keyValuePairs) { return with(style, Option.SMALL, keyValuePairs); } static public Builder with(Style style, Option option, Map keyValuePairs) { Builder result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder() : new CharsTrieMap.CharsBuilder(); result.option = option; return result.addAll(keyValuePairs); } static public Builder with(Style style, K key, V value) { return with(style, Option.SMALL, key, value); } static public Builder with(Style style, Option option, K key, V value) { Builder result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder() : new CharsTrieMap.CharsBuilder(); result.option = option; return result.add(key, value); } public abstract Builder add(CharSequence key, V value); public abstract Builder addAll(Map keyValuePairs); public abstract TrieMap build(); } abstract public V get(CharSequence test); /** * Warning: the entry contents are only valid until the next next() call!! */ abstract public Iterator> iterator(); abstract public Matcher getMatcher(); public abstract static class Matcher { protected CharSequence text = ""; protected int start = 0; protected int current = 0; abstract void set(CharSequence string, int i); abstract boolean next(); abstract V getValue(); abstract int getStart(); abstract int getEnd(); abstract boolean nextStart(); } private static class BytesTrieMap extends TrieMap { private final BytesTrie bytesTrie; private byte[] bytes = new byte[3]; private BytesTrieMap(BytesTrie bytesTrie, V[] intToValue, int size) { super(intToValue, size); this.bytesTrie = bytesTrie; } public V get(CharSequence test) { int length = test.length(); bytesTrie.reset(); if (length == 0) { return bytesTrie.current().hasValue() ? intToValue[bytesTrie.getValue()] : null; } Result result = Result.NO_VALUE; for (int i = 0; i < length; ++i) { if (!result.hasNext()) { return null; } char c = test.charAt(i); int limit = ByteConverter.getBytes(c, bytes, 0); result = limit == 1 ? bytesTrie.next(bytes[0]) : bytesTrie.next(bytes, 0, limit); } return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; // int length = test.length(); // if (length == 0) { // return null; // } // bytesTrie.reset(); // Result result = null; // byte[] bytes = new byte[3]; // for (int i = 0; i < length; ++i) { // char c = test.charAt(i); // int limit = ByteConverter.getBytes(c, bytes, 0); // for (int j = 0; j < limit; ++j) { // result = bytesTrie.next(bytes[j]&0xFF); // if (!result.matches()) { // return null; // } // } // } // return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; } public String toString() { return toString(bytesTrie, " : ", "\n"); } /** * Warning: the entry contents are only valid until the next next() call!! */ public Iterator> iterator() { // TODO Auto-generated method stub return new BytesIterator(); } public TrieMap.Matcher getMatcher() { return new BytesMatcher(); } private class BytesIterator implements Iterator> { BytesTrie.Iterator iterator = bytesTrie.iterator(); BytesEntry entry = new BytesEntry(); public boolean hasNext() { return iterator.hasNext(); } public Entry next() { entry.bytesEntry = iterator.next(); return entry; } public void remove() { throw new UnsupportedOperationException(); } } private class BytesEntry implements Entry { public BytesTrie.Entry bytesEntry; StringBuilder buffer = new StringBuilder(); public CharSequence getKey() { buffer.setLength(0); ByteConverter.getChars(bytesEntry, buffer); return buffer; } public V getValue() { return intToValue[bytesEntry.value]; } public V setValue(V value) { throw new UnsupportedOperationException(); } } public class BytesMatcher extends Matcher { private byte[] bytes = new byte[3]; private V value = null; public void set(CharSequence text, int start) { this.text = text; this.start = start; this.current = start; } public int getStart() { return start; } public int getEnd() { return current; } /** * Finds the next match. Returns false when there are no possible further matches from the current start * point. Once that happens, call nextStart(); Call getValue to get the current value. * * @return false when done. There may be a value, however. */ public boolean next() { while (current < text.length()) { char c = text.charAt(current++); int limit = ByteConverter.getBytes(c, bytes, 0); for (int j = 0; j < limit; ++j) { Result result = bytesTrie.next(bytes[j]); if (result.hasValue()) { if (j < limit - 1) { throw new IllegalArgumentException("Data corrupt"); } value = intToValue[bytesTrie.getValue()]; return result.hasNext(); } else if (!result.matches()) { value = null; return false; } } } value = null; return false; } public boolean nextStart() { if (start >= text.length()) { return false; } ++start; current = start; bytesTrie.reset(); return true; } public V getValue() { return value; } } } public static class BytesBuilder extends Builder { BytesTrieBuilder builder = new BytesTrieBuilder(); byte[] bytes = new byte[200]; List debugBytes = DEBUG ? new ArrayList() : null; public BytesBuilder add(CharSequence key, V value) { // traverse the values, and get a mapping of a byte string to list of // integers, and a mapping from those integers to a set of values Integer index; if (option == Option.SMALL) { index = valueToIntegerTemp.get(value); if (index == null) { index = intToValueTemp.size(); intToValueTemp.add(value); valueToIntegerTemp.put(value, index); } } else { index = intToValueTemp.size(); intToValueTemp.add(value); } // dumb implementation for now // the buffer size is at most 3 * number_of_chars if (bytes.length < key.length() * 3) { bytes = new byte[64 + key.length() * 3]; } int limit = 0; for (int i = 0; i < key.length(); ++i) { char c = key.charAt(i); limit = ByteConverter.getBytes(c, bytes, limit); } try { builder.add(bytes, limit, index); return this; } catch (Exception e) { ArrayList list = new ArrayList(); for (int i = 0; i < limit; ++i) { list.add(Utility.hex(bytes[i])); } throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + list, e); } } public BytesBuilder addAll(Map keyValuePairs) { for (Entry entry : keyValuePairs.entrySet()) { add(entry.getKey(), entry.getValue()); } return this; } public TrieMap build() { int size = builder.buildByteBuffer(option).remaining(); BytesTrie bytesTrie = builder.build(option); @SuppressWarnings("unchecked") V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); return new BytesTrieMap(bytesTrie, intToValueArray, size); } } private static class CharsTrieMap extends TrieMap { private final CharsTrie charsTrie; private CharsTrieMap(CharsTrie charsTrie, V[] intToValue, int size) { super(intToValue, size); this.charsTrie = charsTrie; } public V get(CharSequence test) { Result result = charsTrie.reset().next(test, 0, test.length()); return result.hasValue() ? intToValue[charsTrie.getValue()] : null; } public String toString() { return toString(charsTrie, " : ", "\n"); } /** * Warning: the entry contents are only valid until the next next() call!! */ public Iterator> iterator() { // TODO Auto-generated method stub return new CharsIterator(); } public TrieMap.Matcher getMatcher() { return new CharsMatcher(); } private class CharsIterator implements Iterator> { CharsTrie.Iterator iterator = charsTrie.iterator(); CharsEntry entry = new CharsEntry(); public boolean hasNext() { return iterator.hasNext(); } public Entry next() { entry.charsEntry = iterator.next(); return entry; } public void remove() { throw new UnsupportedOperationException(); } } private class CharsEntry implements Entry { public CharsTrie.Entry charsEntry; public CharSequence getKey() { return charsEntry.chars; } public V getValue() { return intToValue[charsEntry.value]; } public V setValue(V value) { throw new UnsupportedOperationException(); } } public class CharsMatcher extends Matcher { private V value = null; public void set(CharSequence text, int start) { this.text = text; this.start = start; this.current = start; } public int getStart() { return start; } public int getEnd() { return current; } /** * Finds the next match. Returns false when there are no possible further matches from the current start * point. Once that happens, call nextStart(); Call getValue to get the current value. * * @return false when done. There may be a value, however. */ public boolean next() { while (current < text.length()) { char c = text.charAt(current++); Result result = charsTrie.next(c); if (result.hasValue()) { value = intToValue[charsTrie.getValue()]; return result.hasNext(); } else if (!result.matches()) { value = null; return false; } } value = null; return false; } public boolean nextStart() { if (start >= text.length()) { return false; } ++start; current = start; charsTrie.reset(); return true; } public V getValue() { return value; } } } public static class CharsBuilder extends Builder { CharsTrieBuilder builder = new CharsTrieBuilder(); public CharsBuilder add(CharSequence key, V value) { // traverse the values, and get a mapping of a byte string to list of // integers, and a mapping from those integers to a set of values Integer index; if (option == Option.SMALL) { index = valueToIntegerTemp.get(value); if (index == null) { index = intToValueTemp.size(); intToValueTemp.add(value); valueToIntegerTemp.put(value, index); } } else { index = intToValueTemp.size(); intToValueTemp.add(value); } try { builder.add(key.toString(), index); return this; } catch (Exception e) { throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + Utility.hex(key), e); } } public CharsBuilder addAll(Map keyValuePairs) { for (Entry entry : keyValuePairs.entrySet()) { add(entry.getKey(), entry.getValue()); } return this; } public TrieMap build() { CharSequence buildCharSequence = builder.buildCharSequence(option); int size = 2 * buildCharSequence.length(); // CharsTrie charsTrie = builder.build(option); CharsTrie charsTrie = new CharsTrie(buildCharSequence, 0); @SuppressWarnings("unchecked") V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); return new CharsTrieMap(charsTrie, intToValueArray, size); } } /** * Supports the following format for encoding chars (Unicode 16-bit code units). The format is slightly simpler and * more compact than UTF8, but also maintains ordering. It is not, however self-synchronizing, and is not intended * for general usage * *
     * 0000..007F - 0xxx xxxx
     * 0000..7EFF - 1yyy yyyy xxxx xxxx
     * 7F00..FFFF - 1111 1111 yyyy yyyy xxxx xxxx
     * 
*/ static class ByteConverter { public static int getBytes(char source, byte[] bytes, int limit) { if (source < 0x80) { bytes[limit++] = (byte) source; } else if (source < 0x7F00) { bytes[limit++] = (byte) (0x80 | (source >> 8)); bytes[limit++] = (byte) source; } else { bytes[limit++] = (byte) -1; bytes[limit++] = (byte) (source >> 8); bytes[limit++] = (byte) source; } return limit; } /** * Transform the string into a sequence of bytes, appending them after start, and return the new limit. */ public static int getBytes(CharSequence source, byte[] bytes, int limit) { for (int i = 0; i < source.length(); ++i) { limit = getBytes(source.charAt(i), bytes, limit); } return limit; } /** * Transform a sequence of bytes into a string, according to the format in getBytes. No error checking. */ public static String getChars(byte[] bytes, int start, int limit) { StringBuilder buffer = new StringBuilder(); char[] output = new char[1]; for (int i = start; i < limit;) { i = getChar(bytes, i, output); buffer.append(output[0]); } return buffer.toString(); } public static int getChar(byte[] bytes, int start, char[] output) { byte b = bytes[start++]; if (b >= 0) { output[0] = (char) b; } else if (b != (byte) -1) { // 2 bytes int b1 = 0x7F & b; int b2 = 0xFF & bytes[start++]; output[0] = (char) ((b1 << 8) | b2); } else { int b2 = bytes[start++]; int b3 = 0xFF & bytes[start++]; output[0] = (char) ((b2 << 8) | b3); } return start; } private static void getChars(BytesTrie.Entry entry, StringBuilder stringBuilder) { int len = entry.bytesLength(); for (int i = 0; i < len;) { byte b = entry.byteAt(i++); if (b >= 0) { stringBuilder.append((char) b); } else if (b != (byte) -1) { // 2 bytes int b1 = 0x7F & b; int b2 = 0xFF & entry.byteAt(i++); stringBuilder.append((char) ((b1 << 8) | b2)); } else { int b2 = entry.byteAt(i++); int b3 = 0xFF & entry.byteAt(i++); stringBuilder.append((char) ((b2 << 8) | b3)); } } } } public static String toString(BytesTrie bytesTrie2) { return toString(bytesTrie2, " : ", "\n"); } public static String toString(BytesTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { StringBuilder buffer = new StringBuilder(); BytesTrie.Iterator iterator = bytesTrie2.iterator(); while (iterator.hasNext()) { BytesTrie.Entry bytesEntry = iterator.next(); int len = bytesEntry.bytesLength(); byte[] bytes = new byte[len]; bytesEntry.copyBytesTo(bytes, 0); buffer.append(Utility.hex(bytes, 0, len, " ")) .append(keyValueSeparator) .append(bytesEntry.value) .append(itemSeparator); } return buffer.toString(); } public static String toString(CharsTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { StringBuilder buffer = new StringBuilder(); CharsTrie.Iterator iterator = bytesTrie2.iterator(); while (iterator.hasNext()) { CharsTrie.Entry bytesEntry = iterator.next(); buffer.append(Utility.hex(bytesEntry.chars)) .append(keyValueSeparator) .append(bytesEntry.value) .append(itemSeparator); } return buffer.toString(); } }