1/* 2 ****************************************************************************** 3 * Copyright (C) 1996-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ****************************************************************************** 6 */ 7 8package com.ibm.icu.impl; 9 10import java.io.IOException; 11import java.nio.ByteBuffer; 12import java.util.Arrays; 13 14import com.ibm.icu.text.UTF16; 15 16/** 17 * Trie implementation which stores data in int, 32 bits. 18 * 2015-sep-03: Used only in CharsetSelector which could be switched to {@link Trie2_32} 19 * as long as that does not load ICU4C selector data. 20 * 21 * @author synwee 22 * @see com.ibm.icu.impl.Trie 23 * @since release 2.1, Jan 01 2002 24 */ 25public class IntTrie extends Trie 26{ 27 // public constructors --------------------------------------------- 28 29 /** 30 * <p>Creates a new Trie with the settings for the trie data.</p> 31 * <p>Unserialize the 32-bit-aligned input stream and use the data for the 32 * trie.</p> 33 * @param bytes file buffer to a ICU data file, containing the trie 34 * @param dataManipulate object which provides methods to parse the char 35 * data 36 * @throws IOException thrown when data reading fails 37 */ 38 public IntTrie(ByteBuffer bytes, DataManipulate dataManipulate) 39 throws IOException 40 { 41 super(bytes, dataManipulate); 42 if (!isIntTrie()) { 43 throw new IllegalArgumentException( 44 "Data given does not belong to a int trie."); 45 } 46 } 47 48 /** 49 * Make a dummy IntTrie. 50 * A dummy trie is an empty runtime trie, used when a real data trie cannot 51 * be loaded. 52 * 53 * The trie always returns the initialValue, 54 * or the leadUnitValue for lead surrogate code points. 55 * The Latin-1 part is always set up to be linear. 56 * 57 * @param initialValue the initial value that is set for all code points 58 * @param leadUnitValue the value for lead surrogate code _units_ that do not 59 * have associated supplementary data 60 * @param dataManipulate object which provides methods to parse the char data 61 */ 62 @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770 63 public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) { 64 super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate); 65 66 int dataLength, latin1Length, i, limit; 67 char block; 68 69 /* calculate the actual size of the dummy trie data */ 70 71 /* max(Latin-1, block 0) */ 72 dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH; 73 if(leadUnitValue!=initialValue) { 74 dataLength+=DATA_BLOCK_LENGTH; 75 } 76 m_data_=new int[dataLength]; 77 m_dataLength_=dataLength; 78 79 m_initialValue_=initialValue; 80 81 /* fill the index and data arrays */ 82 83 /* indexes are preset to 0 (block 0) */ 84 85 /* Latin-1 data */ 86 for(i=0; i<latin1Length; ++i) { 87 m_data_[i]=initialValue; 88 } 89 90 if(leadUnitValue!=initialValue) { 91 /* indexes for lead surrogate code units to the block after Latin-1 */ 92 block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_); 93 i=0xd800>>INDEX_STAGE_1_SHIFT_; 94 limit=0xdc00>>INDEX_STAGE_1_SHIFT_; 95 for(; i<limit; ++i) { 96 m_index_[i]=block; 97 } 98 99 /* data for lead surrogate code units */ 100 limit=latin1Length+DATA_BLOCK_LENGTH; 101 for(i=latin1Length; i<limit; ++i) { 102 m_data_[i]=leadUnitValue; 103 } 104 } 105 } 106 107 // public methods -------------------------------------------------- 108 109 /** 110 * Gets the value associated with the codepoint. 111 * If no value is associated with the codepoint, a default value will be 112 * returned. 113 * @param ch codepoint 114 * @return offset to data 115 */ 116 public final int getCodePointValue(int ch) 117 { 118 int offset; 119 120 // fastpath for U+0000..U+D7FF 121 if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { 122 // copy of getRawOffset() 123 offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) 124 + (ch & INDEX_STAGE_3_MASK_); 125 return m_data_[offset]; 126 } 127 128 // handle U+D800..U+10FFFF 129 offset = getCodePointOffset(ch); 130 return (offset >= 0) ? m_data_[offset] : m_initialValue_; 131 } 132 133 /** 134 * Gets the value to the data which this lead surrogate character points 135 * to. 136 * Returned data may contain folding offset information for the next 137 * trailing surrogate character. 138 * This method does not guarantee correct results for trail surrogates. 139 * @param ch lead surrogate character 140 * @return data value 141 */ 142 public final int getLeadValue(char ch) 143 { 144 return m_data_[getLeadOffset(ch)]; 145 } 146 147 /** 148 * Get the value associated with the BMP code point. 149 * Lead surrogate code points are treated as normal code points, with 150 * unfolded values that may differ from getLeadValue() results. 151 * @param ch the input BMP code point 152 * @return trie data value associated with the BMP codepoint 153 */ 154 public final int getBMPValue(char ch) 155 { 156 return m_data_[getBMPOffset(ch)]; 157 } 158 159 /** 160 * Get the value associated with a pair of surrogates. 161 * @param lead a lead surrogate 162 * @param trail a trail surrogate 163 */ 164 public final int getSurrogateValue(char lead, char trail) 165 { 166 if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) { 167 throw new IllegalArgumentException( 168 "Argument characters do not form a supplementary character"); 169 } 170 // get fold position for the next trail surrogate 171 int offset = getSurrogateOffset(lead, trail); 172 173 // get the real data from the folded lead/trail units 174 if (offset > 0) { 175 return m_data_[offset]; 176 } 177 178 // return m_initialValue_ if there is an error 179 return m_initialValue_; 180 } 181 182 /** 183 * Get a value from a folding offset (from the value of a lead surrogate) 184 * and a trail surrogate. 185 * @param leadvalue the value of a lead surrogate that contains the 186 * folding offset 187 * @param trail surrogate 188 * @return trie data value associated with the trail character 189 */ 190 public final int getTrailValue(int leadvalue, char trail) 191 { 192 if (m_dataManipulate_ == null) { 193 throw new NullPointerException( 194 "The field DataManipulate in this Trie is null"); 195 } 196 int offset = m_dataManipulate_.getFoldingOffset(leadvalue); 197 if (offset > 0) { 198 return m_data_[getRawOffset(offset, 199 (char)(trail & SURROGATE_MASK_))]; 200 } 201 return m_initialValue_; 202 } 203 204 /** 205 * <p>Gets the latin 1 fast path value.</p> 206 * <p>Note this only works if latin 1 characters have their own linear 207 * array.</p> 208 * @param ch latin 1 characters 209 * @return value associated with latin character 210 */ 211 public final int getLatin1LinearValue(char ch) 212 { 213 return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch]; 214 } 215 216 /** 217 * Checks if the argument Trie has the same data as this Trie 218 * @param other Trie to check 219 * @return true if the argument Trie has the same data as this Trie, false 220 * otherwise 221 */ 222 ///CLOVER:OFF 223 public boolean equals(Object other) 224 { 225 boolean result = super.equals(other); 226 if (result && other instanceof IntTrie) { 227 IntTrie othertrie = (IntTrie)other; 228 if (m_initialValue_ != othertrie.m_initialValue_ 229 || !Arrays.equals(m_data_, othertrie.m_data_)) { 230 return false; 231 } 232 return true; 233 } 234 return false; 235 } 236 237 public int hashCode() { 238 assert false : "hashCode not designed"; 239 return 42; 240 } 241 ///CLOVER:ON 242 243 // protected methods ----------------------------------------------- 244 245 /** 246 * <p>Parses the input stream and stores its trie content into a index and 247 * data array</p> 248 * @param bytes data buffer containing trie data 249 */ 250 protected final void unserialize(ByteBuffer bytes) 251 { 252 super.unserialize(bytes); 253 // one used for initial value 254 m_data_ = ICUBinary.getInts(bytes, m_dataLength_, 0); 255 m_initialValue_ = m_data_[0]; 256 } 257 258 /** 259 * Gets the offset to the data which the surrogate pair points to. 260 * @param lead lead surrogate 261 * @param trail trailing surrogate 262 * @return offset to data 263 */ 264 protected final int getSurrogateOffset(char lead, char trail) 265 { 266 if (m_dataManipulate_ == null) { 267 throw new NullPointerException( 268 "The field DataManipulate in this Trie is null"); 269 } 270 // get fold position for the next trail surrogate 271 int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); 272 273 // get the real data from the folded lead/trail units 274 if (offset > 0) { 275 return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); 276 } 277 278 // return -1 if there is an error, in this case we return the default 279 // value: m_initialValue_ 280 return -1; 281 } 282 283 /** 284 * Gets the value at the argument index. 285 * For use internally in TrieIterator 286 * @param index value at index will be retrieved 287 * @return 32 bit value 288 * @see com.ibm.icu.impl.TrieIterator 289 */ 290 protected final int getValue(int index) 291 { 292 return m_data_[index]; 293 } 294 295 /** 296 * Gets the default initial value 297 * @return 32 bit value 298 */ 299 protected final int getInitialValue() 300 { 301 return m_initialValue_; 302 } 303 304 // package private methods ----------------------------------------- 305 306 /** 307 * Internal constructor for builder use 308 * @param index the index array to be slotted into this trie 309 * @param data the data array to be slotted into this trie 310 * @param initialvalue the initial value for this trie 311 * @param options trie options to use 312 * @param datamanipulate folding implementation 313 */ 314 IntTrie(char index[], int data[], int initialvalue, int options, 315 DataManipulate datamanipulate) 316 { 317 super(index, options, datamanipulate); 318 m_data_ = data; 319 m_dataLength_ = m_data_.length; 320 m_initialValue_ = initialvalue; 321 } 322 323 // private data members -------------------------------------------- 324 325 /** 326 * Default value 327 */ 328 private int m_initialValue_; 329 /** 330 * Array of char data 331 */ 332 private int m_data_[]; 333} 334