1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/** 4******************************************************************************* 5* Copyright (C) 1996-2010, International Business Machines Corporation and * 6* others. All Rights Reserved. * 7******************************************************************************* 8*/ 9 10package com.ibm.icu.dev.test.util; 11 12import org.junit.Test; 13 14import com.ibm.icu.dev.test.TestFmwk; 15import com.ibm.icu.impl.CharTrie; 16import com.ibm.icu.impl.IntTrie; 17import com.ibm.icu.impl.IntTrieBuilder; 18import com.ibm.icu.impl.Trie; 19import com.ibm.icu.impl.TrieBuilder; 20import com.ibm.icu.impl.TrieIterator; 21import com.ibm.icu.text.UTF16; 22import com.ibm.icu.util.RangeValueIterator; 23 24/** 25* Testing class for Trie. Tests here will be simple, since both CharTrie and 26* IntTrie are very similar and are heavily used in other parts of ICU4J. 27* Codes using Tries are expected to have detailed tests. 28* @author Syn Wee Quek 29* @since release 2.1 Jan 01 2002 30*/ 31public final class TrieTest extends TestFmwk 32{ 33 // constructor --------------------------------------------------- 34 35 /** 36 * Constructor 37 */ 38 public TrieTest() 39 { 40 } 41 42 // public methods ----------------------------------------------- 43 44 /** 45 * Values for setting possibly overlapping, out-of-order ranges of values 46 */ 47 private static final class SetRange 48 { 49 SetRange(int start, int limit, int value, boolean overwrite) 50 { 51 this.start = start; 52 this.limit = limit; 53 this.value = value; 54 this.overwrite = overwrite; 55 } 56 57 int start, limit; 58 int value; 59 boolean overwrite; 60 } 61 62 /** 63 * Values for testing: 64 * value is set from the previous boundary's limit to before 65 * this boundary's limit 66 */ 67 private static final class CheckRange 68 { 69 CheckRange(int limit, int value) 70 { 71 this.limit = limit; 72 this.value = value; 73 } 74 75 int limit; 76 int value; 77 } 78 79 private static final class _testFoldedValue 80 implements TrieBuilder.DataManipulate 81 { 82 public _testFoldedValue(IntTrieBuilder builder) 83 { 84 m_builder_ = builder; 85 } 86 87 public int getFoldedValue(int start, int offset) 88 { 89 int foldedValue = 0; 90 int limit = start + 0x400; 91 while (start < limit) { 92 int value = m_builder_.getValue(start); 93 if (m_builder_.isInZeroBlock(start)) { 94 start += TrieBuilder.DATA_BLOCK_LENGTH; 95 } 96 else { 97 foldedValue |= value; 98 ++ start; 99 } 100 } 101 102 if (foldedValue != 0) { 103 return (offset << 16) | foldedValue; 104 } 105 return 0; 106 } 107 108 private IntTrieBuilder m_builder_; 109 } 110 111 private static final class _testFoldingOffset 112 implements Trie.DataManipulate 113 { 114 public int getFoldingOffset(int value) 115 { 116 return value >>> 16; 117 } 118 } 119 120 private static final class _testEnumValue extends TrieIterator 121 { 122 public _testEnumValue(Trie data) 123 { 124 super(data); 125 } 126 127 protected int extract(int value) 128 { 129 return value ^ 0x5555; 130 } 131 } 132 133 private void _testTrieIteration(IntTrie trie, CheckRange checkRanges[], 134 int countCheckRanges) 135 { 136 // write a string 137 int countValues = 0; 138 StringBuffer s = new StringBuffer(); 139 int values[] = new int[30]; 140 for (int i = 0; i < countCheckRanges; ++ i) { 141 int c = checkRanges[i].limit; 142 if (c != 0) { 143 -- c; 144 UTF16.append(s, c); 145 values[countValues ++] = checkRanges[i].value; 146 } 147 } 148 int limit = s.length(); 149 // try forward 150 int p = 0; 151 int i = 0; 152 while(p < limit) { 153 int c = UTF16.charAt(s, p); 154 p += UTF16.getCharCount(c); 155 int value = trie.getCodePointValue(c); 156 if (value != values[i]) { 157 errln("wrong value from UTRIE_NEXT(U+" 158 + Integer.toHexString(c) + "): 0x" 159 + Integer.toHexString(value) + " instead of 0x" 160 + Integer.toHexString(values[i])); 161 } 162 // unlike the c version lead is 0 if c is non-supplementary 163 char lead = UTF16.getLeadSurrogate(c); 164 char trail = UTF16.getTrailSurrogate(c); 165 if (lead == 0 166 ? trail != s.charAt(p - 1) 167 : !UTF16.isLeadSurrogate(lead) 168 || !UTF16.isTrailSurrogate(trail) || lead != s.charAt(p - 2) 169 || trail != s.charAt(p - 1)) { 170 errln("wrong (lead, trail) from UTRIE_NEXT(U+" 171 + Integer.toHexString(c)); 172 continue; 173 } 174 if (lead != 0) { 175 value = trie.getLeadValue(lead); 176 value = trie.getTrailValue(value, trail); 177 if (value != trie.getSurrogateValue(lead, trail) 178 && value != values[i]) { 179 errln("wrong value from getting supplementary " 180 + "values (U+" 181 + Integer.toHexString(c) + "): 0x" 182 + Integer.toHexString(value) + " instead of 0x" 183 + Integer.toHexString(values[i])); 184 } 185 } 186 ++ i; 187 } 188 } 189 190 private void _testTrieRanges(SetRange setRanges[], int countSetRanges, 191 CheckRange checkRanges[], int countCheckRanges, 192 boolean latin1Linear) 193 { 194 IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000, 195 checkRanges[0].value, 196 checkRanges[0].value, 197 latin1Linear); 198 199 // set values from setRanges[] 200 boolean ok = true; 201 for (int i = 0; i < countSetRanges; ++ i) { 202 int start = setRanges[i].start; 203 int limit = setRanges[i].limit; 204 int value = setRanges[i].value; 205 boolean overwrite = setRanges[i].overwrite; 206 if ((limit - start) == 1 && overwrite) { 207 ok &= newTrie.setValue(start, value); 208 } 209 else { 210 ok &= newTrie.setRange(start, limit, value, overwrite); 211 } 212 } 213 if (!ok) { 214 errln("setting values into a trie failed"); 215 return; 216 } 217 218 // verify that all these values are in the new Trie 219 int start = 0; 220 for (int i = 0; i < countCheckRanges; ++ i) { 221 int limit = checkRanges[i].limit; 222 int value = checkRanges[i].value; 223 224 while (start < limit) { 225 if (value != newTrie.getValue(start)) { 226 errln("newTrie [U+" 227 + Integer.toHexString(start) + "]==0x" 228 + Integer.toHexString(newTrie.getValue(start)) 229 + " instead of 0x" + Integer.toHexString(value)); 230 } 231 ++ start; 232 } 233 } 234 235 IntTrie trie = newTrie.serialize(new _testFoldedValue(newTrie), 236 new _testFoldingOffset()); 237 238 // test linear Latin-1 range from utrie_getData() 239 if (latin1Linear) { 240 start = 0; 241 for (int i = 0; i < countCheckRanges && start <= 0xff; ++ i) { 242 int limit = checkRanges[i].limit; 243 int value = checkRanges[i].value; 244 245 while (start < limit && start <= 0xff) { 246 if (value != trie.getLatin1LinearValue((char)start)) { 247 errln("IntTrie.getLatin1LinearValue[U+" 248 + Integer.toHexString(start) + "]==0x" 249 + Integer.toHexString( 250 trie.getLatin1LinearValue((char) start)) 251 + " instead of 0x" + Integer.toHexString(value)); 252 } 253 ++ start; 254 } 255 } 256 } 257 258 if (latin1Linear != trie.isLatin1Linear()) { 259 errln("trie serialization did not preserve " 260 + "Latin-1-linearity"); 261 } 262 263 // verify that all these values are in the serialized Trie 264 start = 0; 265 for (int i = 0; i < countCheckRanges; ++ i) { 266 int limit = checkRanges[i].limit; 267 int value = checkRanges[i].value; 268 269 if (start == 0xd800) { 270 // skip surrogates 271 start = limit; 272 continue; 273 } 274 275 while (start < limit) { 276 if (start <= 0xffff) { 277 int value2 = trie.getBMPValue((char)start); 278 if (value != value2) { 279 errln("serialized trie.getBMPValue(U+" 280 + Integer.toHexString(start) + " == 0x" 281 + Integer.toHexString(value2) + " instead of 0x" 282 + Integer.toHexString(value)); 283 } 284 if (!UTF16.isLeadSurrogate((char)start)) { 285 value2 = trie.getLeadValue((char)start); 286 if (value != value2) { 287 errln("serialized trie.getLeadValue(U+" 288 + Integer.toHexString(start) + " == 0x" 289 + Integer.toHexString(value2) + " instead of 0x" 290 + Integer.toHexString(value)); 291 } 292 } 293 } 294 int value2 = trie.getCodePointValue(start); 295 if (value != value2) { 296 errln("serialized trie.getCodePointValue(U+" 297 + Integer.toHexString(start) + ")==0x" 298 + Integer.toHexString(value2) + " instead of 0x" 299 + Integer.toHexString(value)); 300 } 301 ++ start; 302 } 303 } 304 305 // enumerate and verify all ranges 306 307 int enumRanges = 1; 308 TrieIterator iter = new _testEnumValue(trie); 309 RangeValueIterator.Element result = new RangeValueIterator.Element(); 310 while (iter.next(result)) { 311 if (result.start != checkRanges[enumRanges -1].limit 312 || result.limit != checkRanges[enumRanges].limit 313 || (result.value ^ 0x5555) != checkRanges[enumRanges].value) { 314 errln("utrie_enum() delivers wrong range [U+" 315 + Integer.toHexString(result.start) + "..U+" 316 + Integer.toHexString(result.limit) + "].0x" 317 + Integer.toHexString(result.value ^ 0x5555) 318 + " instead of [U+" 319 + Integer.toHexString(checkRanges[enumRanges -1].limit) 320 + "..U+" 321 + Integer.toHexString(checkRanges[enumRanges].limit) 322 + "].0x" 323 + Integer.toHexString(checkRanges[enumRanges].value)); 324 } 325 enumRanges ++; 326 } 327 328 // test linear Latin-1 range 329 if (trie.isLatin1Linear()) { 330 for (start = 0; start < 0x100; ++ start) { 331 if (trie.getLatin1LinearValue((char)start) 332 != trie.getLeadValue((char)start)) { 333 errln("trie.getLatin1LinearValue[U+" 334 + Integer.toHexString(start) + "]=0x" 335 + Integer.toHexString( 336 trie.getLatin1LinearValue((char)start)) 337 + " instead of 0x" 338 + Integer.toHexString( 339 trie.getLeadValue((char)start))); 340 } 341 } 342 } 343 344 _testTrieIteration(trie, checkRanges, countCheckRanges); 345 } 346 347 private void _testTrieRanges2(SetRange setRanges[], 348 int countSetRanges, 349 CheckRange checkRanges[], 350 int countCheckRanges) 351 { 352 _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges, 353 false); 354 355 _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges, 356 true); 357 } 358 359 private void _testTrieRanges4(SetRange setRanges[], int countSetRanges, 360 CheckRange checkRanges[], 361 int countCheckRanges) 362 { 363 _testTrieRanges2(setRanges, countSetRanges, checkRanges, 364 countCheckRanges); 365 } 366 367 // test data ------------------------------------------------------------ 368 369 /** 370 * set consecutive ranges, even with value 0 371 */ 372 private static SetRange setRanges1[]={ 373 new SetRange(0, 0x20, 0, false), 374 new SetRange(0x20, 0xa7, 0x1234, false), 375 new SetRange(0xa7, 0x3400, 0, false), 376 new SetRange(0x3400, 0x9fa6, 0x6162, false), 377 new SetRange(0x9fa6, 0xda9e, 0x3132, false), 378 // try to disrupt _testFoldingOffset16() 379 new SetRange(0xdada, 0xeeee, 0x87ff, false), 380 new SetRange(0xeeee, 0x11111, 1, false), 381 new SetRange(0x11111, 0x44444, 0x6162, false), 382 new SetRange(0x44444, 0x60003, 0, false), 383 new SetRange(0xf0003, 0xf0004, 0xf, false), 384 new SetRange(0xf0004, 0xf0006, 0x10, false), 385 new SetRange(0xf0006, 0xf0007, 0x11, false), 386 new SetRange(0xf0007, 0xf0020, 0x12, false), 387 new SetRange(0xf0020, 0x110000, 0, false) 388 }; 389 390 private static CheckRange checkRanges1[]={ 391 new CheckRange(0, 0), // dummy start range to make _testEnumRange() simpler 392 new CheckRange(0x20, 0), 393 new CheckRange(0xa7, 0x1234), 394 new CheckRange(0x3400, 0), 395 new CheckRange(0x9fa6, 0x6162), 396 new CheckRange(0xda9e, 0x3132), 397 new CheckRange(0xdada, 0), 398 new CheckRange(0xeeee, 0x87ff), 399 new CheckRange(0x11111,1), 400 new CheckRange(0x44444,0x6162), 401 new CheckRange(0xf0003,0), 402 new CheckRange(0xf0004,0xf), 403 new CheckRange(0xf0006,0x10), 404 new CheckRange(0xf0007,0x11), 405 new CheckRange(0xf0020,0x12), 406 new CheckRange(0x110000, 0) 407 }; 408 409 /** 410 * set some interesting overlapping ranges 411 */ 412 private static SetRange setRanges2[]={ 413 new SetRange(0x21, 0x7f, 0x5555, true), 414 new SetRange(0x2f800,0x2fedc, 0x7a, true), 415 new SetRange(0x72, 0xdd, 3, true), 416 new SetRange(0xdd, 0xde, 4, false), 417 new SetRange(0x2f987,0x2fa98, 5, true), 418 new SetRange(0x2f777,0x2f833, 0, true), 419 new SetRange(0x2f900,0x2ffee, 1, false), 420 new SetRange(0x2ffee,0x2ffef, 2, true) 421 }; 422 423 private static CheckRange checkRanges2[]={ 424 // dummy start range to make _testEnumRange() simpler 425 new CheckRange(0, 0), 426 new CheckRange(0x21, 0), 427 new CheckRange(0x72, 0x5555), 428 new CheckRange(0xdd, 3), 429 new CheckRange(0xde, 4), 430 new CheckRange(0x2f833,0), 431 new CheckRange(0x2f987,0x7a), 432 new CheckRange(0x2fa98,5), 433 new CheckRange(0x2fedc,0x7a), 434 new CheckRange(0x2ffee,1), 435 new CheckRange(0x2ffef,2), 436 new CheckRange(0x110000, 0) 437 }; 438 439 /** 440 * use a non-zero initial value 441 */ 442 private static SetRange setRanges3[]={ 443 new SetRange(0x31, 0xa4, 1, false), 444 new SetRange(0x3400, 0x6789, 2, false), 445 new SetRange(0x30000,0x34567,9, true), 446 new SetRange(0x45678,0x56789,3, true) 447 }; 448 449 private static CheckRange checkRanges3[]={ 450 // dummy start range, also carries the initial value 451 new CheckRange(0, 9), 452 new CheckRange(0x31, 9), 453 new CheckRange(0xa4, 1), 454 new CheckRange(0x3400, 9), 455 new CheckRange(0x6789, 2), 456 new CheckRange(0x45678,9), 457 new CheckRange(0x56789,3), 458 new CheckRange(0x110000,9) 459 }; 460 461 @Test 462 public void TestIntTrie() 463 { 464 _testTrieRanges4(setRanges1, setRanges1.length, checkRanges1, 465 checkRanges1.length); 466 _testTrieRanges4(setRanges2, setRanges2.length, checkRanges2, 467 checkRanges2.length); 468 _testTrieRanges4(setRanges3, setRanges3.length, checkRanges3, 469 checkRanges3.length); 470 } 471 472 private static class DummyGetFoldingOffset implements Trie.DataManipulate { 473 public int getFoldingOffset(int value) { 474 return -1; /* never get non-initialValue data for supplementary code points */ 475 } 476 } 477 478 @Test 479 public void TestDummyCharTrie() { 480 CharTrie trie; 481 final int initialValue=0x313, leadUnitValue=0xaffe; 482 int value; 483 int c; 484 trie=new CharTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset()); 485 486 /* test that all code points have initialValue */ 487 for(c=0; c<=0x10ffff; ++c) { 488 value=trie.getCodePointValue(c); 489 if(value!=initialValue) { 490 errln("CharTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue)); 491 } 492 } 493 494 /* test that the lead surrogate code units have leadUnitValue */ 495 for(c=0xd800; c<=0xdbff; ++c) { 496 value=trie.getLeadValue((char)c); 497 if(value!=leadUnitValue) { 498 errln("CharTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue)); 499 } 500 } 501 } 502 503 @Test 504 public void TestDummyIntTrie() { 505 IntTrie trie; 506 final int initialValue=0x01234567, leadUnitValue=0x89abcdef; 507 int value; 508 int c; 509 trie=new IntTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset()); 510 511 /* test that all code points have initialValue */ 512 for(c=0; c<=0x10ffff; ++c) { 513 value=trie.getCodePointValue(c); 514 if(value!=initialValue) { 515 errln("IntTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue)); 516 } 517 } 518 519 /* test that the lead surrogate code units have leadUnitValue */ 520 for(c=0xd800; c<=0xdbff; ++c) { 521 value=trie.getLeadValue((char)c); 522 if(value!=leadUnitValue) { 523 errln("IntTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue)); 524 } 525 } 526 } 527} 528