1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/**
4*******************************************************************************
5* Copyright (C) 1996-2010, International Business Machines Corporation and    *
6* others. All Rights Reserved.                                                *
7*******************************************************************************
8*/
9
10package com.ibm.icu.dev.test.util;
11
12import org.junit.Test;
13
14import com.ibm.icu.dev.test.TestFmwk;
15import com.ibm.icu.impl.CharTrie;
16import com.ibm.icu.impl.IntTrie;
17import com.ibm.icu.impl.IntTrieBuilder;
18import com.ibm.icu.impl.Trie;
19import com.ibm.icu.impl.TrieBuilder;
20import com.ibm.icu.impl.TrieIterator;
21import com.ibm.icu.text.UTF16;
22import com.ibm.icu.util.RangeValueIterator;
23
24/**
25* Testing class for Trie. Tests here will be simple, since both CharTrie and
26* IntTrie are very similar and are heavily used in other parts of ICU4J.
27* Codes using Tries are expected to have detailed tests.
28* @author Syn Wee Quek
29* @since release 2.1 Jan 01 2002
30*/
31public final class TrieTest extends TestFmwk
32{
33    // constructor ---------------------------------------------------
34
35    /**
36    * Constructor
37    */
38    public TrieTest()
39    {
40    }
41
42    // public methods -----------------------------------------------
43
44    /**
45     * Values for setting possibly overlapping, out-of-order ranges of values
46     */
47    private static final class SetRange
48    {
49        SetRange(int start, int limit, int value, boolean overwrite)
50        {
51            this.start = start;
52            this.limit = limit;
53            this.value = value;
54            this.overwrite = overwrite;
55        }
56
57        int start, limit;
58        int value;
59        boolean overwrite;
60    }
61
62    /**
63     * Values for testing:
64     * value is set from the previous boundary's limit to before
65     * this boundary's limit
66     */
67    private static final class CheckRange
68    {
69        CheckRange(int limit, int value)
70        {
71            this.limit = limit;
72            this.value = value;
73        }
74
75        int limit;
76        int value;
77    }
78
79    private static final class _testFoldedValue
80                                        implements TrieBuilder.DataManipulate
81    {
82        public _testFoldedValue(IntTrieBuilder builder)
83        {
84            m_builder_ = builder;
85        }
86
87        public int getFoldedValue(int start, int offset)
88        {
89            int foldedValue = 0;
90            int limit = start + 0x400;
91            while (start < limit) {
92                int value = m_builder_.getValue(start);
93                if (m_builder_.isInZeroBlock(start)) {
94                    start += TrieBuilder.DATA_BLOCK_LENGTH;
95                }
96                else {
97                    foldedValue |= value;
98                    ++ start;
99                }
100            }
101
102            if (foldedValue != 0) {
103                return (offset << 16) | foldedValue;
104            }
105            return 0;
106        }
107
108        private IntTrieBuilder m_builder_;
109    }
110
111    private static final class _testFoldingOffset
112                                                implements Trie.DataManipulate
113    {
114        public int getFoldingOffset(int value)
115        {
116            return value >>> 16;
117        }
118    }
119
120    private static final class _testEnumValue extends TrieIterator
121    {
122        public _testEnumValue(Trie data)
123        {
124            super(data);
125        }
126
127        protected int extract(int value)
128        {
129            return value ^ 0x5555;
130        }
131    }
132
133    private void _testTrieIteration(IntTrie trie, CheckRange checkRanges[],
134                                    int countCheckRanges)
135    {
136        // write a string
137        int countValues = 0;
138        StringBuffer s = new StringBuffer();
139        int values[] = new int[30];
140        for (int i = 0; i < countCheckRanges; ++ i) {
141            int c = checkRanges[i].limit;
142            if (c != 0) {
143                -- c;
144                UTF16.append(s, c);
145                values[countValues ++] = checkRanges[i].value;
146            }
147        }
148        int limit = s.length();
149        // try forward
150        int p = 0;
151        int i = 0;
152        while(p < limit) {
153            int c = UTF16.charAt(s, p);
154            p += UTF16.getCharCount(c);
155            int value = trie.getCodePointValue(c);
156            if (value != values[i]) {
157                errln("wrong value from UTRIE_NEXT(U+"
158                      + Integer.toHexString(c) + "): 0x"
159                      + Integer.toHexString(value) + " instead of 0x"
160                      + Integer.toHexString(values[i]));
161            }
162            // unlike the c version lead is 0 if c is non-supplementary
163            char lead = UTF16.getLeadSurrogate(c);
164            char trail = UTF16.getTrailSurrogate(c);
165            if (lead == 0
166                ? trail != s.charAt(p - 1)
167                : !UTF16.isLeadSurrogate(lead)
168                  || !UTF16.isTrailSurrogate(trail) || lead != s.charAt(p - 2)
169                  || trail != s.charAt(p - 1)) {
170                errln("wrong (lead, trail) from UTRIE_NEXT(U+"
171                      + Integer.toHexString(c));
172                continue;
173            }
174            if (lead != 0) {
175                value = trie.getLeadValue(lead);
176                value = trie.getTrailValue(value, trail);
177                if (value != trie.getSurrogateValue(lead, trail)
178                    && value != values[i]) {
179                    errln("wrong value from getting supplementary "
180                          + "values (U+"
181                          + Integer.toHexString(c) + "): 0x"
182                          + Integer.toHexString(value) + " instead of 0x"
183                          + Integer.toHexString(values[i]));
184                }
185            }
186            ++ i;
187        }
188    }
189
190    private void _testTrieRanges(SetRange setRanges[], int countSetRanges,
191                                 CheckRange checkRanges[], int countCheckRanges,
192                                 boolean latin1Linear)
193    {
194        IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000,
195                                                    checkRanges[0].value,
196                                                    checkRanges[0].value,
197                                                    latin1Linear);
198
199        // set values from setRanges[]
200        boolean ok = true;
201        for (int i = 0; i < countSetRanges; ++ i) {
202            int start = setRanges[i].start;
203            int limit = setRanges[i].limit;
204            int value = setRanges[i].value;
205            boolean overwrite = setRanges[i].overwrite;
206            if ((limit - start) == 1 && overwrite) {
207                ok &= newTrie.setValue(start, value);
208            }
209            else {
210                ok &= newTrie.setRange(start, limit, value, overwrite);
211            }
212        }
213        if (!ok) {
214            errln("setting values into a trie failed");
215            return;
216        }
217
218        // verify that all these values are in the new Trie
219        int start = 0;
220        for (int i = 0; i < countCheckRanges; ++ i) {
221            int limit = checkRanges[i].limit;
222            int value = checkRanges[i].value;
223
224            while (start < limit) {
225                if (value != newTrie.getValue(start)) {
226                    errln("newTrie [U+"
227                          + Integer.toHexString(start) + "]==0x"
228                          + Integer.toHexString(newTrie.getValue(start))
229                          + " instead of 0x" + Integer.toHexString(value));
230                }
231                ++ start;
232            }
233        }
234
235        IntTrie trie = newTrie.serialize(new _testFoldedValue(newTrie),
236                                         new _testFoldingOffset());
237
238        // test linear Latin-1 range from utrie_getData()
239        if (latin1Linear) {
240            start = 0;
241            for (int i = 0; i < countCheckRanges && start <= 0xff; ++ i) {
242                int limit = checkRanges[i].limit;
243                int value = checkRanges[i].value;
244
245                while (start < limit && start <= 0xff) {
246                    if (value != trie.getLatin1LinearValue((char)start)) {
247                        errln("IntTrie.getLatin1LinearValue[U+"
248                              + Integer.toHexString(start) + "]==0x"
249                              + Integer.toHexString(
250                                        trie.getLatin1LinearValue((char) start))
251                              + " instead of 0x" + Integer.toHexString(value));
252                    }
253                    ++ start;
254                }
255            }
256        }
257
258        if (latin1Linear != trie.isLatin1Linear()) {
259            errln("trie serialization did not preserve "
260                  + "Latin-1-linearity");
261        }
262
263        // verify that all these values are in the serialized Trie
264        start = 0;
265        for (int i = 0; i < countCheckRanges; ++ i) {
266            int limit = checkRanges[i].limit;
267            int value = checkRanges[i].value;
268
269            if (start == 0xd800) {
270                // skip surrogates
271                start = limit;
272                continue;
273            }
274
275            while (start < limit) {
276                if (start <= 0xffff) {
277                    int value2 = trie.getBMPValue((char)start);
278                    if (value != value2) {
279                        errln("serialized trie.getBMPValue(U+"
280                              + Integer.toHexString(start) + " == 0x"
281                              + Integer.toHexString(value2) + " instead of 0x"
282                              + Integer.toHexString(value));
283                    }
284                    if (!UTF16.isLeadSurrogate((char)start)) {
285                        value2 = trie.getLeadValue((char)start);
286                        if (value != value2) {
287                            errln("serialized trie.getLeadValue(U+"
288                              + Integer.toHexString(start) + " == 0x"
289                              + Integer.toHexString(value2) + " instead of 0x"
290                              + Integer.toHexString(value));
291                        }
292                    }
293                }
294                int value2 = trie.getCodePointValue(start);
295                if (value != value2) {
296                    errln("serialized trie.getCodePointValue(U+"
297                          + Integer.toHexString(start) + ")==0x"
298                          + Integer.toHexString(value2) + " instead of 0x"
299                          + Integer.toHexString(value));
300                }
301                ++ start;
302            }
303        }
304
305        // enumerate and verify all ranges
306
307        int enumRanges = 1;
308        TrieIterator iter  = new _testEnumValue(trie);
309        RangeValueIterator.Element result = new RangeValueIterator.Element();
310        while (iter.next(result)) {
311            if (result.start != checkRanges[enumRanges -1].limit
312                || result.limit != checkRanges[enumRanges].limit
313                || (result.value ^ 0x5555) != checkRanges[enumRanges].value) {
314                errln("utrie_enum() delivers wrong range [U+"
315                      + Integer.toHexString(result.start) + "..U+"
316                      + Integer.toHexString(result.limit) + "].0x"
317                      + Integer.toHexString(result.value ^ 0x5555)
318                      + " instead of [U+"
319                      + Integer.toHexString(checkRanges[enumRanges -1].limit)
320                      + "..U+"
321                      + Integer.toHexString(checkRanges[enumRanges].limit)
322                      + "].0x"
323                      + Integer.toHexString(checkRanges[enumRanges].value));
324            }
325            enumRanges ++;
326        }
327
328        // test linear Latin-1 range
329        if (trie.isLatin1Linear()) {
330            for (start = 0; start < 0x100; ++ start) {
331                if (trie.getLatin1LinearValue((char)start)
332                    != trie.getLeadValue((char)start)) {
333                    errln("trie.getLatin1LinearValue[U+"
334                          + Integer.toHexString(start) + "]=0x"
335                          + Integer.toHexString(
336                                        trie.getLatin1LinearValue((char)start))
337                          + " instead of 0x"
338                          + Integer.toHexString(
339                                        trie.getLeadValue((char)start)));
340                }
341            }
342        }
343
344        _testTrieIteration(trie, checkRanges, countCheckRanges);
345    }
346
347    private void _testTrieRanges2(SetRange setRanges[],
348                                  int countSetRanges,
349                                  CheckRange checkRanges[],
350                                  int countCheckRanges)
351    {
352        _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges,
353                        false);
354
355        _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges,
356                        true);
357    }
358
359    private void _testTrieRanges4(SetRange setRanges[], int countSetRanges,
360                                  CheckRange checkRanges[],
361                                  int countCheckRanges)
362    {
363        _testTrieRanges2(setRanges, countSetRanges, checkRanges,
364                         countCheckRanges);
365    }
366
367    // test data ------------------------------------------------------------
368
369    /**
370     * set consecutive ranges, even with value 0
371     */
372    private static SetRange setRanges1[]={
373        new SetRange(0,      0x20,       0,      false),
374        new SetRange(0x20,   0xa7,       0x1234, false),
375        new SetRange(0xa7,   0x3400,     0,      false),
376        new SetRange(0x3400, 0x9fa6,     0x6162, false),
377        new SetRange(0x9fa6, 0xda9e,     0x3132, false),
378        // try to disrupt _testFoldingOffset16()
379        new SetRange(0xdada, 0xeeee,     0x87ff, false),
380        new SetRange(0xeeee, 0x11111,    1,      false),
381        new SetRange(0x11111, 0x44444,   0x6162, false),
382        new SetRange(0x44444, 0x60003,   0,      false),
383        new SetRange(0xf0003, 0xf0004,   0xf,    false),
384        new SetRange(0xf0004, 0xf0006,   0x10,   false),
385        new SetRange(0xf0006, 0xf0007,   0x11,   false),
386        new SetRange(0xf0007, 0xf0020,   0x12,   false),
387        new SetRange(0xf0020, 0x110000,  0,      false)
388    };
389
390    private static CheckRange checkRanges1[]={
391        new CheckRange(0,      0), // dummy start range to make _testEnumRange() simpler
392        new CheckRange(0x20,   0),
393        new CheckRange(0xa7,   0x1234),
394        new CheckRange(0x3400, 0),
395        new CheckRange(0x9fa6, 0x6162),
396        new CheckRange(0xda9e, 0x3132),
397        new CheckRange(0xdada, 0),
398        new CheckRange(0xeeee, 0x87ff),
399        new CheckRange(0x11111,1),
400        new CheckRange(0x44444,0x6162),
401        new CheckRange(0xf0003,0),
402        new CheckRange(0xf0004,0xf),
403        new CheckRange(0xf0006,0x10),
404        new CheckRange(0xf0007,0x11),
405        new CheckRange(0xf0020,0x12),
406        new CheckRange(0x110000, 0)
407    };
408
409    /**
410     * set some interesting overlapping ranges
411     */
412    private static SetRange setRanges2[]={
413        new SetRange(0x21,   0x7f,       0x5555, true),
414        new SetRange(0x2f800,0x2fedc,    0x7a,   true),
415        new SetRange(0x72,   0xdd,       3,      true),
416        new SetRange(0xdd,   0xde,       4,      false),
417        new SetRange(0x2f987,0x2fa98,    5,      true),
418        new SetRange(0x2f777,0x2f833,    0,      true),
419        new SetRange(0x2f900,0x2ffee,    1,      false),
420        new SetRange(0x2ffee,0x2ffef,    2,      true)
421    };
422
423    private static CheckRange checkRanges2[]={
424        // dummy start range to make _testEnumRange() simpler
425        new CheckRange(0,      0),
426        new CheckRange(0x21,   0),
427        new CheckRange(0x72,   0x5555),
428        new CheckRange(0xdd,   3),
429        new CheckRange(0xde,   4),
430        new CheckRange(0x2f833,0),
431        new CheckRange(0x2f987,0x7a),
432        new CheckRange(0x2fa98,5),
433        new CheckRange(0x2fedc,0x7a),
434        new CheckRange(0x2ffee,1),
435        new CheckRange(0x2ffef,2),
436        new CheckRange(0x110000, 0)
437    };
438
439    /**
440     * use a non-zero initial value
441     */
442    private static SetRange setRanges3[]={
443        new SetRange(0x31,   0xa4,   1,  false),
444        new SetRange(0x3400, 0x6789, 2,  false),
445        new SetRange(0x30000,0x34567,9,  true),
446        new SetRange(0x45678,0x56789,3,  true)
447    };
448
449    private static CheckRange checkRanges3[]={
450        // dummy start range, also carries the initial value
451        new CheckRange(0,      9),
452        new CheckRange(0x31,   9),
453        new CheckRange(0xa4,   1),
454        new CheckRange(0x3400, 9),
455        new CheckRange(0x6789, 2),
456        new CheckRange(0x45678,9),
457        new CheckRange(0x56789,3),
458        new CheckRange(0x110000,9)
459    };
460
461    @Test
462    public void TestIntTrie()
463    {
464        _testTrieRanges4(setRanges1, setRanges1.length, checkRanges1,
465                         checkRanges1.length);
466        _testTrieRanges4(setRanges2, setRanges2.length, checkRanges2,
467                         checkRanges2.length);
468        _testTrieRanges4(setRanges3, setRanges3.length, checkRanges3,
469                         checkRanges3.length);
470    }
471
472    private static class DummyGetFoldingOffset implements Trie.DataManipulate {
473        public int getFoldingOffset(int value) {
474            return -1; /* never get non-initialValue data for supplementary code points */
475        }
476    }
477
478    @Test
479    public void TestDummyCharTrie() {
480        CharTrie trie;
481        final int initialValue=0x313, leadUnitValue=0xaffe;
482        int value;
483        int c;
484        trie=new CharTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset());
485
486        /* test that all code points have initialValue */
487        for(c=0; c<=0x10ffff; ++c) {
488            value=trie.getCodePointValue(c);
489            if(value!=initialValue) {
490                errln("CharTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue));
491            }
492        }
493
494        /* test that the lead surrogate code units have leadUnitValue */
495        for(c=0xd800; c<=0xdbff; ++c) {
496            value=trie.getLeadValue((char)c);
497            if(value!=leadUnitValue) {
498                errln("CharTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue));
499            }
500        }
501    }
502
503    @Test
504    public void TestDummyIntTrie() {
505        IntTrie trie;
506        final int initialValue=0x01234567, leadUnitValue=0x89abcdef;
507        int value;
508        int c;
509        trie=new IntTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset());
510
511        /* test that all code points have initialValue */
512        for(c=0; c<=0x10ffff; ++c) {
513            value=trie.getCodePointValue(c);
514            if(value!=initialValue) {
515                errln("IntTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue));
516            }
517        }
518
519        /* test that the lead surrogate code units have leadUnitValue */
520        for(c=0xd800; c<=0xdbff; ++c) {
521            value=trie.getLeadValue((char)c);
522            if(value!=leadUnitValue) {
523                errln("IntTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue));
524            }
525        }
526    }
527}
528