15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)******************************************************************************
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*   Copyright (C) 2001-2011, International Business Machines
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*   Corporation and others.  All Rights Reserved.
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)******************************************************************************
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*   file name:  trietest.c
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*   encoding:   US-ASCII
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*   tab size:   8 (not used)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*   indentation:4
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*   created on: 2001nov20
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*   created by: Markus W. Scherer
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdio.h>
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "unicode/utypes.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "unicode/utf16.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "utrie.h"
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "cstring.h"
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "cmemory.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if 1
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "cintltst.h"
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* definitions from standalone utrie development */
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define log_err printf
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define log_verbose printf
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef u_errorName
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define u_errorName(errorCode) "some error code"
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0]))
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* Values for setting possibly overlapping, out-of-order ranges of values */
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef struct SetRange {
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UChar32 start, limit;
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32_t value;
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UBool overwrite;
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} SetRange;
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Values for testing:
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * value is set from the previous boundary's limit to before
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * this boundary's limit
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef struct CheckRange {
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UChar32 limit;
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32_t value;
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} CheckRange;
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static uint32_t U_CALLCONV
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)_testFoldedValue32(UNewTrie *trie, UChar32 start, int32_t offset) {
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32_t foldedValue, value;
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UChar32 limit;
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UBool inBlockZero;
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    foldedValue=0;
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    limit=start+0x400;
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while(start<limit) {
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        value=utrie_get32(trie, start, &inBlockZero);
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if(inBlockZero) {
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            start+=UTRIE_DATA_BLOCK_LENGTH;
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            foldedValue|=value;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ++start;
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if(foldedValue!=0) {
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return ((uint32_t)offset<<16)|foldedValue;
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return 0;
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int32_t U_CALLCONV
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)_testFoldingOffset32(uint32_t data) {
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (int32_t)(data>>16);
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static uint32_t U_CALLCONV
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)_testFoldedValue16(UNewTrie *trie, UChar32 start, int32_t offset) {
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32_t foldedValue, value;
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UChar32 limit;
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UBool inBlockZero;
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    foldedValue=0;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    limit=start+0x400;
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while(start<limit) {
96        value=utrie_get32(trie, start, &inBlockZero);
97        if(inBlockZero) {
98            start+=UTRIE_DATA_BLOCK_LENGTH;
99        } else {
100            foldedValue|=value;
101            ++start;
102        }
103    }
104
105    if(foldedValue!=0) {
106        return (uint32_t)(offset|0x8000);
107    } else {
108        return 0;
109    }
110}
111
112static int32_t U_CALLCONV
113_testFoldingOffset16(uint32_t data) {
114    if(data&0x8000) {
115        return (int32_t)(data&0x7fff);
116    } else {
117        return 0;
118    }
119}
120
121static uint32_t U_CALLCONV
122_testEnumValue(const void *context, uint32_t value) {
123    return value^0x5555;
124}
125
126static UBool U_CALLCONV
127_testEnumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
128    const CheckRange **pb=(const CheckRange **)context;
129    const CheckRange *b=(*pb)++;
130
131    value^=0x5555;
132    if(start!=(b-1)->limit || limit!=b->limit || value!=b->value) {
133        log_err("error: utrie_enum() delivers wrong range [U+%04lx..U+%04lx[.0x%lx instead of [U+%04lx..U+%04lx[.0x%lx\n",
134            start, limit, value,
135            (b-1)->limit, b->limit, b->value);
136    }
137    return TRUE;
138}
139
140static void
141testTrieIteration(const char *testName,
142                  const UTrie *trie,
143                  const CheckRange checkRanges[], int32_t countCheckRanges) {
144    UChar s[100];
145    uint32_t values[30];
146
147    const UChar *p, *limit;
148
149    uint32_t value;
150    UChar32 c;
151    int32_t i, length, countValues;
152    UChar c2;
153
154    /* write a string */
155    length=countValues=0;
156    for(i=0; i<countCheckRanges; ++i) {
157        c=checkRanges[i].limit;
158        if(c!=0) {
159            --c;
160            U16_APPEND_UNSAFE(s, length, c);
161            values[countValues++]=checkRanges[i].value;
162        }
163    }
164    limit=s+length;
165
166    /* try forward */
167    p=s;
168    i=0;
169    while(p<limit) {
170        c=c2=0x33;
171        if(trie->data32!=NULL) {
172            UTRIE_NEXT32(trie, p, limit, c, c2, value);
173        } else {
174            UTRIE_NEXT16(trie, p, limit, c, c2, value);
175        }
176        if(value!=values[i]) {
177            log_err("error: wrong value from UTRIE_NEXT(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
178                    testName, c, c2, value, values[i]);
179        }
180        if(
181            c2==0 ?
182                c!=*(p-1) :
183                !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*(p-2) || c2!=*(p-1)
184        ) {
185            log_err("error: wrong (c, c2) from UTRIE_NEXT(%s): (U+%04lx, U+%04lx)\n",
186                    testName, c, c2);
187            continue;
188        }
189        if(c2!=0) {
190            int32_t offset;
191
192            if(trie->data32==NULL) {
193                value=UTRIE_GET16_FROM_LEAD(trie, c);
194                offset=trie->getFoldingOffset(value);
195                if(offset>0) {
196                    value=UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2);
197                } else {
198                    value=trie->initialValue;
199                }
200            } else {
201                value=UTRIE_GET32_FROM_LEAD(trie, c);
202                offset=trie->getFoldingOffset(value);
203                if(offset>0) {
204                    value=UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2);
205                } else {
206                    value=trie->initialValue;
207                }
208            }
209            if(value!=values[i]) {
210                log_err("error: wrong value from UTRIE_GETXX_FROM_OFFSET_TRAIL(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
211                        testName, c, c2, value, values[i]);
212            }
213        }
214        if(c2!=0) {
215            value=0x44;
216            if(trie->data32==NULL) {
217                UTRIE_GET16_FROM_PAIR(trie, c, c2, value);
218            } else {
219                UTRIE_GET32_FROM_PAIR(trie, c, c2, value);
220            }
221            if(value!=values[i]) {
222                log_err("error: wrong value from UTRIE_GETXX_FROM_PAIR(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
223                        testName, c, c2, value, values[i]);
224            }
225        }
226        ++i;
227    }
228
229    /* try backward */
230    p=limit;
231    i=countValues;
232    while(s<p) {
233        --i;
234        c=c2=0x33;
235        if(trie->data32!=NULL) {
236            UTRIE_PREVIOUS32(trie, s, p, c, c2, value);
237        } else {
238            UTRIE_PREVIOUS16(trie, s, p, c, c2, value);
239        }
240        if(value!=values[i]) {
241            log_err("error: wrong value from UTRIE_PREVIOUS(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n",
242                    testName, c, c2, value, values[i]);
243        }
244        if(
245            c2==0 ?
246                c!=*p:
247                !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*p || c2!=*(p+1)
248        ) {
249            log_err("error: wrong (c, c2) from UTRIE_PREVIOUS(%s): (U+%04lx, U+%04lx)\n",
250                    testName, c, c2);
251        }
252    }
253}
254
255static void
256testTrieRangesWithMalloc(const char *testName,
257               const SetRange setRanges[], int32_t countSetRanges,
258               const CheckRange checkRanges[], int32_t countCheckRanges,
259               UBool dataIs32, UBool latin1Linear) {
260    UTrieGetFoldingOffset *getFoldingOffset;
261    const CheckRange *enumRanges;
262    UNewTrie *newTrie;
263    UTrie trie={ 0 };
264    uint32_t value, value2;
265    UChar32 start, limit;
266    int32_t i, length;
267    UErrorCode errorCode;
268    UBool overwrite, ok;
269    uint8_t* storage =NULL;
270    static const int32_t DEFAULT_STORAGE_SIZE = 32768;
271    storage = (uint8_t*) uprv_malloc(sizeof(uint8_t)*DEFAULT_STORAGE_SIZE);
272
273    log_verbose("\ntesting Trie '%s'\n", testName);
274    newTrie=utrie_open(NULL, NULL, 2000,
275                       checkRanges[0].value, checkRanges[0].value,
276                       latin1Linear);
277
278    /* set values from setRanges[] */
279    ok=TRUE;
280    for(i=0; i<countSetRanges; ++i) {
281        start=setRanges[i].start;
282        limit=setRanges[i].limit;
283        value=setRanges[i].value;
284        overwrite=setRanges[i].overwrite;
285        if((limit-start)==1 && overwrite) {
286            ok&=utrie_set32(newTrie, start, value);
287        } else {
288            ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
289        }
290    }
291    if(!ok) {
292        log_err("error: setting values into a trie failed (%s)\n", testName);
293        return;
294    }
295
296    /* verify that all these values are in the new Trie */
297    start=0;
298    for(i=0; i<countCheckRanges; ++i) {
299        limit=checkRanges[i].limit;
300        value=checkRanges[i].value;
301
302        while(start<limit) {
303            if(value!=utrie_get32(newTrie, start, NULL)) {
304                log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
305                        testName, start, utrie_get32(newTrie, start, NULL), value);
306            }
307            ++start;
308        }
309    }
310
311    if(dataIs32) {
312        getFoldingOffset=_testFoldingOffset32;
313    } else {
314        getFoldingOffset=_testFoldingOffset16;
315    }
316
317    errorCode=U_ZERO_ERROR;
318    length=utrie_serialize(newTrie, storage, DEFAULT_STORAGE_SIZE,
319                           dataIs32 ? _testFoldedValue32 : _testFoldedValue16,
320                           (UBool)!dataIs32,
321                           &errorCode);
322    if(U_FAILURE(errorCode)) {
323        log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
324        utrie_close(newTrie);
325        return;
326    }
327
328    /* test linear Latin-1 range from utrie_getData() */
329    if(latin1Linear) {
330        uint32_t *data;
331        int32_t dataLength;
332
333        data=utrie_getData(newTrie, &dataLength);
334        start=0;
335        for(i=0; i<countCheckRanges && start<=0xff; ++i) {
336            limit=checkRanges[i].limit;
337            value=checkRanges[i].value;
338
339            while(start<limit && start<=0xff) {
340                if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
341                    log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
342                            testName, start, data[UTRIE_DATA_BLOCK_LENGTH+start], value);
343                }
344                ++start;
345            }
346        }
347    }
348
349    utrie_close(newTrie);
350
351    errorCode=U_ZERO_ERROR;
352    if(!utrie_unserialize(&trie, storage, length, &errorCode)) {
353        log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
354        return;
355    }
356    trie.getFoldingOffset=getFoldingOffset;
357
358    if(dataIs32!=(trie.data32!=NULL)) {
359        log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
360    }
361    if(latin1Linear!=trie.isLatin1Linear) {
362        log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
363    }
364
365    /* verify that all these values are in the unserialized Trie */
366    start=0;
367    for(i=0; i<countCheckRanges; ++i) {
368        limit=checkRanges[i].limit;
369        value=checkRanges[i].value;
370
371        if(start==0xd800) {
372            /* skip surrogates */
373            start=limit;
374            continue;
375        }
376
377        while(start<limit) {
378            if(start<=0xffff) {
379                if(dataIs32) {
380                    value2=UTRIE_GET32_FROM_BMP(&trie, start);
381                } else {
382                    value2=UTRIE_GET16_FROM_BMP(&trie, start);
383                }
384                if(value!=value2) {
385                    log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
386                            testName, start, value2, value);
387                }
388                if(!U16_IS_LEAD(start)) {
389                    if(dataIs32) {
390                        value2=UTRIE_GET32_FROM_LEAD(&trie, start);
391                    } else {
392                        value2=UTRIE_GET16_FROM_LEAD(&trie, start);
393                    }
394                    if(value!=value2) {
395                        log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
396                                testName, start, value2, value);
397                    }
398                }
399            }
400            if(dataIs32) {
401                UTRIE_GET32(&trie, start, value2);
402            } else {
403                UTRIE_GET16(&trie, start, value2);
404            }
405            if(value!=value2) {
406                log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
407                        testName, start, value2, value);
408            }
409            ++start;
410        }
411    }
412
413    /* enumerate and verify all ranges */
414    enumRanges=checkRanges+1;
415    utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);
416
417    /* test linear Latin-1 range */
418    if(trie.isLatin1Linear) {
419        if(trie.data32!=NULL) {
420            const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);
421
422            for(start=0; start<0x100; ++start) {
423                if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
424                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
425                            testName, start, latin1[start], UTRIE_GET32_FROM_LEAD(&trie, start), start);
426                }
427            }
428        } else {
429            const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);
430
431            for(start=0; start<0x100; ++start) {
432                if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
433                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
434                            testName, start, latin1[start], UTRIE_GET16_FROM_LEAD(&trie, start), start);
435                }
436            }
437        }
438    }
439
440    testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
441    uprv_free(storage);
442}
443
444static void
445testTrieRanges(const char *testName,
446               const SetRange setRanges[], int32_t countSetRanges,
447               const CheckRange checkRanges[], int32_t countCheckRanges,
448               UBool dataIs32, UBool latin1Linear) {
449    union{
450        double bogus; /* needed for aligining the storage */
451        uint8_t storage[32768];
452    } storageHolder;
453    UTrieGetFoldingOffset *getFoldingOffset;
454    UNewTrieGetFoldedValue *getFoldedValue;
455    const CheckRange *enumRanges;
456    UNewTrie *newTrie;
457    UTrie trie={ 0 };
458    uint32_t value, value2;
459    UChar32 start, limit;
460    int32_t i, length;
461    UErrorCode errorCode;
462    UBool overwrite, ok;
463
464    log_verbose("\ntesting Trie '%s'\n", testName);
465    newTrie=utrie_open(NULL, NULL, 2000,
466                       checkRanges[0].value, checkRanges[0].value,
467                       latin1Linear);
468
469    /* set values from setRanges[] */
470    ok=TRUE;
471    for(i=0; i<countSetRanges; ++i) {
472        start=setRanges[i].start;
473        limit=setRanges[i].limit;
474        value=setRanges[i].value;
475        overwrite=setRanges[i].overwrite;
476        if((limit-start)==1 && overwrite) {
477            ok&=utrie_set32(newTrie, start, value);
478        } else {
479            ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
480        }
481    }
482    if(!ok) {
483        log_err("error: setting values into a trie failed (%s)\n", testName);
484        return;
485    }
486
487    /* verify that all these values are in the new Trie */
488    start=0;
489    for(i=0; i<countCheckRanges; ++i) {
490        limit=checkRanges[i].limit;
491        value=checkRanges[i].value;
492
493        while(start<limit) {
494            if(value!=utrie_get32(newTrie, start, NULL)) {
495                log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
496                        testName, start, utrie_get32(newTrie, start, NULL), value);
497            }
498            ++start;
499        }
500    }
501
502    if(dataIs32) {
503        getFoldingOffset=_testFoldingOffset32;
504        getFoldedValue=_testFoldedValue32;
505    } else {
506        getFoldingOffset=_testFoldingOffset16;
507        getFoldedValue=_testFoldedValue16;
508    }
509
510    /*
511     * code coverage for utrie.c/defaultGetFoldedValue(),
512     * pick some combination of parameters for selecting the UTrie defaults
513     */
514    if(!dataIs32 && latin1Linear) {
515        getFoldingOffset=NULL;
516        getFoldedValue=NULL;
517    }
518
519    errorCode=U_ZERO_ERROR;
520    length=utrie_serialize(newTrie, storageHolder.storage, sizeof(storageHolder.storage),
521                           getFoldedValue,
522                           (UBool)!dataIs32,
523                           &errorCode);
524    if(U_FAILURE(errorCode)) {
525        log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
526        utrie_close(newTrie);
527        return;
528    }
529    if (length >= (int32_t)sizeof(storageHolder.storage)) {
530        log_err("error: utrie_serialize(%s) needs more memory\n", testName);
531        utrie_close(newTrie);
532        return;
533    }
534
535    /* test linear Latin-1 range from utrie_getData() */
536    if(latin1Linear) {
537        uint32_t *data;
538        int32_t dataLength;
539
540        data=utrie_getData(newTrie, &dataLength);
541        start=0;
542        for(i=0; i<countCheckRanges && start<=0xff; ++i) {
543            limit=checkRanges[i].limit;
544            value=checkRanges[i].value;
545
546            while(start<limit && start<=0xff) {
547                if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
548                    log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
549                            testName, start, data[UTRIE_DATA_BLOCK_LENGTH+start], value);
550                }
551                ++start;
552            }
553        }
554    }
555
556    utrie_close(newTrie);
557
558    errorCode=U_ZERO_ERROR;
559    if(!utrie_unserialize(&trie, storageHolder.storage, length, &errorCode)) {
560        log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
561        return;
562    }
563    if(getFoldingOffset!=NULL) {
564        trie.getFoldingOffset=getFoldingOffset;
565    }
566
567    if(dataIs32!=(trie.data32!=NULL)) {
568        log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
569    }
570    if(latin1Linear!=trie.isLatin1Linear) {
571        log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
572    }
573
574    /* verify that all these values are in the unserialized Trie */
575    start=0;
576    for(i=0; i<countCheckRanges; ++i) {
577        limit=checkRanges[i].limit;
578        value=checkRanges[i].value;
579
580        if(start==0xd800) {
581            /* skip surrogates */
582            start=limit;
583            continue;
584        }
585
586        while(start<limit) {
587            if(start<=0xffff) {
588                if(dataIs32) {
589                    value2=UTRIE_GET32_FROM_BMP(&trie, start);
590                } else {
591                    value2=UTRIE_GET16_FROM_BMP(&trie, start);
592                }
593                if(value!=value2) {
594                    log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
595                            testName, start, value2, value);
596                }
597                if(!U16_IS_LEAD(start)) {
598                    if(dataIs32) {
599                        value2=UTRIE_GET32_FROM_LEAD(&trie, start);
600                    } else {
601                        value2=UTRIE_GET16_FROM_LEAD(&trie, start);
602                    }
603                    if(value!=value2) {
604                        log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
605                                testName, start, value2, value);
606                    }
607                }
608            }
609            if(dataIs32) {
610                UTRIE_GET32(&trie, start, value2);
611            } else {
612                UTRIE_GET16(&trie, start, value2);
613            }
614            if(value!=value2) {
615                log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
616                        testName, start, value2, value);
617            }
618            ++start;
619        }
620    }
621
622    /* enumerate and verify all ranges */
623    enumRanges=checkRanges+1;
624    utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);
625
626    /* test linear Latin-1 range */
627    if(trie.isLatin1Linear) {
628        if(trie.data32!=NULL) {
629            const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);
630
631            for(start=0; start<0x100; ++start) {
632                if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
633                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
634                            testName, start, latin1[start], UTRIE_GET32_FROM_LEAD(&trie, start), start);
635                }
636            }
637        } else {
638            const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);
639
640            for(start=0; start<0x100; ++start) {
641                if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
642                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
643                            testName, start, latin1[start], UTRIE_GET16_FROM_LEAD(&trie, start), start);
644                }
645            }
646        }
647    }
648
649    testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
650}
651
652static void
653testTrieRanges2(const char *testName,
654                const SetRange setRanges[], int32_t countSetRanges,
655                const CheckRange checkRanges[], int32_t countCheckRanges,
656                UBool dataIs32) {
657    char name[40];
658
659    testTrieRanges(testName,
660                   setRanges, countSetRanges,
661                   checkRanges, countCheckRanges,
662                   dataIs32, FALSE);
663    testTrieRangesWithMalloc(testName,
664                   setRanges, countSetRanges,
665                   checkRanges, countCheckRanges,
666                   dataIs32, FALSE);
667
668    uprv_strcpy(name, testName);
669    uprv_strcat(name, "-latin1Linear");
670    testTrieRanges(name,
671                   setRanges, countSetRanges,
672                   checkRanges, countCheckRanges,
673                   dataIs32, TRUE);
674    testTrieRangesWithMalloc(name,
675                   setRanges, countSetRanges,
676                   checkRanges, countCheckRanges,
677                   dataIs32, TRUE);
678}
679
680static void
681testTrieRanges4(const char *testName,
682                const SetRange setRanges[], int32_t countSetRanges,
683                const CheckRange checkRanges[], int32_t countCheckRanges) {
684    char name[40];
685
686    uprv_strcpy(name, testName);
687    uprv_strcat(name, ".32");
688    testTrieRanges2(name,
689                    setRanges, countSetRanges,
690                    checkRanges, countCheckRanges,
691                    TRUE);
692
693    uprv_strcpy(name, testName);
694    uprv_strcat(name, ".16");
695    testTrieRanges2(name,
696                    setRanges, countSetRanges,
697                    checkRanges, countCheckRanges,
698                    FALSE);
699}
700
701/* test data ----------------------------------------------------------------*/
702
703/* set consecutive ranges, even with value 0 */
704static const SetRange
705setRanges1[]={
706    {0,      0x20,       0,      FALSE},
707    {0x20,   0xa7,       0x1234, FALSE},
708    {0xa7,   0x3400,     0,      FALSE},
709    {0x3400, 0x9fa6,     0x6162, FALSE},
710    {0x9fa6, 0xda9e,     0x3132, FALSE},
711    {0xdada, 0xeeee,     0x87ff, FALSE}, /* try to disrupt _testFoldingOffset16() */
712    {0xeeee, 0x11111,    1,      FALSE},
713    {0x11111, 0x44444,   0x6162, FALSE},
714    {0x44444, 0x60003,   0,      FALSE},
715    {0xf0003, 0xf0004,   0xf,    FALSE},
716    {0xf0004, 0xf0006,   0x10,   FALSE},
717    {0xf0006, 0xf0007,   0x11,   FALSE},
718    {0xf0007, 0xf0020,   0x12,   FALSE},
719    {0xf0020, 0x110000,  0,      FALSE}
720};
721
722static const CheckRange
723checkRanges1[]={
724    {0,      0},      /* dummy start range to make _testEnumRange() simpler */
725    {0x20,   0},
726    {0xa7,   0x1234},
727    {0x3400, 0},
728    {0x9fa6, 0x6162},
729    {0xda9e, 0x3132},
730    {0xdada, 0},
731    {0xeeee, 0x87ff},
732    {0x11111,1},
733    {0x44444,0x6162},
734    {0xf0003,0},
735    {0xf0004,0xf},
736    {0xf0006,0x10},
737    {0xf0007,0x11},
738    {0xf0020,0x12},
739    {0x110000, 0}
740};
741
742/* set some interesting overlapping ranges */
743static const SetRange
744setRanges2[]={
745    {0x21,   0x7f,       0x5555, TRUE},
746    {0x2f800,0x2fedc,    0x7a,   TRUE},
747    {0x72,   0xdd,       3,      TRUE},
748    {0xdd,   0xde,       4,      FALSE},
749    {0x201,  0x220,      6,      TRUE},  /* 3 consecutive blocks with the same pattern but discontiguous value ranges */
750    {0x221,  0x240,      6,      TRUE},
751    {0x241,  0x260,      6,      TRUE},
752    {0x2f987,0x2fa98,    5,      TRUE},
753    {0x2f777,0x2f833,    0,      TRUE},
754    {0x2f900,0x2ffee,    1,      FALSE},
755    {0x2ffee,0x2ffef,    2,      TRUE}
756};
757
758static const CheckRange
759checkRanges2[]={
760    {0,      0},      /* dummy start range to make _testEnumRange() simpler */
761    {0x21,   0},
762    {0x72,   0x5555},
763    {0xdd,   3},
764    {0xde,   4},
765    {0x201,  0},
766    {0x220,  6},
767    {0x221,  0},
768    {0x240,  6},
769    {0x241,  0},
770    {0x260,  6},
771    {0x2f833,0},
772    {0x2f987,0x7a},
773    {0x2fa98,5},
774    {0x2fedc,0x7a},
775    {0x2ffee,1},
776    {0x2ffef,2},
777    {0x110000, 0}
778};
779
780/* use a non-zero initial value */
781static const SetRange
782setRanges3[]={
783    {0x31,   0xa4,   1,  FALSE},
784    {0x3400, 0x6789, 2,  FALSE},
785    {0x30000,0x34567,9,  TRUE},
786    {0x45678,0x56789,3,  TRUE}
787};
788
789static const CheckRange
790checkRanges3[]={
791    {0,      9},      /* dummy start range, also carries the initial value */
792    {0x31,   9},
793    {0xa4,   1},
794    {0x3400, 9},
795    {0x6789, 2},
796    {0x45678,9},
797    {0x56789,3},
798    {0x110000,9}
799};
800
801static void
802TrieTest(void) {
803    testTrieRanges4("set1",
804        setRanges1, ARRAY_LENGTH(setRanges1),
805        checkRanges1, ARRAY_LENGTH(checkRanges1));
806    testTrieRanges4("set2-overlap",
807        setRanges2, ARRAY_LENGTH(setRanges2),
808        checkRanges2, ARRAY_LENGTH(checkRanges2));
809    testTrieRanges4("set3-initial-9",
810        setRanges3, ARRAY_LENGTH(setRanges3),
811        checkRanges3, ARRAY_LENGTH(checkRanges3));
812}
813
814/* test utrie_unserializeDummy() -------------------------------------------- */
815
816static int32_t U_CALLCONV
817dummyGetFoldingOffset(uint32_t data) {
818    return -1; /* never get non-initialValue data for supplementary code points */
819}
820
821static void
822dummyTest(UBool make16BitTrie) {
823    int32_t mem[UTRIE_DUMMY_SIZE/4];
824
825    UTrie trie;
826    UErrorCode errorCode;
827    UChar32 c;
828
829    uint32_t value, initialValue, leadUnitValue;
830
831    if(make16BitTrie) {
832        initialValue=0x313;
833        leadUnitValue=0xaffe;
834    } else {
835        initialValue=0x01234567;
836        leadUnitValue=0x89abcdef;
837    }
838
839    errorCode=U_ZERO_ERROR;
840    utrie_unserializeDummy(&trie, mem, sizeof(mem), initialValue, leadUnitValue, make16BitTrie, &errorCode);
841    if(U_FAILURE(errorCode)) {
842        log_err("utrie_unserializeDummy(make16BitTrie=%d) failed - %s\n", make16BitTrie, u_errorName(errorCode));
843        return;
844    }
845    trie.getFoldingOffset=dummyGetFoldingOffset;
846
847    /* test that all code points have initialValue */
848    for(c=0; c<=0x10ffff; ++c) {
849        if(make16BitTrie) {
850            UTRIE_GET16(&trie, c, value);
851        } else {
852            UTRIE_GET32(&trie, c, value);
853        }
854        if(value!=initialValue) {
855            log_err("UTRIE_GET%s(dummy, U+%04lx)=0x%lx instead of 0x%lx\n",
856                make16BitTrie ? "16" : "32", (long)c, (long)value, (long)initialValue);
857        }
858    }
859
860    /* test that the lead surrogate code units have leadUnitValue */
861    for(c=0xd800; c<=0xdbff; ++c) {
862        if(make16BitTrie) {
863            value=UTRIE_GET16_FROM_LEAD(&trie, c);
864        } else {
865            value=UTRIE_GET32_FROM_LEAD(&trie, c);
866        }
867        if(value!=leadUnitValue) {
868            log_err("UTRIE_GET%s_FROM_LEAD(dummy, U+%04lx)=0x%lx instead of 0x%lx\n",
869                make16BitTrie ? "16" : "32", (long)c, (long)value, (long)leadUnitValue);
870        }
871    }
872}
873
874static void
875DummyTrieTest(void) {
876    dummyTest(TRUE);
877    dummyTest(FALSE);
878}
879
880void
881addTrieTest(TestNode** root);
882
883void
884addTrieTest(TestNode** root) {
885    addTest(root, &TrieTest, "tsutil/trietest/TrieTest");
886    addTest(root, &DummyTrieTest, "tsutil/trietest/DummyTrieTest");
887}
888