1/*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2001-2008, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  ucol_cnt.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created 02/22/2001
14 *   created by: Vladimir Weinstein
15 *
16 * This module maintains a contraction table structure in expanded form
17 * and provides means to flatten this structure
18 *
19 */
20
21#include "unicode/utypes.h"
22
23#if !UCONFIG_NO_COLLATION
24
25#include "unicode/uchar.h"
26#include "ucol_cnt.h"
27#include "cmemory.h"
28
29static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
30    if(tbl->position == tbl->size) {
31        uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
32        if(newData == NULL) {
33            *status = U_MEMORY_ALLOCATION_ERROR;
34            return;
35        }
36        UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
37        if(newCPs == NULL) {
38            uprv_free(newData);
39            *status = U_MEMORY_ALLOCATION_ERROR;
40            return;
41        }
42        tbl->CEs = newData;
43        tbl->codePoints = newCPs;
44        tbl->size *= 2;
45    }
46}
47
48U_CAPI CntTable*  U_EXPORT2
49/*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
50uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) {
51    if(U_FAILURE(*status)) {
52        return 0;
53    }
54    CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable));
55    if(tbl == NULL) {
56        *status = U_MEMORY_ALLOCATION_ERROR;
57        return NULL;
58    }
59    tbl->mapping = mapping;
60    tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
61    if(tbl->elements == NULL) {
62        *status = U_MEMORY_ALLOCATION_ERROR;
63        uprv_free(tbl);
64        return NULL;
65    }
66    tbl->capacity = INIT_EXP_TABLE_SIZE;
67    uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
68    tbl->size = 0;
69    tbl->position = 0;
70    tbl->CEs = NULL;
71    tbl->codePoints = NULL;
72    tbl->offsets = NULL;
73    tbl->currentTag = NOT_FOUND_TAG;
74    return tbl;
75}
76
77static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
78    ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
79    if(el == NULL) {
80        goto outOfMemory;
81    }
82    el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
83    if(el->CEs == NULL) {
84        goto outOfMemory;
85    }
86
87    el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
88    if(el->codePoints == NULL) {
89        uprv_free(el->CEs);
90        goto outOfMemory;
91    }
92
93    el->position = 0;
94    el->size = INIT_EXP_TABLE_SIZE;
95    uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
96    uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar));
97
98    table->elements[table->size] = el;
99
100    //uhash_put(table->elements, (void *)table->size, el, status);
101
102    *key = table->size++;
103
104    if(table->size == table->capacity) {
105        ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *));
106        // do realloc
107        /*        table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
108        if(newElements == NULL) {
109            uprv_free(el->codePoints);
110            uprv_free(el->CEs);
111            goto outOfMemory;
112        }
113        ContractionTable **oldElements = table->elements;
114        uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *));
115        uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *));
116        table->capacity *= 2;
117        table->elements = newElements;
118        uprv_free(oldElements);
119    }
120
121    return el;
122
123outOfMemory:
124    *status = U_MEMORY_ALLOCATION_ERROR;
125    if (el) uprv_free(el);
126    return NULL;
127}
128
129U_CAPI int32_t  U_EXPORT2
130uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
131    int32_t i = 0, j = 0;
132    if(U_FAILURE(*status) || table->size == 0) {
133        return 0;
134    }
135
136    table->position = 0;
137
138    if(table->offsets != NULL) {
139        uprv_free(table->offsets);
140    }
141    table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
142    if(table->offsets == NULL) {
143        *status = U_MEMORY_ALLOCATION_ERROR;
144        return 0;
145    }
146
147
148    /* See how much memory we need */
149    for(i = 0; i<table->size; i++) {
150        table->offsets[i] = table->position+mainOffset;
151        table->position += table->elements[i]->position;
152    }
153
154    /* Allocate it */
155    if(table->CEs != NULL) {
156        uprv_free(table->CEs);
157    }
158    table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
159    if(table->CEs == NULL) {
160        *status = U_MEMORY_ALLOCATION_ERROR;
161        uprv_free(table->offsets);
162        table->offsets = NULL;
163        return 0;
164    }
165    uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));
166
167    if(table->codePoints != NULL) {
168        uprv_free(table->codePoints);
169    }
170    table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
171    if(table->codePoints == NULL) {
172        *status = U_MEMORY_ALLOCATION_ERROR;
173        uprv_free(table->offsets);
174        table->offsets = NULL;
175        uprv_free(table->CEs);
176        table->CEs = NULL;
177        return 0;
178    }
179    uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));
180
181    /* Now stuff the things in*/
182
183    UChar *cpPointer = table->codePoints;
184    uint32_t *CEPointer = table->CEs;
185    for(i = 0; i<table->size; i++) {
186        int32_t size = table->elements[i]->position;
187        uint8_t ccMax = 0, ccMin = 255, cc = 0;
188        for(j = 1; j<size; j++) {
189            cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
190            if(cc>ccMax) {
191                ccMax = cc;
192            }
193            if(cc<ccMin) {
194                ccMin = cc;
195            }
196            *(cpPointer+j) = table->elements[i]->codePoints[j];
197        }
198        *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;
199
200        uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
201        for(j = 0; j<size; j++) {
202            if(isCntTableElement(*(CEPointer+j))) {
203                *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
204            }
205        }
206        cpPointer += size;
207        CEPointer += size;
208    }
209
210    // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
211    // start of the flat file). However, what is done below is just wrong and it affects building of
212    // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
213    // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
214    // feasible.
215    uint32_t CE;
216    for(i = 0; i<=0x10FFFF; i++) {
217        /*CE = ucmpe32_get(table->mapping, i);*/
218        CE = utrie_get32(table->mapping, i, NULL);
219        if(isCntTableElement(CE)) {
220            CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
221            /*ucmpe32_set(table->mapping, i, CE);*/
222            utrie_set32(table->mapping, i, CE);
223        }
224    }
225
226
227    return table->position;
228}
229
230static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) {
231    ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
232    if(r == NULL) {
233        goto outOfMemory;
234    }
235
236    r->position = t->position;
237    r->size = t->size;
238
239    r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size);
240    if(r->codePoints == NULL) {
241        goto outOfMemory;
242    }
243    r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size);
244    if(r->CEs == NULL) {
245        uprv_free(r->codePoints);
246        goto outOfMemory;
247    }
248    uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size);
249    uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size);
250
251    return r;
252
253outOfMemory:
254    *status = U_MEMORY_ALLOCATION_ERROR;
255    if (r) uprv_free(r);
256    return NULL;
257}
258
259U_CAPI CntTable* U_EXPORT2
260uprv_cnttab_clone(CntTable *t, UErrorCode *status) {
261    if(U_FAILURE(*status)) {
262        return NULL;
263    }
264    int32_t i = 0;
265    CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable));
266    /* test for NULL */
267    if (r == NULL) {
268        goto outOfMemory;
269    }
270    r->position = t->position;
271    r->size = t->size;
272    r->capacity = t->capacity;
273
274    r->mapping = t->mapping;
275
276    r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *));
277    /* test for NULL */
278    if (r->elements == NULL) {
279        goto outOfMemory;
280    }
281    //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
282
283    for(i = 0; i<t->size; i++) {
284        r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status);
285    }
286
287    if(t->CEs != NULL) {
288        r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t));
289        /* test for NULL */
290        if (r->CEs == NULL) {
291            uprv_free(r->elements);
292            goto outOfMemory;
293        }
294        uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t));
295    } else {
296        r->CEs = NULL;
297    }
298
299    if(t->codePoints != NULL) {
300        r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar));
301        /* test for NULL */
302        if (r->codePoints == NULL) {
303            uprv_free(r->CEs);
304            uprv_free(r->elements);
305            goto outOfMemory;
306        }
307        uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar));
308    } else {
309        r->codePoints = NULL;
310    }
311
312    if(t->offsets != NULL) {
313        r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t));
314        /* test for NULL */
315        if (r->offsets == NULL) {
316            uprv_free(r->codePoints);
317            uprv_free(r->CEs);
318            uprv_free(r->elements);
319            goto outOfMemory;
320        }
321        uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t));
322    } else {
323        r->offsets = NULL;
324    }
325
326    return r;
327
328outOfMemory:
329    *status = U_MEMORY_ALLOCATION_ERROR;
330    if (r) uprv_free(r);
331    return NULL;
332}
333
334U_CAPI void  U_EXPORT2
335uprv_cnttab_close(CntTable *table) {
336    int32_t i = 0;
337    for(i = 0; i<table->size; i++) {
338        uprv_free(table->elements[i]->CEs);
339        uprv_free(table->elements[i]->codePoints);
340        uprv_free(table->elements[i]);
341    }
342    uprv_free(table->elements);
343    uprv_free(table->CEs);
344    uprv_free(table->offsets);
345    uprv_free(table->codePoints);
346    uprv_free(table);
347}
348
349/* this is for adding non contractions */
350U_CAPI uint32_t  U_EXPORT2
351uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) {
352    element &= 0xFFFFFF;
353
354    ContractionTable *tbl = NULL;
355    if(U_FAILURE(*status)) {
356        return 0;
357    }
358
359    if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
360        return 0;
361    }
362
363    tbl->CEs[tbl->position-1] = value;
364
365    return(constructContractCE(table->currentTag, element));
366}
367
368
369/* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
370U_CAPI uint32_t  U_EXPORT2
371uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
372
373    ContractionTable *tbl = NULL;
374
375    if(U_FAILURE(*status)) {
376        return 0;
377    }
378    element &= 0xFFFFFF;
379
380    if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
381        tbl = addATableElement(table, &element, status);
382        if (U_FAILURE(*status)) {
383            return 0;
384        }
385    }
386
387    uprv_growTable(tbl, status);
388
389    uint32_t offset = 0;
390
391
392    while(tbl->codePoints[offset] < codePoint && offset<tbl->position) {
393        offset++;
394    }
395
396    uint32_t i = tbl->position;
397    for(i = tbl->position; i > offset; i--) {
398        tbl->CEs[i] = tbl->CEs[i-1];
399        tbl->codePoints[i] = tbl->codePoints[i-1];
400    }
401
402    tbl->CEs[offset] = value;
403    tbl->codePoints[offset] = codePoint;
404
405    tbl->position++;
406
407    return(constructContractCE(table->currentTag, element));
408}
409
410
411/* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
412U_CAPI uint32_t  U_EXPORT2
413uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
414
415    element &= 0xFFFFFF;
416
417    ContractionTable *tbl = NULL;
418
419    if(U_FAILURE(*status)) {
420        return 0;
421    }
422
423    if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
424        tbl = addATableElement(table, &element, status);
425        if (U_FAILURE(*status)) {
426            return 0;
427        }
428    }
429
430    uprv_growTable(tbl, status);
431
432    tbl->CEs[tbl->position] = value;
433    tbl->codePoints[tbl->position] = codePoint;
434
435    tbl->position++;
436
437    return(constructContractCE(table->currentTag, element));
438}
439
440/* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
441U_CAPI uint32_t  U_EXPORT2
442uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) {
443
444    element &= 0xFFFFFF;
445    ContractionTable *tbl = NULL;
446
447    if(U_FAILURE(*status)) {
448        return 0;
449    }
450
451    if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
452        tbl = addATableElement(table, &element, status);
453        if (U_FAILURE(*status)) {
454            return 0;
455        }
456
457    }
458
459    if(offset >= tbl->size) {
460        *status = U_INDEX_OUTOFBOUNDS_ERROR;
461        return 0;
462    }
463    tbl->CEs[offset] = value;
464    tbl->codePoints[offset] = codePoint;
465
466    //return(offset);
467    return(constructContractCE(table->currentTag, element));
468}
469
470static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) {
471    element &= 0xFFFFFF;
472    ContractionTable *tbl = NULL;
473
474    if(element != 0xFFFFFF) {
475        tbl = table->elements[element]; /* This could also return NULL */
476    }
477    return tbl;
478}
479
480static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) {
481    uint32_t position = 0;
482    if(tbl == NULL) {
483        return -1;
484    }
485
486    while(codePoint > tbl->codePoints[position]) {
487        position++;
488        if(position > tbl->position) {
489            return -1;
490        }
491    }
492    if (codePoint == tbl->codePoints[position]) {
493        return position;
494    } else {
495        return -1;
496    }
497}
498
499static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) {
500    if(tbl == NULL) {
501        return UCOL_NOT_FOUND;
502    }
503    if((uint32_t)position > tbl->position || position == -1) {
504        return UCOL_NOT_FOUND;
505    } else {
506        return tbl->CEs[position];
507    }
508}
509
510U_CAPI int32_t  U_EXPORT2
511uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
512
513    if(U_FAILURE(*status)) {
514        return 0;
515    }
516
517    return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint);
518}
519
520U_CAPI uint32_t  U_EXPORT2
521uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) {
522    if(U_FAILURE(*status)) {
523        return UCOL_NOT_FOUND;
524    }
525
526    return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position));
527}
528
529U_CAPI uint32_t  U_EXPORT2
530uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
531    if(U_FAILURE(*status)) {
532        return UCOL_NOT_FOUND;
533    }
534    ContractionTable *tbl = _cnttab_getContractionTable(table, element);
535    return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint));
536}
537
538U_CAPI UBool  U_EXPORT2
539uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) {
540    if(U_FAILURE(*status)) {
541        return FALSE;
542    }
543
544    while(*(ztString)!=0) {
545        element = uprv_cnttab_findCE(table, element, *(ztString), status);
546        if(element == UCOL_NOT_FOUND) {
547            return FALSE;
548        }
549        if(!isCntTableElement(element)) {
550            return TRUE;
551        }
552        ztString++;
553    }
554    return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND);
555}
556
557U_CAPI uint32_t  U_EXPORT2
558uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) {
559
560    element &= 0xFFFFFF;
561    ContractionTable *tbl = NULL;
562
563    if(U_FAILURE(*status)) {
564        return 0;
565    }
566
567    if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
568        return 0;
569    }
570
571    uint32_t position = 0;
572
573    while(codePoint > tbl->codePoints[position]) {
574        position++;
575        if(position > tbl->position) {
576            return UCOL_NOT_FOUND;
577        }
578    }
579    if (codePoint == tbl->codePoints[position]) {
580        tbl->CEs[position] = newCE;
581        return element;
582    } else {
583        return UCOL_NOT_FOUND;
584    }
585}
586
587#endif /* #if !UCONFIG_NO_COLLATION */
588