ucm.c revision c73f511526464f8e56c242df80552e9b0d94ae3d
11e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)/* 21e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)******************************************************************************* 31e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* 41e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* Copyright (C) 2003-2013, International Business Machines 51e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* Corporation and others. All Rights Reserved. 6f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)* 71e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)******************************************************************************* 8f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)* file name: ucm.c 91e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* encoding: US-ASCII 101e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* tab size: 8 (not used) 111e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* indentation:4 12f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)* 131e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* created on: 2003jun20 14f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)* created by: Markus W. Scherer 151e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* 161e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* This file reads a .ucm file, stores its mappings and sorts them. 171e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* It implements handling of Unicode conversion mappings from .ucm files 18f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)* for makeconv, canonucm, rptp2ucm, etc. 191e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* 201e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* Unicode code point sequences with a length of more than 1, 211e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* as well as byte sequences with more than 4 bytes or more than one complete 221e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)* character sequence are handled to support m:n mappings. 23a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)*/ 241e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 251e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "unicode/utypes.h" 26f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "unicode/ustring.h" 271e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "cstring.h" 28f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "cmemory.h" 291e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "filestrm.h" 305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "uarrsort.h" 311e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "ucnvmbcs.h" 321e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "ucnv_bld.h" 331e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "ucnv_ext.h" 341e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "uparse.h" 351e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include "ucm.h" 361e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#include <stdio.h> 371e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 381e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)#if !UCONFIG_NO_CONVERSION 391e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 401e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)/* -------------------------------------------------------------------------- */ 411e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 421e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)static void 431e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) { 441e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) int32_t j; 45f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 46f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) for(j=0; j<m->uLen; ++j) { 47f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) fprintf(f, "<U%04lX>", (long)codePoints[j]); 48f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 49f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 50f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) fputc(' ', f); 51f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 52f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) for(j=0; j<m->bLen; ++j) { 53f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) fprintf(f, "\\x%02X", bytes[j]); 54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 55f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 56f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) if(m->f>=0) { 57f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) fprintf(f, " |%u\n", m->f); 58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } else { 59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) fputs("\n", f); 60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 61f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 62f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 63f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)U_CAPI void U_EXPORT2 64f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { 65f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), f); 66f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 67f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 68f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)U_CAPI void U_EXPORT2 69f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) { 70f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) UCMapping *m; 71f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int32_t i, length; 72f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 73f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) m=table->mappings; 74f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) length=table->mappingsLength; 751e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(byUnicode) { 765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) for(i=0; i<length; ++m, ++i) { 7723730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) ucm_printMapping(table, m, f); 7823730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) } 7923730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) } else { 8023730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) const int32_t *map=table->reverseMap; 8123730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) for(i=0; i<length; ++i) { 8223730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) ucm_printMapping(table, m+map[i], f); 8323730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) } 841e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 851e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} 861e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 871e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)/* mapping comparisons ------------------------------------------------------ */ 885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 891e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)static int32_t 901e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)compareUnicode(UCMTable *lTable, const UCMapping *l, 911e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) UCMTable *rTable, const UCMapping *r) { 921e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) const UChar32 *lu, *ru; 931e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) int32_t result, i, length; 941e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if(l->uLen==1 && r->uLen==1) { 961e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* compare two single code points */ 971e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) return l->u-r->u; 981e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 991e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 1001e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* get pointers to the code point sequences */ 1011e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) lu=UCM_GET_CODE_POINTS(lTable, l); 1021e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) ru=UCM_GET_CODE_POINTS(rTable, r); 1031e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 1041e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* get the minimum length */ 1055d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if(l->uLen<=r->uLen) { 1065d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) length=l->uLen; 1075d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } else { 1085d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) length=r->uLen; 1095d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 1105d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1115d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* compare the code points */ 1125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) for(i=0; i<length; ++i) { 1135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) result=lu[i]-ru[i]; 1145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if(result!=0) { 1155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return result; 1161e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 1171e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 1181e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 1191e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* compare the lengths */ 1201e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) return l->uLen-r->uLen; 1211e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} 1221e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 1231e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)static int32_t 1241e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)compareBytes(UCMTable *lTable, const UCMapping *l, 125116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch UCMTable *rTable, const UCMapping *r, 126116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch UBool lexical) { 1271e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) const uint8_t *lb, *rb; 1281e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) int32_t result, i, length; 1291e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 1301e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* 1311e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * A lexical comparison is used for sorting in the builder, to allow 1321e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * an efficient search for a byte sequence that could be a prefix 1331e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * of a previously entered byte sequence. 1341e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * 1351e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * Comparing by lengths first is for compatibility with old .ucm tools 1361e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * like canonucm and rptp2ucm. 137a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) */ 1385d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if(lexical) { 1395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* get the minimum length and continue */ 140a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) if(l->bLen<=r->bLen) { 1415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) length=l->bLen; 1425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } else { 1435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) length=r->bLen; 1445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 145a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) } else { 146a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) /* compare lengths first */ 147a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) result=l->bLen-r->bLen; 1485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if(result!=0) { 149a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) return result; 1505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } else { 1515d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) length=l->bLen; 152a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) } 1535d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 1545d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 155a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) /* get pointers to the byte sequences */ 156a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) lb=UCM_GET_BYTES(lTable, l); 157a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) rb=UCM_GET_BYTES(rTable, r); 158a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) 159a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) /* compare the bytes */ 160a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) for(i=0; i<length; ++i) { 1611e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) result=lb[i]-rb[i]; 1621e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(result!=0) { 1635d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return result; 1645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 1651e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 1661e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 1675d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* compare the lengths */ 1685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return l->bLen-r->bLen; 1691e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} 1701e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 1711e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)/* compare UCMappings for sorting */ 1721e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)static int32_t 1731e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)compareMappings(UCMTable *lTable, const UCMapping *l, 1745d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) UCMTable *rTable, const UCMapping *r, 1751e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) UBool uFirst) { 1761e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) int32_t result; 1771e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 1781e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* choose which side to compare first */ 1791e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(uFirst) { 1801e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* Unicode then bytes */ 1811e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) result=compareUnicode(lTable, l, rTable, r); 1821e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(result==0) { 1831e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) result=compareBytes(lTable, l, rTable, r, FALSE); /* not lexically, like canonucm */ 1841e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 1851e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } else { 1861e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* bytes then Unicode */ 1871e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) result=compareBytes(lTable, l, rTable, r, TRUE); /* lexically, for builder */ 1881e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(result==0) { 1891e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) result=compareUnicode(lTable, l, rTable, r); 1901e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 1911e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 1925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1935d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if(result!=0) { 1945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return result; 1955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 1965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1975d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* compare the flags */ 1985d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return l->f-r->f; 1995d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 2005d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 2015d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)/* sorting by Unicode first sorts mappings directly */ 2025d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)static int32_t 2035d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)compareMappingsUnicodeFirst(const void *context, const void *left, const void *right) { 2045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return compareMappings( 2051e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) (UCMTable *)context, (const UCMapping *)left, 2061e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) (UCMTable *)context, (const UCMapping *)right, TRUE); 2071e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} 2081e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2091e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)/* sorting by bytes first sorts the reverseMap; use indirection to mappings */ 2101e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)static int32_t 2111e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)compareMappingsBytesFirst(const void *context, const void *left, const void *right) { 2121e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) UCMTable *table=(UCMTable *)context; 2131e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) int32_t l=*(const int32_t *)left, r=*(const int32_t *)right; 2141e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) return compareMappings( 2151e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) table, table->mappings+l, 2165d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) table, table->mappings+r, FALSE); 2171e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} 2181e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2191e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)U_CAPI void U_EXPORT2 2201e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)ucm_sortTable(UCMTable *t) { 2211e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) UErrorCode errorCode; 2221e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) int32_t i; 2231e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2241e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(t->isSorted) { 2251e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) return; 2261e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 2271e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2281e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) errorCode=U_ZERO_ERROR; 2291e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* 1. sort by Unicode first */ 2311e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping), 2321e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) compareMappingsUnicodeFirst, t, 2331e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) FALSE, &errorCode); 2341e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2351e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* build the reverseMap */ 2361e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(t->reverseMap==NULL) { 2371e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* 2381e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * allocate mappingsCapacity instead of mappingsLength so that 2391e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * if mappings are added, the reverseMap need not be 2401e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * reallocated each time 2411e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * (see ucm_moveMappings() and ucm_addMapping()) 2421e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) */ 2431e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t)); 2441e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(t->reverseMap==NULL) { 2451e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) fprintf(stderr, "ucm error: unable to allocate reverseMap\n"); 2465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) exit(U_MEMORY_ALLOCATION_ERROR); 2471e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 2481e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 2491e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) for(i=0; i<t->mappingsLength; ++i) { 2501e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) t->reverseMap[i]=i; 2511e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 2521e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2531e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* 2. sort reverseMap by mappings bytes first */ 2545d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t), 2551e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) compareMappingsBytesFirst, t, 2561e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) FALSE, &errorCode); 2571e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2581e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(U_FAILURE(errorCode)) { 2591e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n", 2601e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) u_errorName(errorCode)); 2611e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) exit(errorCode); 2621e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 2635d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 2645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) t->isSorted=TRUE; 2655d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 2665d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 2675d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)/* 2681e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * remove mappings with their move flag set from the base table 2691e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * and move some of them (with UCM_MOVE_TO_EXT) to the extension table 2701e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) */ 2711e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)U_CAPI void U_EXPORT2 2721e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)ucm_moveMappings(UCMTable *base, UCMTable *ext) { 2731e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) UCMapping *mb, *mbLimit; 2741e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) int8_t flag; 2751e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2761e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) mb=base->mappings; 2771e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) mbLimit=mb+base->mappingsLength; 2781e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 279f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) while(mb<mbLimit) { 2801e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) flag=mb->moveFlag; 2811e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(flag!=0) { 2821e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* reset the move flag */ 2831e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) mb->moveFlag=0; 2841e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 2851e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(ext!=NULL && (flag&UCM_MOVE_TO_EXT)) { 2865d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* add the mapping to the extension table */ 2875d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_BYTES(base, mb)); 2885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 2895d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 2905d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* remove this mapping: move the last base mapping down and overwrite the current one */ 2911e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(mb<(mbLimit-1)) { 2925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping)); 2931e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 2941e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) --mbLimit; 2951e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) --base->mappingsLength; 2961e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) base->isSorted=FALSE; 2971e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } else { 2981e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) ++mb; 2991e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 3005d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 3011e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)} 3021e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3031e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)enum { 3041e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) NEEDS_MOVE=1, 3051e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) HAS_ERRORS=2 3061e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)}; 3071e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3081e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)static uint8_t 3091e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext, 3101e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) UBool moveToExt, UBool intersectBase) { 3111e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) UCMapping *mb, *me, *mbLimit, *meLimit; 3121e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) int32_t cmp; 3131e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) uint8_t result; 3141e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3151e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) mb=base->mappings; 3161e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) mbLimit=mb+base->mappingsLength; 3171e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3181e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) me=ext->mappings; 3191e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) meLimit=me+ext->mappingsLength; 3201e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3211e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) result=0; 3221e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3231e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) for(;;) { 3241e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* skip irrelevant mappings on both sides */ 325a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) for(;;) { 326a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) if(mb==mbLimit) { 3275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return result; 328a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) } 329a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) 330a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) if((0<=mb->f && mb->f<=2) || mb->f==4) { 331a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) break; 3321e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 3335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 3345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) ++mb; 3355d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 3365d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 3371e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) for(;;) { 3385d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if(me==meLimit) { 3391e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) return result; 3401e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 3411e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3421e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if((0<=me->f && me->f<=2) || me->f==4) { 3431e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) break; 3441e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 3451e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3461e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) ++me; 3471e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } 3481e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3491e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* compare the base and extension mappings */ 3501e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) cmp=compareUnicode(base, mb, ext, me); 3511e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(cmp<0) { 3521e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(intersectBase && (intersectBase!=2 || mb->bLen>1)) { 3531e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* 3541e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * mapping in base but not in ext, move it 3551e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * 356a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) * if ext is DBCS, move DBCS mappings here 3571e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * and check SBCS ones for Unicode prefix below 3581e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) */ 3591e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) mb->moveFlag|=UCM_MOVE_TO_EXT; 3601e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) result|=NEEDS_MOVE; 3611e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 3621e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* does mb map from an input sequence that is a prefix of me's? */ 3631e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } else if( mb->uLen<me->uLen && 3641e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) 3651e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) ) { 3661e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) if(moveToExt) { 3671e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) /* mark this mapping to be moved to the extension table */ 3681e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) mb->moveFlag|=UCM_MOVE_TO_EXT; 3691e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) result|=NEEDS_MOVE; 3701e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) } else { 3711e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) fprintf(stderr, 372f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "ucm error: the base table contains a mapping whose input sequence\n" 373f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) " is a prefix of the input sequence of an extension mapping\n"); 374f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ucm_printMapping(base, mb, stderr); 375f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ucm_printMapping(ext, me, stderr); 376f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) result|=HAS_ERRORS; 377f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 378f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 379f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 380f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ++mb; 3815d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } else if(cmp==0) { 382f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) /* 383f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * same output: remove the extension mapping, 384f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * otherwise treat as an error 385f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) */ 386f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) if( mb->f==me->f && mb->bLen==me->bLen && 387f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) 388f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ) { 389f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) me->moveFlag|=UCM_REMOVE_MAPPING; 390f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) result|=NEEDS_MOVE; 391f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } else if(intersectBase) { 392f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) /* mapping in base but not in ext, move it */ 393f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) mb->moveFlag|=UCM_MOVE_TO_EXT; 394f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) result|=NEEDS_MOVE; 395f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } else { 396f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) fprintf(stderr, 397f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "ucm error: the base table contains a mapping whose input sequence\n" 398f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) " is the same as the input sequence of an extension mapping\n" 399f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) " but it maps differently\n"); 400f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ucm_printMapping(base, mb, stderr); 401f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ucm_printMapping(ext, me, stderr); 402f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) result|=HAS_ERRORS; 403f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 404f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 405f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ++mb; 406f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } else /* cmp>0 */ { 407f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ++me; 408f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 4095d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 4105d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 411a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) 4125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)static uint8_t 413effb81e5f8246d0db0270817048dc992db66e9fbBen MurdochcheckBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, 414effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch UBool moveToExt, UBool intersectBase) { 415effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch UCMapping *mb, *me; 416f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int32_t *baseMap, *extMap; 4175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) int32_t b, e, bLimit, eLimit, cmp; 418f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) uint8_t result; 4191e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) UBool isSISO; 420 421 baseMap=base->reverseMap; 422 extMap=ext->reverseMap; 423 424 b=e=0; 425 bLimit=base->mappingsLength; 426 eLimit=ext->mappingsLength; 427 428 result=0; 429 430 isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO); 431 432 for(;;) { 433 /* skip irrelevant mappings on both sides */ 434 for(;; ++b) { 435 if(b==bLimit) { 436 return result; 437 } 438 mb=base->mappings+baseMap[b]; 439 440 if(intersectBase==2 && mb->bLen==1) { 441 /* 442 * comparing a base against a DBCS extension: 443 * leave SBCS base mappings alone 444 */ 445 continue; 446 } 447 448 if(mb->f==0 || mb->f==3) { 449 break; 450 } 451 } 452 453 for(;;) { 454 if(e==eLimit) { 455 return result; 456 } 457 me=ext->mappings+extMap[e]; 458 459 if(me->f==0 || me->f==3) { 460 break; 461 } 462 463 ++e; 464 } 465 466 /* compare the base and extension mappings */ 467 cmp=compareBytes(base, mb, ext, me, TRUE); 468 if(cmp<0) { 469 if(intersectBase) { 470 /* mapping in base but not in ext, move it */ 471 mb->moveFlag|=UCM_MOVE_TO_EXT; 472 result|=NEEDS_MOVE; 473 474 /* 475 * does mb map from an input sequence that is a prefix of me's? 476 * for SI/SO tables, a single byte is never a prefix because it 477 * occurs in a separate single-byte state 478 */ 479 } else if( mb->bLen<me->bLen && 480 (!isSISO || mb->bLen>1) && 481 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) 482 ) { 483 if(moveToExt) { 484 /* mark this mapping to be moved to the extension table */ 485 mb->moveFlag|=UCM_MOVE_TO_EXT; 486 result|=NEEDS_MOVE; 487 } else { 488 fprintf(stderr, 489 "ucm error: the base table contains a mapping whose input sequence\n" 490 " is a prefix of the input sequence of an extension mapping\n"); 491 ucm_printMapping(base, mb, stderr); 492 ucm_printMapping(ext, me, stderr); 493 result|=HAS_ERRORS; 494 } 495 } 496 497 ++b; 498 } else if(cmp==0) { 499 /* 500 * same output: remove the extension mapping, 501 * otherwise treat as an error 502 */ 503 if( mb->f==me->f && mb->uLen==me->uLen && 504 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) 505 ) { 506 me->moveFlag|=UCM_REMOVE_MAPPING; 507 result|=NEEDS_MOVE; 508 } else if(intersectBase) { 509 /* mapping in base but not in ext, move it */ 510 mb->moveFlag|=UCM_MOVE_TO_EXT; 511 result|=NEEDS_MOVE; 512 } else { 513 fprintf(stderr, 514 "ucm error: the base table contains a mapping whose input sequence\n" 515 " is the same as the input sequence of an extension mapping\n" 516 " but it maps differently\n"); 517 ucm_printMapping(base, mb, stderr); 518 ucm_printMapping(ext, me, stderr); 519 result|=HAS_ERRORS; 520 } 521 522 ++b; 523 } else /* cmp>0 */ { 524 ++e; 525 } 526 } 527} 528 529U_CAPI UBool U_EXPORT2 530ucm_checkValidity(UCMTable *table, UCMStates *baseStates) { 531 UCMapping *m, *mLimit; 532 int32_t count; 533 UBool isOK; 534 535 m=table->mappings; 536 mLimit=m+table->mappingsLength; 537 isOK=TRUE; 538 539 while(m<mLimit) { 540 count=ucm_countChars(baseStates, UCM_GET_BYTES(table, m), m->bLen); 541 if(count<1) { 542 ucm_printMapping(table, m, stderr); 543 isOK=FALSE; 544 } 545 ++m; 546 } 547 548 return isOK; 549} 550 551U_CAPI UBool U_EXPORT2 552ucm_checkBaseExt(UCMStates *baseStates, 553 UCMTable *base, UCMTable *ext, UCMTable *moveTarget, 554 UBool intersectBase) { 555 uint8_t result; 556 557 /* if we have an extension table, we must always use precision flags */ 558 if(base->flagsType&UCM_FLAGS_IMPLICIT) { 559 fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n"); 560 return FALSE; 561 } 562 if(ext->flagsType&UCM_FLAGS_IMPLICIT) { 563 fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n"); 564 return FALSE; 565 } 566 567 /* checking requires both tables to be sorted */ 568 ucm_sortTable(base); 569 ucm_sortTable(ext); 570 571 /* check */ 572 result= 573 checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase)| 574 checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase); 575 576 if(result&HAS_ERRORS) { 577 return FALSE; 578 } 579 580 if(result&NEEDS_MOVE) { 581 ucm_moveMappings(ext, NULL); 582 ucm_moveMappings(base, moveTarget); 583 ucm_sortTable(base); 584 ucm_sortTable(ext); 585 if(moveTarget!=NULL) { 586 ucm_sortTable(moveTarget); 587 } 588 } 589 590 return TRUE; 591} 592 593/* merge tables for rptp2ucm ------------------------------------------------ */ 594 595U_CAPI void U_EXPORT2 596ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable, 597 const uint8_t *subchar, int32_t subcharLength, 598 uint8_t subchar1) { 599 UCMapping *fromUMapping, *toUMapping; 600 int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp; 601 602 ucm_sortTable(fromUTable); 603 ucm_sortTable(toUTable); 604 605 fromUMapping=fromUTable->mappings; 606 toUMapping=toUTable->mappings; 607 608 fromUTop=fromUTable->mappingsLength; 609 toUTop=toUTable->mappingsLength; 610 611 fromUIndex=toUIndex=0; 612 613 while(fromUIndex<fromUTop && toUIndex<toUTop) { 614 cmp=compareMappings(fromUTable, fromUMapping, toUTable, toUMapping, TRUE); 615 if(cmp==0) { 616 /* equal: roundtrip, nothing to do (flags are initially 0) */ 617 ++fromUMapping; 618 ++toUMapping; 619 620 ++fromUIndex; 621 ++toUIndex; 622 } else if(cmp<0) { 623 /* 624 * the fromU mapping does not have a toU counterpart: 625 * fallback Unicode->codepage 626 */ 627 if( (fromUMapping->bLen==subcharLength && 628 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || 629 (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) 630 ) { 631 fromUMapping->f=2; /* SUB mapping */ 632 } else { 633 fromUMapping->f=1; /* normal fallback */ 634 } 635 636 ++fromUMapping; 637 ++fromUIndex; 638 } else { 639 /* 640 * the toU mapping does not have a fromU counterpart: 641 * (reverse) fallback codepage->Unicode, copy it to the fromU table 642 */ 643 644 /* ignore reverse fallbacks to Unicode SUB */ 645 if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { 646 toUMapping->f=3; /* reverse fallback */ 647 ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); 648 649 /* the table may have been reallocated */ 650 fromUMapping=fromUTable->mappings+fromUIndex; 651 } 652 653 ++toUMapping; 654 ++toUIndex; 655 } 656 } 657 658 /* either one or both tables are exhausted */ 659 while(fromUIndex<fromUTop) { 660 /* leftover fromU mappings are fallbacks */ 661 if( (fromUMapping->bLen==subcharLength && 662 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || 663 (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) 664 ) { 665 fromUMapping->f=2; /* SUB mapping */ 666 } else { 667 fromUMapping->f=1; /* normal fallback */ 668 } 669 670 ++fromUMapping; 671 ++fromUIndex; 672 } 673 674 while(toUIndex<toUTop) { 675 /* leftover toU mappings are reverse fallbacks */ 676 677 /* ignore reverse fallbacks to Unicode SUB */ 678 if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { 679 toUMapping->f=3; /* reverse fallback */ 680 ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); 681 } 682 683 ++toUMapping; 684 ++toUIndex; 685 } 686 687 fromUTable->isSorted=FALSE; 688} 689 690/* separate extension mappings out of base table for rptp2ucm --------------- */ 691 692U_CAPI UBool U_EXPORT2 693ucm_separateMappings(UCMFile *ucm, UBool isSISO) { 694 UCMTable *table; 695 UCMapping *m, *mLimit; 696 int32_t type; 697 UBool needsMove, isOK; 698 699 table=ucm->base; 700 m=table->mappings; 701 mLimit=m+table->mappingsLength; 702 703 needsMove=FALSE; 704 isOK=TRUE; 705 706 for(; m<mLimit; ++m) { 707 if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { 708 fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n"); 709 ucm_printMapping(table, m, stderr); 710 m->moveFlag|=UCM_REMOVE_MAPPING; 711 needsMove=TRUE; 712 continue; 713 } 714 715 type=ucm_mappingType( 716 &ucm->states, m, 717 UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m)); 718 if(type<0) { 719 /* illegal byte sequence */ 720 printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr); 721 isOK=FALSE; 722 } else if(type>0) { 723 m->moveFlag|=UCM_MOVE_TO_EXT; 724 needsMove=TRUE; 725 } 726 } 727 728 if(!isOK) { 729 return FALSE; 730 } 731 if(needsMove) { 732 ucm_moveMappings(ucm->base, ucm->ext); 733 return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE); 734 } else { 735 ucm_sortTable(ucm->base); 736 return TRUE; 737 } 738} 739 740/* ucm parser --------------------------------------------------------------- */ 741 742U_CAPI int8_t U_EXPORT2 743ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps) { 744 const char *s=*ps; 745 char *end; 746 uint8_t byte; 747 int8_t bLen; 748 749 bLen=0; 750 for(;;) { 751 /* skip an optional plus sign */ 752 if(bLen>0 && *s=='+') { 753 ++s; 754 } 755 if(*s!='\\') { 756 break; 757 } 758 759 if( s[1]!='x' || 760 (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4 761 ) { 762 fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line); 763 return -1; 764 } 765 766 if(bLen==UCNV_EXT_MAX_BYTES) { 767 fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line); 768 return -1; 769 } 770 bytes[bLen++]=byte; 771 s=end; 772 } 773 774 *ps=s; 775 return bLen; 776} 777 778/* parse a mapping line; must not be empty */ 779U_CAPI UBool U_EXPORT2 780ucm_parseMappingLine(UCMapping *m, 781 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], 782 uint8_t bytes[UCNV_EXT_MAX_BYTES], 783 const char *line) { 784 const char *s; 785 char *end; 786 UChar32 cp; 787 int32_t u16Length; 788 int8_t uLen, bLen, f; 789 790 s=line; 791 uLen=bLen=0; 792 793 /* parse code points */ 794 for(;;) { 795 /* skip an optional plus sign */ 796 if(uLen>0 && *s=='+') { 797 ++s; 798 } 799 if(*s!='<') { 800 break; 801 } 802 803 if( s[1]!='U' || 804 (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 || 805 *end!='>' 806 ) { 807 fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line); 808 return FALSE; 809 } 810 if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) { 811 fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line); 812 return FALSE; 813 } 814 815 if(uLen==UCNV_EXT_MAX_UCHARS) { 816 fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line); 817 return FALSE; 818 } 819 codePoints[uLen++]=cp; 820 s=end+1; 821 } 822 823 if(uLen==0) { 824 fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line); 825 return FALSE; 826 } else if(uLen==1) { 827 m->u=codePoints[0]; 828 } else { 829 UErrorCode errorCode=U_ZERO_ERROR; 830 u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode); 831 if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || 832 u16Length>UCNV_EXT_MAX_UCHARS 833 ) { 834 fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line); 835 return FALSE; 836 } 837 } 838 839 s=u_skipWhitespace(s); 840 841 /* parse bytes */ 842 bLen=ucm_parseBytes(bytes, line, &s); 843 844 if(bLen<0) { 845 return FALSE; 846 } else if(bLen==0) { 847 fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line); 848 return FALSE; 849 } else if(bLen<=4) { 850 uprv_memcpy(m->b.bytes, bytes, bLen); 851 } 852 853 /* skip everything until the fallback indicator, even the start of a comment */ 854 for(;;) { 855 if(*s==0) { 856 f=-1; /* no fallback indicator */ 857 break; 858 } else if(*s=='|') { 859 f=(int8_t)(s[1]-'0'); 860 if((uint8_t)f>4) { 861 fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line); 862 return FALSE; 863 } 864 break; 865 } 866 ++s; 867 } 868 869 m->uLen=uLen; 870 m->bLen=bLen; 871 m->f=f; 872 return TRUE; 873} 874 875/* general APIs ------------------------------------------------------------- */ 876 877U_CAPI UCMTable * U_EXPORT2 878ucm_openTable() { 879 UCMTable *table=(UCMTable *)uprv_malloc(sizeof(UCMTable)); 880 if(table==NULL) { 881 fprintf(stderr, "ucm error: unable to allocate a UCMTable\n"); 882 exit(U_MEMORY_ALLOCATION_ERROR); 883 } 884 885 memset(table, 0, sizeof(UCMTable)); 886 return table; 887} 888 889U_CAPI void U_EXPORT2 890ucm_closeTable(UCMTable *table) { 891 if(table!=NULL) { 892 uprv_free(table->mappings); 893 uprv_free(table->codePoints); 894 uprv_free(table->bytes); 895 uprv_free(table->reverseMap); 896 uprv_free(table); 897 } 898} 899 900U_CAPI void U_EXPORT2 901ucm_resetTable(UCMTable *table) { 902 if(table!=NULL) { 903 table->mappingsLength=0; 904 table->flagsType=0; 905 table->unicodeMask=0; 906 table->bytesLength=table->codePointsLength=0; 907 table->isSorted=FALSE; 908 } 909} 910 911U_CAPI void U_EXPORT2 912ucm_addMapping(UCMTable *table, 913 UCMapping *m, 914 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], 915 uint8_t bytes[UCNV_EXT_MAX_BYTES]) { 916 UCMapping *tm; 917 UChar32 c; 918 int32_t idx; 919 920 if(table->mappingsLength>=table->mappingsCapacity) { 921 /* make the mappings array larger */ 922 if(table->mappingsCapacity==0) { 923 table->mappingsCapacity=1000; 924 } else { 925 table->mappingsCapacity*=10; 926 } 927 table->mappings=(UCMapping *)uprv_realloc(table->mappings, 928 table->mappingsCapacity*sizeof(UCMapping)); 929 if(table->mappings==NULL) { 930 fprintf(stderr, "ucm error: unable to allocate %d UCMappings\n", 931 (int)table->mappingsCapacity); 932 exit(U_MEMORY_ALLOCATION_ERROR); 933 } 934 935 if(table->reverseMap!=NULL) { 936 /* the reverseMap must be reallocated in a new sort */ 937 uprv_free(table->reverseMap); 938 table->reverseMap=NULL; 939 } 940 } 941 942 if(m->uLen>1 && table->codePointsCapacity==0) { 943 table->codePointsCapacity=10000; 944 table->codePoints=(UChar32 *)uprv_malloc(table->codePointsCapacity*4); 945 if(table->codePoints==NULL) { 946 fprintf(stderr, "ucm error: unable to allocate %d UChar32s\n", 947 (int)table->codePointsCapacity); 948 exit(U_MEMORY_ALLOCATION_ERROR); 949 } 950 } 951 952 if(m->bLen>4 && table->bytesCapacity==0) { 953 table->bytesCapacity=10000; 954 table->bytes=(uint8_t *)uprv_malloc(table->bytesCapacity); 955 if(table->bytes==NULL) { 956 fprintf(stderr, "ucm error: unable to allocate %d bytes\n", 957 (int)table->bytesCapacity); 958 exit(U_MEMORY_ALLOCATION_ERROR); 959 } 960 } 961 962 if(m->uLen>1) { 963 idx=table->codePointsLength; 964 table->codePointsLength+=m->uLen; 965 if(table->codePointsLength>table->codePointsCapacity) { 966 fprintf(stderr, "ucm error: too many code points in multiple-code point mappings\n"); 967 exit(U_MEMORY_ALLOCATION_ERROR); 968 } 969 970 uprv_memcpy(table->codePoints+idx, codePoints, m->uLen*4); 971 m->u=idx; 972 } 973 974 if(m->bLen>4) { 975 idx=table->bytesLength; 976 table->bytesLength+=m->bLen; 977 if(table->bytesLength>table->bytesCapacity) { 978 fprintf(stderr, "ucm error: too many bytes in mappings with >4 charset bytes\n"); 979 exit(U_MEMORY_ALLOCATION_ERROR); 980 } 981 982 uprv_memcpy(table->bytes+idx, bytes, m->bLen); 983 m->b.idx=idx; 984 } 985 986 /* set unicodeMask */ 987 for(idx=0; idx<m->uLen; ++idx) { 988 c=codePoints[idx]; 989 if(c>=0x10000) { 990 table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */ 991 } else if(U_IS_SURROGATE(c)) { 992 table->unicodeMask|=UCNV_HAS_SURROGATES; /* there are surrogate code points */ 993 } 994 } 995 996 /* set flagsType */ 997 if(m->f<0) { 998 table->flagsType|=UCM_FLAGS_IMPLICIT; 999 } else { 1000 table->flagsType|=UCM_FLAGS_EXPLICIT; 1001 } 1002 1003 tm=table->mappings+table->mappingsLength++; 1004 uprv_memcpy(tm, m, sizeof(UCMapping)); 1005 1006 table->isSorted=FALSE; 1007} 1008 1009U_CAPI UCMFile * U_EXPORT2 1010ucm_open() { 1011 UCMFile *ucm=(UCMFile *)uprv_malloc(sizeof(UCMFile)); 1012 if(ucm==NULL) { 1013 fprintf(stderr, "ucm error: unable to allocate a UCMFile\n"); 1014 exit(U_MEMORY_ALLOCATION_ERROR); 1015 } 1016 1017 memset(ucm, 0, sizeof(UCMFile)); 1018 1019 ucm->base=ucm_openTable(); 1020 ucm->ext=ucm_openTable(); 1021 1022 ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT; 1023 ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER; 1024 ucm->states.outputType=-1; 1025 ucm->states.minCharLength=ucm->states.maxCharLength=1; 1026 1027 return ucm; 1028} 1029 1030U_CAPI void U_EXPORT2 1031ucm_close(UCMFile *ucm) { 1032 if(ucm!=NULL) { 1033 ucm_closeTable(ucm->base); 1034 ucm_closeTable(ucm->ext); 1035 uprv_free(ucm); 1036 } 1037} 1038 1039U_CAPI int32_t U_EXPORT2 1040ucm_mappingType(UCMStates *baseStates, 1041 UCMapping *m, 1042 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], 1043 uint8_t bytes[UCNV_EXT_MAX_BYTES]) { 1044 /* check validity of the bytes and count the characters in them */ 1045 int32_t count=ucm_countChars(baseStates, bytes, m->bLen); 1046 if(count<1) { 1047 /* illegal byte sequence */ 1048 return -1; 1049 } 1050 1051 /* 1052 * Suitable for an ICU conversion base table means: 1053 * - a 1:1 mapping (1 Unicode code point : 1 byte sequence) 1054 * - precision flag 0..3 1055 * - SBCS: any 1:1 mapping 1056 * (the table stores additional bits to distinguish mapping types) 1057 * - MBCS: not a |2 SUB mapping for <subchar1> 1058 * - MBCS: not a |1 fallback to 0x00 1059 * - MBCS: not a multi-byte mapping with leading 0x00 bytes 1060 * 1061 * Further restrictions for fromUnicode tables 1062 * are enforced in makeconv (MBCSOkForBaseFromUnicode()). 1063 * 1064 * All of the MBCS fromUnicode specific tests could be removed from here, 1065 * but the ones above are for unusual mappings, and removing the tests 1066 * from here would change canonucm output which seems gratuitous. 1067 * (Markus Scherer 2006-nov-28) 1068 * 1069 * Exception: All implicit mappings (f<0) that need to be moved 1070 * because of fromUnicode restrictions _must_ be moved here because 1071 * makeconv uses a hack for moving mappings only for the fromUnicode table 1072 * that only works with non-negative values of f. 1073 */ 1074 if( m->uLen==1 && count==1 && m->f<=3 && 1075 (baseStates->maxCharLength==1 || 1076 !((m->f==2 && m->bLen==1) || 1077 (m->f==1 && bytes[0]==0) || 1078 (m->f<=1 && m->bLen>1 && bytes[0]==0))) 1079 ) { 1080 return 0; /* suitable for a base table */ 1081 } else { 1082 return 1; /* needs to go into an extension table */ 1083 } 1084} 1085 1086U_CAPI UBool U_EXPORT2 1087ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, 1088 UCMapping *m, 1089 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], 1090 uint8_t bytes[UCNV_EXT_MAX_BYTES]) { 1091 int32_t type; 1092 1093 if(m->f==2 && m->uLen>1) { 1094 fprintf(stderr, "ucm error: illegal <subchar1> |2 mapping from multiple code points\n"); 1095 printMapping(m, codePoints, bytes, stderr); 1096 return FALSE; 1097 } 1098 1099 if(baseStates!=NULL) { 1100 /* check validity of the bytes and count the characters in them */ 1101 type=ucm_mappingType(baseStates, m, codePoints, bytes); 1102 if(type<0) { 1103 /* illegal byte sequence */ 1104 printMapping(m, codePoints, bytes, stderr); 1105 return FALSE; 1106 } 1107 } else { 1108 /* not used - adding a mapping for an extension-only table before its base table is read */ 1109 type=1; 1110 } 1111 1112 /* 1113 * Add the mapping to the base table if this is requested and suitable. 1114 * Otherwise, add it to the extension table. 1115 */ 1116 if(forBase && type==0) { 1117 ucm_addMapping(ucm->base, m, codePoints, bytes); 1118 } else { 1119 ucm_addMapping(ucm->ext, m, codePoints, bytes); 1120 } 1121 1122 return TRUE; 1123} 1124 1125U_CAPI UBool U_EXPORT2 1126ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates) { 1127 UCMapping m={ 0 }; 1128 UChar32 codePoints[UCNV_EXT_MAX_UCHARS]; 1129 uint8_t bytes[UCNV_EXT_MAX_BYTES]; 1130 1131 const char *s; 1132 1133 /* ignore empty and comment lines */ 1134 if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') { 1135 return TRUE; 1136 } 1137 1138 return 1139 ucm_parseMappingLine(&m, codePoints, bytes, line) && 1140 ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes); 1141} 1142 1143U_CAPI void U_EXPORT2 1144ucm_readTable(UCMFile *ucm, FileStream* convFile, 1145 UBool forBase, UCMStates *baseStates, 1146 UErrorCode *pErrorCode) { 1147 char line[500]; 1148 char *end; 1149 UBool isOK; 1150 1151 if(U_FAILURE(*pErrorCode)) { 1152 return; 1153 } 1154 1155 isOK=TRUE; 1156 1157 for(;;) { 1158 /* read the next line */ 1159 if(!T_FileStream_readLine(convFile, line, sizeof(line))) { 1160 fprintf(stderr, "incomplete charmap section\n"); 1161 isOK=FALSE; 1162 break; 1163 } 1164 1165 /* remove CR LF */ 1166 end=uprv_strchr(line, 0); 1167 while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) { 1168 --end; 1169 } 1170 *end=0; 1171 1172 /* ignore empty and comment lines */ 1173 if(line[0]==0 || line[0]=='#') { 1174 continue; 1175 } 1176 1177 /* stop at the end of the mapping table */ 1178 if(0==uprv_strcmp(line, "END CHARMAP")) { 1179 break; 1180 } 1181 1182 isOK&=ucm_addMappingFromLine(ucm, line, forBase, baseStates); 1183 } 1184 1185 if(!isOK) { 1186 *pErrorCode=U_INVALID_TABLE_FORMAT; 1187 } 1188} 1189#endif 1190