1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* Copyright (C) 2000-2008, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: ucol_elm.h 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created 02/22/2001 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Vladimir Weinstein 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This program reads the Franctional UCA table and generates 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* internal format for UCA table as well as inverse UCA table. 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* It then writes binary files containing the data: ucadata.dat 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* & invuca.dat 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef UCOL_UCAELEMS_H 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UCOL_UCAELEMS_H 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_tok.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_imp.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is the maximum trie capacity for the mapping trie. 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruDue to current limitations in genuca and the design of UTrie, 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruthis number can't be more than 256K. 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruAs of Unicode 5, it currently could safely go to 128K without 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querua problem. Normally, less than 32K are tailored. 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UCOL_ELM_TRIE_CAPACITY 0x40000 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This is the maxmun capacity for temparay combining class 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * table. The table will be compacted after scanning all the 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unicode codepoints. 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UCOL_MAX_CM_TAB 0x10000 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct { 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *CEs; 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t position; 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t size; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} ExpansionTable; 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct { 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar prefixChars[128]; 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *prefix; 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t prefixSize; 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar uchars[128]; 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *cPoints; 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t cSize; /* Number of characters in sequence - for contraction */ 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t noOfCEs; /* Number of collation elements */ 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t CEs[128]; /* These are collation elements - there could be more than one - in case of expansion */ 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t mapCE; /* This is the value element maps in original table */ 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t sizePrim[128]; 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t sizeSec[128]; 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t sizeTer[128]; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool variableTop; 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool caseBit; 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isThai; 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} UCAElements; 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct { 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *endExpansionCE; 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool *isV; 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t position; 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t size; 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t maxLSize; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t maxVSize; 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t maxTSize; 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} MaxJamoExpansionTable; 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct { 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *endExpansionCE; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *expansionCESize; 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t position; 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t size; 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} MaxExpansionTable; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct { 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t index[256]; /* index of cPoints by combining class 0-255. */ 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *cPoints; /* code point array of all combining marks */ 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t size; /* total number of combining marks */ 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} CombinClassTable; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct { 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*CompactEIntArray *mapping; */ 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNewTrie *mapping; 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ExpansionTable *expansions; 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru struct CntTable *contractions; 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCATableHeader *image; 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColOptionSet *options; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MaxExpansionTable *maxExpansions; 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MaxJamoExpansionTable *maxJamoExpansions; 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *unsafeCP; 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *contrEndCP; 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCollator *UCA; 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UHashtable *prefixLookup; 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CombinClassTable *cmLookup; /* combining class lookup for tailoring. */ 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} tempUCATable; 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct { 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar cp; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t cClass; // combining class 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}CompData; 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct { 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CompData *precomp; 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t precompLen; 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *decomp; 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t decompLen; 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *comp; 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t compLen; 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t curClass; 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t tailoringCM; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cmPos; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}tempTailorContext; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status); 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t); 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status); 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCATableHeader * U_EXPORT2 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status); 13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI int32_t U_EXPORT2 uprv_uca_canonicalClosure(tempUCATable *t, UColTokenParser *src, UErrorCode *status); 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */ 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 144