/*
*******************************************************************************
*
*   Copyright (C) 2000-2008, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_elm.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created 02/22/2001
*   created by: Vladimir Weinstein
*
*   This program reads the Fractional UCA table and generates
*   internal format for UCA table as well as inverse UCA table.
*   It then writes binary files containing the data: ucadata.dat
*   & invuca.dat
*/
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef UCOL_UCAELEMS_H
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UCOL_UCAELEMS_H
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_tok.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_imp.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* This is the maximum trie capacity for the mapping trie.
 * Due to current limitations in genuca and the design of UTrie,
 * this number can't be more than 256K (0x40000).
 * As of Unicode 5, it currently could safely go to 128K without
 * a problem. Normally, less than 32K are tailored.
 */
#define UCOL_ELM_TRIE_CAPACITY 0x40000
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* This is the maximum capacity for the temporary combining class
 * table.  The table will be compacted after scanning all the
 * Unicode code points.
 */
#define UCOL_MAX_CM_TAB  0x10000
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* A uint32_t buffer together with two int32_t bookkeeping fields.
 * NOTE(review): by its name the buffer holds expansion collation elements;
 * position is presumably the number of entries in use and size the allocated
 * capacity -- confirm against the implementation.
 */
typedef struct {
    uint32_t *CEs;      /* pointer to the 32-bit element storage */
    int32_t position;
    int32_t size;
} ExpansionTable;
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct {
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar prefixChars[128];
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *prefix;
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t prefixSize;
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar uchars[128];
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *cPoints;
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t cSize;          /* Number of characters in sequence - for contraction */
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t noOfCEs;        /* Number of collation elements                       */
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t CEs[128];      /* These are collation elements - there could be more than one - in case of expansion */
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t mapCE;         /* This is the value element maps in original table   */
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t sizePrim[128];
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t sizeSec[128];
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t sizeTer[128];
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool variableTop;
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool caseBit;
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool isThai;
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} UCAElements;
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct {
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t *endExpansionCE;
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool    *isV;
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t   position;
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t   size;
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint8_t   maxLSize;
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint8_t   maxVSize;
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint8_t   maxTSize;
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} MaxJamoExpansionTable;
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* Two parallel pointers (32-bit CE values and 8-bit sizes) with
 * position/size bookkeeping.
 * NOTE(review): presumably expansionCESize[i] is the maximum expansion length
 * ending in endExpansionCE[i] -- confirm against the implementation.
 */
typedef struct {
    uint32_t *endExpansionCE;
    uint8_t  *expansionCESize;
    int32_t   position;
    int32_t   size;
} MaxExpansionTable;
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct {
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t   index[256];  /* index of cPoints by combining class 0-255. */
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar      *cPoints;    /* code point array of all combining marks */
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t   size;        /* total number of combining marks */
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} CombinClassTable;
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct {
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /*CompactEIntArray      *mapping; */
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UNewTrie                 *mapping;
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ExpansionTable        *expansions;
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  struct CntTable       *contractions;
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UCATableHeader        *image;
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UColOptionSet         *options;
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  MaxExpansionTable     *maxExpansions;
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  MaxJamoExpansionTable *maxJamoExpansions;
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint8_t               *unsafeCP;
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint8_t               *contrEndCP;
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const UCollator       *UCA;
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UHashtable      *prefixLookup;
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  CombinClassTable      *cmLookup;  /* combining class lookup for tailoring. */
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} tempUCATable;
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct {
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar cp;
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t cClass;   // combining class
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}CompData;
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct {
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CompData *precomp;
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t precompLen;
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *decomp;
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t decompLen;
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *comp;
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t compLen;
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t curClass;
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t tailoringCM;
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t  cmPos;
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}tempTailorContext;
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status);
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t);
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status);
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCATableHeader * U_EXPORT2 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status);
13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI int32_t U_EXPORT2 uprv_uca_canonicalClosure(tempUCATable *t, UColTokenParser *src, UErrorCode *status);
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
144