1/*
2*******************************************************************************
3*
4*   Copyright (C) 2000-2008, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  genuca.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created at the end of XX century
14*   created by: Vladimir Weinstein
15*
*   This program reads the Fractional UCA table and generates
17*   internal format for UCA table as well as inverse UCA table.
18*   It then writes binary files containing the data: ucadata.dat
19*   & invuca.dat
20*   Change history:
21*   02/23/2001  grhoten                 Made it into a tool
22*   02/23/2001  weiv                    Moved element & table handling code to i18n
23*   05/09/2001  weiv                    Case bits are now in the CEs, not in front
24*/
25
26#include "unicode/utypes.h"
27#include "unicode/putil.h"
28#include "unicode/udata.h"
29#include "unicode/uclean.h"
30#include "ucol_imp.h"
31#include "genuca.h"
32#include "uoptions.h"
33#include "toolutil.h"
34#include "unewdata.h"
35#include "cstring.h"
36#include "cmemory.h"
37
38#include <stdio.h>
39
/*
 * Global - verbosity.
 * When TRUE, the tool prints progress and diagnostic messages to stdout.
 */
UBool VERBOSE = FALSE;

/* UCA version, filled in when the "[UCA version = " directive is read from the input file. */
static UVersionInfo UCAVersion;
46
47#if UCONFIG_NO_COLLATION
48
/*
 * Dummy UDataInfo, cf. udata.h.
 * Only compiled when UCONFIG_NO_COLLATION is set; the data format, format
 * version and data version are all left as zeros.
 */
static UDataInfo dummyDataInfo = {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,

    { 0, 0, 0, 0 },                 /* dummy dataFormat */
    { 0, 0, 0, 0 },                 /* dummy formatVersion */
    { 0, 0, 0, 0 }                  /* dummy dataVersion */
};
63
64#else
65
/*
 * Template UDataInfo for the UCA data file.
 * writeOutData() copies it and fills in dataVersion with the current
 * Unicode version before the file is created.
 */
static const UDataInfo ucaDataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {UCA_DATA_FORMAT_0, UCA_DATA_FORMAT_1, UCA_DATA_FORMAT_2, UCA_DATA_FORMAT_3},     /* dataFormat="UCol"            */
    /* 03/26/2002 bumped up version since format has changed */
    /* 09/16/2002 bumped up version since we went from UColAttributeValue */
    /*            to int32_t in UColOptionSet */
    /* 05/13/2003 This one also updated since we added UCA and UCD versions */
    /*            to header */
    /* 09/11/2003 Adding information required by data swapper */
    {UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1, UCA_FORMAT_VERSION_2, UCA_FORMAT_VERSION_3},                 /* formatVersion                */
    {0, 0, 0, 0}                  /* dataVersion = Unicode Version (filled in at write time) */
};
85
/*
 * Template UDataInfo for the inverse UCA data file.
 * writeOutInverseData() copies it and fills in dataVersion with the current
 * Unicode version before the file is created.
 */
static const UDataInfo invUcaDataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {INVUCA_DATA_FORMAT_0, INVUCA_DATA_FORMAT_1, INVUCA_DATA_FORMAT_2, INVUCA_DATA_FORMAT_3},     /* dataFormat="InvC"            */
    /* 03/26/2002 bumped up version since format has changed */
    /* 04/29/2003 2.1 format - we have added UCA version to header */
    {INVUCA_FORMAT_VERSION_0, INVUCA_FORMAT_VERSION_1, INVUCA_FORMAT_VERSION_2, INVUCA_FORMAT_VERSION_3},                 /* formatVersion                */
    {0, 0, 0, 0}                  /* dataVersion = Unicode Version (filled in at write time) */
};
101
/*
 * Shared scratch element. readAnElement() fills this object in and returns
 * its address, so the contents are overwritten by the next element read.
 */
UCAElements le;
103
104int32_t readElement(char **from, char *to, char separator, UErrorCode *status) {
105    if(U_FAILURE(*status)) {
106        return 0;
107    }
108    char buffer[1024];
109    int32_t i = 0;
110    while(**from != separator) {
111        if(**from != ' ') {
112            *(buffer+i++) = **from;
113        }
114        (*from)++;
115    }
116    (*from)++;
117    *(buffer + i) = 0;
118    //*to = (char *)malloc(strlen(buffer)+1);
119    strcpy(to, buffer);
120    return i/2;
121}
122
123
124uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UErrorCode *status) {
125    if(U_FAILURE(*status)) {
126        return 0;
127    }
128    uint32_t value = 0;
129    char primsave = '\0';
130    char secsave = '\0';
131    char tersave = '\0';
132    char *primend = primary+4;
133    if(strlen(primary) > 4) {
134        primsave = *primend;
135        *primend = '\0';
136    }
137    char *secend = secondary+2;
138    if(strlen(secondary) > 2) {
139        secsave = *secend;
140        *secend = '\0';
141    }
142    char *terend = tertiary+2;
143    if(strlen(tertiary) > 2) {
144        tersave = *terend;
145        *terend = '\0';
146    }
147    uint32_t primvalue = (uint32_t)((*primary!='\0')?strtoul(primary, &primend, 16):0);
148    uint32_t secvalue = (uint32_t)((*secondary!='\0')?strtoul(secondary, &secend, 16):0);
149    uint32_t tervalue = (uint32_t)((*tertiary!='\0')?strtoul(tertiary, &terend, 16):0);
150    if(primvalue <= 0xFF) {
151      primvalue <<= 8;
152    }
153
154    value = ((primvalue<<UCOL_PRIMARYORDERSHIFT)&UCOL_PRIMARYORDERMASK)|
155        ((secvalue<<UCOL_SECONDARYORDERSHIFT)&UCOL_SECONDARYORDERMASK)|
156        (tervalue&UCOL_TERTIARYORDERMASK);
157
158    if(primsave!='\0') {
159        *primend = primsave;
160    }
161    if(secsave!='\0') {
162        *secend = secsave;
163    }
164    if(tersave!='\0') {
165        *terend = tersave;
166    }
167    return value;
168}
169
/*
 * Inverse UCA table under construction. Each row holds
 *   [0] the first CE,
 *   [1] the continuation CE, or 0 if there is none,
 *   [2] either a single code point, or (size << UCOL_INV_SHIFTVALUE) | offset
 *       describing a run of UChars stored in stringContinue.
 * Row 0 is zeroed by addToInverse() before the first real row is added.
 */
static uint32_t inverseTable[0xFFFF][3];
/* Index of the last row of inverseTable in use while the table is being built. */
static uint32_t inversePos = 0;
/* Backing store for the code point runs referenced from column [2] of inverseTable. */
static UChar stringContinue[0xFFFF];
/* Number of UChars of stringContinue already in use. */
static uint32_t sContPos = 0;
174
175static void addNewInverse(UCAElements *element, UErrorCode *status) {
176  if(U_FAILURE(*status)) {
177    return;
178  }
179  if(VERBOSE && isContinuation(element->CEs[1])) {
180    //fprintf(stdout, "+");
181  }
182  inversePos++;
183  inverseTable[inversePos][0] = element->CEs[0];
184  if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
185    inverseTable[inversePos][1] = element->CEs[1];
186  } else {
187    inverseTable[inversePos][1] = 0;
188  }
189  if(element->cSize < 2) {
190    inverseTable[inversePos][2] = element->cPoints[0];
191  } else { /* add a new store of cruft */
192    inverseTable[inversePos][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
193    memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
194    sContPos += element->cSize+1;
195  }
196}
197
198static void insertInverse(UCAElements *element, uint32_t position, UErrorCode *status) {
199  if(U_FAILURE(*status)) {
200    return;
201  }
202
203  if(VERBOSE && isContinuation(element->CEs[1])) {
204    //fprintf(stdout, "+");
205  }
206  if(position <= inversePos) {
207    /*move stuff around */
208    uint32_t amountToMove = (inversePos - position+1)*sizeof(inverseTable[0]);
209    uprv_memmove(inverseTable[position+1], inverseTable[position], amountToMove);
210  }
211  inverseTable[position][0] = element->CEs[0];
212  if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
213    inverseTable[position][1] = element->CEs[1];
214  } else {
215    inverseTable[position][1] = 0;
216  }
217  if(element->cSize < 2) {
218    inverseTable[position][2] = element->cPoints[0];
219  } else { /* add a new store of cruft */
220    inverseTable[position][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
221    memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
222    sContPos += element->cSize+1;
223  }
224  inversePos++;
225}
226
227static void addToExistingInverse(UCAElements *element, uint32_t position, UErrorCode *status) {
228
229  if(U_FAILURE(*status)) {
230    return;
231  }
232
233      if((inverseTable[position][2] & UCOL_INV_SIZEMASK) == 0) { /* single element, have to make new extension place and put both guys there */
234        stringContinue[sContPos] = (UChar)inverseTable[position][2];
235        inverseTable[position][2] = ((element->cSize+3) << UCOL_INV_SHIFTVALUE) | sContPos;
236        sContPos++;
237        stringContinue[sContPos++] = 0xFFFF;
238        memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
239        sContPos += element->cSize;
240        stringContinue[sContPos++] = 0xFFFE;
241      } else { /* adding to the already existing continuing table */
242        uint32_t contIndex = inverseTable[position][2] & UCOL_INV_OFFSETMASK;
243        uint32_t contSize = (inverseTable[position][2] & UCOL_INV_SIZEMASK) >> UCOL_INV_SHIFTVALUE;
244
245        if(contIndex+contSize < sContPos) {
246          /*fprintf(stderr, ".", sContPos, contIndex+contSize);*/
247          memcpy(stringContinue+contIndex+contSize+element->cSize+1, stringContinue+contIndex+contSize, (element->cSize+1)*sizeof(UChar));
248        }
249
250        stringContinue[contIndex+contSize-1] = 0xFFFF;
251        memcpy(stringContinue+contIndex+contSize, element->cPoints, element->cSize*sizeof(UChar));
252        sContPos += element->cSize+1;
253        stringContinue[contIndex+contSize+element->cSize] = 0xFFFE;
254
255        inverseTable[position][2] = ((contSize+element->cSize+1) << UCOL_INV_SHIFTVALUE) | contIndex;
256      }
257}
258
259/*
260 * Takes two CEs (lead and continuation) and
261 * compares them as CEs should be compared:
262 * primary vs. primary, secondary vs. secondary
263 * tertiary vs. tertiary
264 */
265static int32_t compareCEs(uint32_t *source, uint32_t *target) {
266  uint32_t s1 = source[0], s2, t1 = target[0], t2;
267  if(isContinuation(source[1])) {
268    s2 = source[1];
269  } else {
270    s2 = 0;
271  }
272  if(isContinuation(target[1])) {
273    t2 = target[1];
274  } else {
275    t2 = 0;
276  }
277
278  uint32_t s = 0, t = 0;
279  if(s1 == t1 && s2 == t2) {
280    return 0;
281  }
282  s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
283  t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
284  if(s < t) {
285    return -1;
286  } else if(s > t) {
287    return 1;
288  } else {
289    s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
290    t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
291    if(s < t) {
292      return -1;
293    } else if(s > t) {
294      return 1;
295    } else {
296      s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
297      t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
298      if(s < t) {
299        return -1;
300      } else {
301        return 1;
302      }
303    }
304  }
305}
306
307static uint32_t addToInverse(UCAElements *element, UErrorCode *status) {
308  uint32_t position = inversePos;
309  uint32_t saveElement = element->CEs[0];
310  int32_t compResult = 0;
311  element->CEs[0] &= 0xFFFFFF3F;
312  if(element->noOfCEs == 1) {
313    element->CEs[1] = 0;
314  }
315  if(inversePos == 0) {
316    inverseTable[0][0] = inverseTable[0][1] = inverseTable[0][2] = 0;
317    addNewInverse(element, status);
318  } else if(compareCEs(inverseTable[inversePos], element->CEs) > 0) {
319    while((compResult = compareCEs(inverseTable[--position], element->CEs)) > 0);
320    if(VERBOSE) { fprintf(stdout, "p:%u ", (int)position); }
321    if(compResult == 0) {
322      addToExistingInverse(element, position, status);
323    } else {
324      insertInverse(element, position+1, status);
325    }
326  } else if(compareCEs(inverseTable[inversePos], element->CEs) == 0) {
327    addToExistingInverse(element, inversePos, status);
328  } else {
329    addNewInverse(element, status);
330  }
331  element->CEs[0] = saveElement;
332  if(VERBOSE) { fprintf(stdout, "+"); }
333  return inversePos;
334}
335
336static InverseUCATableHeader *assembleInverseTable(UErrorCode *status)
337{
338  InverseUCATableHeader *result = NULL;
339  uint32_t headerByteSize = paddedsize(sizeof(InverseUCATableHeader));
340  uint32_t inverseTableByteSize = (inversePos+2)*sizeof(uint32_t)*3;
341  uint32_t contsByteSize = sContPos * sizeof(UChar);
342  uint32_t i = 0;
343
344  result = (InverseUCATableHeader *)uprv_malloc(headerByteSize + inverseTableByteSize + contsByteSize);
345  uprv_memset(result, 0, headerByteSize + inverseTableByteSize + contsByteSize);
346  if(result != NULL) {
347    result->byteSize = headerByteSize + inverseTableByteSize + contsByteSize;
348
349    inversePos++;
350    inverseTable[inversePos][0] = 0xFFFFFFFF;
351    inverseTable[inversePos][1] = 0xFFFFFFFF;
352    inverseTable[inversePos][2] = 0x0000FFFF;
353    inversePos++;
354
355    for(i = 2; i<inversePos; i++) {
356      if(compareCEs(inverseTable[i-1], inverseTable[i]) > 0) {
357        fprintf(stderr, "Error at %i: %08X & %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i][0]);
358      } else if(inverseTable[i-1][0] == inverseTable[i][0] && !(inverseTable[i-1][1] < inverseTable[i][1])) {
359        fprintf(stderr, "Continuation error at %i: %08X %08X & %08X %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i-1][1], (int)inverseTable[i][0], (int)inverseTable[i][1]);
360      }
361    }
362
363    result->tableSize = inversePos;
364    result->contsSize = sContPos;
365
366    result->table = headerByteSize;
367    result->conts = headerByteSize + inverseTableByteSize;
368
369    memcpy((uint8_t *)result + result->table, inverseTable, inverseTableByteSize);
370    memcpy((uint8_t *)result + result->conts, stringContinue, contsByteSize);
371
372  } else {
373    *status = U_MEMORY_ALLOCATION_ERROR;
374    return NULL;
375  }
376
377  return result;
378}
379
380
381static void writeOutInverseData(InverseUCATableHeader *data,
382                  const char *outputDir,
383                  const char *copyright,
384                  UErrorCode *status)
385{
386    UNewDataMemory *pData;
387
388    long dataLength;
389
390    UDataInfo invUcaInfo;
391    uprv_memcpy(&invUcaInfo, &invUcaDataInfo, sizeof(UDataInfo));
392    u_getUnicodeVersion(invUcaInfo.dataVersion);
393
394    pData=udata_create(outputDir, INVC_DATA_TYPE, INVC_DATA_NAME, &invUcaInfo,
395                       copyright, status);
396
397    if(U_FAILURE(*status)) {
398        fprintf(stderr, "Error: unable to create %s"INVC_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
399        return;
400    }
401
402    /* write the data to the file */
403    if (VERBOSE) {
404        fprintf(stdout, "Writing out inverse UCA table: %s%c%s.%s\n", outputDir, U_FILE_SEP_CHAR,
405                                                                INVC_DATA_NAME,
406                                                                INVC_DATA_TYPE);
407    }
408    udata_writeBlock(pData, data, data->byteSize);
409
410    /* finish up */
411    dataLength=udata_finish(pData, status);
412    if(U_FAILURE(*status)) {
413        fprintf(stderr, "Error: error %d writing the output file\n", *status);
414        return;
415    }
416}
417
418
419
/*
 * Returns the numeric value of one hexadecimal digit (either letter case).
 * Any character that is not a hexadecimal digit yields 0.
 */
static int32_t hex2num(char hex) {
    if('0' <= hex && hex <= '9') {
        return hex - '0';
    }
    switch(hex) {
    case 'a': case 'A': return 10;
    case 'b': case 'B': return 11;
    case 'c': case 'C': return 12;
    case 'd': case 'D': return 13;
    case 'e': case 'E': return 14;
    case 'f': case 'F': return 15;
    default:            return 0;
    }
}
431
432UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UErrorCode *status) {
433    char buffer[2048], primary[100], secondary[100], tertiary[100];
434    UBool detectedContraction;
435    int32_t i = 0;
436    unsigned int theValue;
437    char *pointer = NULL;
438    char *commentStart = NULL;
439    char *startCodePoint = NULL;
440    char *endCodePoint = NULL;
441    char *spacePointer = NULL;
442    char *dashPointer = NULL;
443    char *result = fgets(buffer, 2048, data);
444    int32_t buflen = (int32_t)uprv_strlen(buffer);
445    if(U_FAILURE(*status)) {
446        return 0;
447    }
448    *primary = *secondary = *tertiary = '\0';
449    if(result == NULL) {
450        if(feof(data)) {
451            return NULL;
452        } else {
453            fprintf(stderr, "empty line but no EOF!\n");
454            *status = U_INVALID_FORMAT_ERROR;
455            return NULL;
456        }
457    }
458    while(buflen>0 && (buffer[buflen-1] == '\r' || buffer[buflen-1] == '\n')) {
459      buffer[--buflen] = 0;
460    }
461
462    if(buffer[0] == 0 || buffer[0] == '#') {
463        return NULL; // just a comment, skip whole line
464    }
465
466    UCAElements *element = &le; //(UCAElements *)malloc(sizeof(UCAElements));
467
468    enum ActionType {
469      READCE,
470      READHEX,
471      READUCAVERSION
472    };
473
474    // Directives.
475    if(buffer[0] == '[') {
476      uint32_t cnt = 0;
477      static const struct {
478        char name[128];
479        uint32_t *what;
480        ActionType what_to_do;
481      } vt[]  = { {"[first tertiary ignorable",  consts->UCA_FIRST_TERTIARY_IGNORABLE,  READCE},
482                  {"[last tertiary ignorable",   consts->UCA_LAST_TERTIARY_IGNORABLE,   READCE},
483                  {"[first secondary ignorable", consts->UCA_FIRST_SECONDARY_IGNORABLE, READCE},
484                  {"[last secondary ignorable",  consts->UCA_LAST_SECONDARY_IGNORABLE,  READCE},
485                  {"[first primary ignorable",   consts->UCA_FIRST_PRIMARY_IGNORABLE,   READCE},
486                  {"[last primary ignorable",    consts->UCA_LAST_PRIMARY_IGNORABLE,    READCE},
487                  {"[first variable",            consts->UCA_FIRST_VARIABLE,            READCE},
488                  {"[last variable",             consts->UCA_LAST_VARIABLE,             READCE},
489                  {"[first regular",             consts->UCA_FIRST_NON_VARIABLE,        READCE},
490                  {"[last regular",              consts->UCA_LAST_NON_VARIABLE,         READCE},
491                  {"[first implicit",            consts->UCA_FIRST_IMPLICIT,            READCE},
492                  {"[last implicit",             consts->UCA_LAST_IMPLICIT,             READCE},
493                  {"[first trailing",            consts->UCA_FIRST_TRAILING,            READCE},
494                  {"[last trailing",             consts->UCA_LAST_TRAILING,             READCE},
495
496                  {"[fixed top",                       &consts->UCA_PRIMARY_TOP_MIN,           READHEX},
497                  {"[fixed first implicit byte",       &consts->UCA_PRIMARY_IMPLICIT_MIN,      READHEX},
498                  {"[fixed last implicit byte",        &consts->UCA_PRIMARY_IMPLICIT_MAX,      READHEX},
499                  {"[fixed first trail byte",          &consts->UCA_PRIMARY_TRAILING_MIN,      READHEX},
500                  {"[fixed last trail byte",           &consts->UCA_PRIMARY_TRAILING_MAX,      READHEX},
501                  {"[fixed first special byte",        &consts->UCA_PRIMARY_SPECIAL_MIN,       READHEX},
502                  {"[fixed last special byte",         &consts->UCA_PRIMARY_SPECIAL_MAX,       READHEX},
503                  {"[variable top = ",                &t->options->variableTopValue,          READHEX},
504                  {"[UCA version = ",                 NULL,                          READUCAVERSION}
505      };
506      for (cnt = 0; cnt<sizeof(vt)/sizeof(vt[0]); cnt++) {
507        uint32_t vtLen = (uint32_t)uprv_strlen(vt[cnt].name);
508        if(uprv_strncmp(buffer, vt[cnt].name, vtLen) == 0) {
509            element->variableTop = TRUE;
510            if(vt[cnt].what_to_do == READHEX) {
511              if(sscanf(buffer+vtLen, "%4x", &theValue) != 1) /* read first code point */
512              {
513                  fprintf(stderr, " scanf(hex) failed on !\n ");
514              }
515              *(vt[cnt].what) = (UChar)theValue;
516              //if(cnt == 1) { // first implicit
517                // we need to set the value for top next
518                //uint32_t nextTop = ucol_prv_calculateImplicitPrimary(0x4E00); // CJK base
519                //consts->UCA_NEXT_TOP_VALUE = theValue<<24 | 0x030303;
520              //}
521            } else if (vt[cnt].what_to_do == READCE) { /* vt[cnt].what_to_do == READCE */
522              pointer = strchr(buffer+vtLen, '[');
523              if(pointer) {
524                pointer++;
525                element->sizePrim[0]=readElement(&pointer, primary, ',', status);
526                element->sizeSec[0]=readElement(&pointer, secondary, ',', status);
527                element->sizeTer[0]=readElement(&pointer, tertiary, ']', status);
528
529                vt[cnt].what[0] = getSingleCEValue(primary, secondary, tertiary, status);
530                if(element->sizePrim[0] > 2 || element->sizeSec[0] > 1 || element->sizeTer[0] > 1) {
531                  uint32_t CEi = 1;
532                  uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
533                    if(2*CEi<element->sizePrim[i]) {
534                        value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
535                        value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
536                    }
537
538                    if(2*CEi+1<element->sizePrim[i]) {
539                        value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
540                        value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
541                    }
542
543                    if(CEi<element->sizeSec[i]) {
544                        value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
545                        value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
546                    }
547
548                    if(CEi<element->sizeTer[i]) {
549                        value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
550                        value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
551                    }
552
553                    CEi++;
554
555                    vt[cnt].what[1] = value;
556                    //element->CEs[CEindex++] = value;
557                } else {
558                  vt[cnt].what[1] = 0;
559                }
560              } else {
561                fprintf(stderr, "Failed to read a CE from line %s\n", buffer);
562              }
563            } else { //vt[cnt].what_to_do == READUCAVERSION
564              u_versionFromString(UCAVersion, buffer+vtLen);
565              if(VERBOSE) {
566                fprintf(stdout, "UCA version [%hu.%hu.%hu.%hu]\n", UCAVersion[0], UCAVersion[1], UCAVersion[2], UCAVersion[3]);
567              }
568            }
569            //element->cPoints[0] = (UChar)theValue;
570            //return element;
571            return NULL;
572        }
573      }
574      fprintf(stderr, "Warning: unrecognized option: %s\n", buffer);
575      //*status = U_INVALID_FORMAT_ERROR;
576      return NULL;
577    }
578    element->variableTop = FALSE;
579
580    startCodePoint = buffer;
581    endCodePoint = strchr(startCodePoint, ';');
582
583    if(endCodePoint == 0) {
584        fprintf(stderr, "error - line with no code point!\n");
585        *status = U_INVALID_FORMAT_ERROR; /* No code point - could be an error, but probably only an empty line */
586        return NULL;
587    } else {
588        *(endCodePoint) = 0;
589    }
590
591    memset(element, 0, sizeof(*element));
592
593    element->cPoints = element->uchars;
594
595    spacePointer = strchr(buffer, ' ');
596    if(sscanf(buffer, "%4x", &theValue) != 1) /* read first code point */
597    {
598      fprintf(stderr, " scanf(hex) failed!\n ");
599    }
600    element->cPoints[0] = (UChar)theValue;
601
602    if(spacePointer == 0) {
603        detectedContraction = FALSE;
604        element->cSize = 1;
605    } else {
606        dashPointer = strchr(buffer, '|');
607        if (dashPointer != NULL) {
608            // prefix characters
609            element->prefixChars[0] = (UChar)theValue;
610            element->prefixSize = 1;
611            element->prefix = element->prefixChars;
612            sscanf(dashPointer+1, "%4x", &theValue);
613            element->cPoints[0] = (UChar)theValue;
614            element->cSize = 1;
615        }
616        else {
617          // Contractions or surrogate characters.
618            i = 1;
619            detectedContraction = TRUE;
620            while(spacePointer != NULL) {
621                sscanf(spacePointer+1, "%4x", &theValue);
622                element->cPoints[i++] = (UChar)theValue;
623                spacePointer = strchr(spacePointer+1, ' ');
624            }
625            element->cSize = i;
626        }
627
628
629        //fprintf(stderr, "Number of codepoints in contraction: %i\n", i);
630    }
631
632    startCodePoint = endCodePoint+1;
633
634    commentStart = strchr(startCodePoint, '#');
635    if(commentStart == NULL) {
636        commentStart = strlen(startCodePoint) + startCodePoint;
637    }
638
639    i = 0;
640    uint32_t CEindex = 0;
641    element->noOfCEs = 0;
642    for(;;) {
643        endCodePoint = strchr(startCodePoint, ']');
644        if(endCodePoint == NULL || endCodePoint >= commentStart) {
645            break;
646        }
647        pointer = strchr(startCodePoint, '[');
648        pointer++;
649
650        element->sizePrim[i]=readElement(&pointer, primary, ',', status);
651        element->sizeSec[i]=readElement(&pointer, secondary, ',', status);
652        element->sizeTer[i]=readElement(&pointer, tertiary, ']', status);
653
654
655        /* I want to get the CEs entered right here, including continuation */
656        element->CEs[CEindex++] = getSingleCEValue(primary, secondary, tertiary, status);
657
658        uint32_t CEi = 1;
659        while(2*CEi<element->sizePrim[i] || CEi<element->sizeSec[i] || CEi<element->sizeTer[i]) {
660          uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
661            if(2*CEi<element->sizePrim[i]) {
662                value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
663                value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
664            }
665
666            if(2*CEi+1<element->sizePrim[i]) {
667                value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
668                value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
669            }
670
671            if(CEi<element->sizeSec[i]) {
672                value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
673                value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
674            }
675
676            if(CEi<element->sizeTer[i]) {
677                value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
678                value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
679            }
680
681            CEi++;
682
683            element->CEs[CEindex++] = value;
684        }
685
686      startCodePoint = endCodePoint+1;
687      i++;
688    }
689    element->noOfCEs = CEindex;
690#if 0
691    element->isThai = UCOL_ISTHAIPREVOWEL(element->cPoints[0]);
692#endif
693    // we don't want any strange stuff after useful data!
694    if (pointer == NULL) {
695        /* huh? Did we get ']' without the '['? Pair your brackets! */
696        *status=U_INVALID_FORMAT_ERROR;
697    }
698    else {
699        while(pointer < commentStart)  {
700            if(*pointer != ' ' && *pointer != '\t')
701            {
702                *status=U_INVALID_FORMAT_ERROR;
703                break;
704            }
705            pointer++;
706        }
707    }
708
709    if(U_FAILURE(*status)) {
710        fprintf(stderr, "problem putting stuff in hash table %s\n", u_errorName(*status));
711        *status = U_INTERNAL_PROGRAM_ERROR;
712        return NULL;
713    }
714
715    return element;
716}
717
718
719void writeOutData(UCATableHeader *data,
720                  UCAConstants *consts,
721                  UChar contractions[][3],
722                  uint32_t noOfcontractions,
723                  const char *outputDir,
724                  const char *copyright,
725                  UErrorCode *status)
726{
727    if(U_FAILURE(*status)) {
728        return;
729    }
730
731    uint32_t size = data->size;
732
733    data->UCAConsts = data->size;
734    data->size += paddedsize(sizeof(UCAConstants));
735
736    if(noOfcontractions != 0) {
737      contractions[noOfcontractions][0] = 0;
738      contractions[noOfcontractions][1] = 0;
739      contractions[noOfcontractions][2] = 0;
740      noOfcontractions++;
741
742
743      data->contractionUCACombos = data->size;
744      data->contractionUCACombosWidth = 3;
745      data->contractionUCACombosSize = noOfcontractions;
746      data->size += paddedsize((noOfcontractions*3*sizeof(UChar)));
747    }
748
749    UNewDataMemory *pData;
750
751    long dataLength;
752    UDataInfo ucaInfo;
753    uprv_memcpy(&ucaInfo, &ucaDataInfo, sizeof(UDataInfo));
754    u_getUnicodeVersion(ucaInfo.dataVersion);
755
756    pData=udata_create(outputDir, UCA_DATA_TYPE, UCA_DATA_NAME, &ucaInfo,
757                       copyright, status);
758
759    if(U_FAILURE(*status)) {
760        fprintf(stderr, "Error: unable to create %s"UCA_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
761        return;
762    }
763
764    /* write the data to the file */
765    if (VERBOSE) {
766        fprintf(stdout, "Writing out UCA table: %s%c%s.%s\n", outputDir,
767                                                        U_FILE_SEP_CHAR,
768                                                        U_ICUDATA_NAME "_" UCA_DATA_NAME,
769                                                        UCA_DATA_TYPE);
770    }
771    udata_writeBlock(pData, data, size);
772
773    // output the constants here
774    udata_writeBlock(pData, consts, sizeof(UCAConstants));
775
776    if(noOfcontractions != 0) {
777      udata_writeBlock(pData, contractions, noOfcontractions*3*sizeof(UChar));
778      udata_writePadding(pData, paddedsize((noOfcontractions*3*sizeof(UChar))) - noOfcontractions*3*sizeof(uint16_t));
779    }
780
781    /* finish up */
782    dataLength=udata_finish(pData, status);
783    if(U_FAILURE(*status)) {
784        fprintf(stderr, "Error: error %d writing the output file\n", *status);
785        return;
786    }
787}
788
789static int32_t
790write_uca_table(const char *filename,
791                const char *outputDir,
792                const char *copyright,
793                UErrorCode *status)
794{
795    FILE *data = fopen(filename, "r");
796    if(data == NULL) {
797        fprintf(stderr, "Couldn't open file: %s\n", filename);
798        return -1;
799    }
800    uint32_t line = 0;
801    UCAElements *element = NULL;
802    UChar variableTopValue = 0;
803    UCATableHeader *myD = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
804    /* test for NULL */
805    if(myD == NULL) {
806        *status = U_MEMORY_ALLOCATION_ERROR;
807        fclose(data);
808        return 0;
809    }
810    uprv_memset(myD, 0, sizeof(UCATableHeader));
811    UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
812    /* test for NULL */
813    if(opts == NULL) {
814        *status = U_MEMORY_ALLOCATION_ERROR;
815        uprv_free(myD);
816        fclose(data);
817        return 0;
818    }
819    uprv_memset(opts, 0, sizeof(UColOptionSet));
820    UChar contractionCEs[512][3];
821    uprv_memset(contractionCEs, 0, 512*3*sizeof(UChar));
822    uint32_t noOfContractions = 0;
823    UCAConstants consts;
824    uprv_memset(&consts, 0, sizeof(consts));
825#if 0
826    UCAConstants consts = {
827      UCOL_RESET_TOP_VALUE,
828      UCOL_FIRST_PRIMARY_IGNORABLE,
829      UCOL_LAST_PRIMARY_IGNORABLE,
830      UCOL_LAST_PRIMARY_IGNORABLE_CONT,
831      UCOL_FIRST_SECONDARY_IGNORABLE,
832      UCOL_LAST_SECONDARY_IGNORABLE,
833      UCOL_FIRST_TERTIARY_IGNORABLE,
834      UCOL_LAST_TERTIARY_IGNORABLE,
835      UCOL_FIRST_VARIABLE,
836      UCOL_LAST_VARIABLE,
837      UCOL_FIRST_NON_VARIABLE,
838      UCOL_LAST_NON_VARIABLE,
839
840      UCOL_NEXT_TOP_VALUE,
841/*
842      UCOL_NEXT_FIRST_PRIMARY_IGNORABLE,
843      UCOL_NEXT_LAST_PRIMARY_IGNORABLE,
844      UCOL_NEXT_FIRST_SECONDARY_IGNORABLE,
845      UCOL_NEXT_LAST_SECONDARY_IGNORABLE,
846      UCOL_NEXT_FIRST_TERTIARY_IGNORABLE,
847      UCOL_NEXT_LAST_TERTIARY_IGNORABLE,
848      UCOL_NEXT_FIRST_VARIABLE,
849      UCOL_NEXT_LAST_VARIABLE,
850*/
851
852      PRIMARY_IMPLICIT_MIN,
853      PRIMARY_IMPLICIT_MAX
854    };
855#endif
856
857
858    uprv_memset(inverseTable, 0xDA, sizeof(int32_t)*3*0xFFFF);
859
860    opts->variableTopValue = variableTopValue;
861    opts->strength = UCOL_TERTIARY;
862    opts->frenchCollation = UCOL_OFF;
863    opts->alternateHandling = UCOL_NON_IGNORABLE; /* attribute for handling variable elements*/
864    opts->caseFirst = UCOL_OFF;         /* who goes first, lower case or uppercase */
865    opts->caseLevel = UCOL_OFF;         /* do we have an extra case level */
866    opts->normalizationMode = UCOL_OFF; /* attribute for normalization */
867    opts->hiraganaQ = UCOL_OFF; /* attribute for JIS X 4061, used only in Japanese */
868    opts->numericCollation = UCOL_OFF;
869    myD->jamoSpecial = FALSE;
870
871    tempUCATable *t = uprv_uca_initTempTable(myD, opts, NULL, IMPLICIT_TAG, LEAD_SURROGATE_TAG, status);
872    if(U_FAILURE(*status))
873    {
874        fprintf(stderr, "Failed to init UCA temp table: %s\n", u_errorName(*status));
875        uprv_free(opts);
876        uprv_free(myD);
877        fclose(data);
878        return -1;
879    }
880
881#if 0
882    IMPLICIT_TAG = 9,
883/*
884 *****************************************************************************************
885 * NON_CHARACTER FDD0 - FDEF, FFFE, FFFF, 1FFFE, 1FFFF, 2FFFE, 2FFFF,...e.g. **FFFE, **FFFF
886 ******************************************************************************************
887 */
888#endif
889
890// * set to zero
891struct {
892      UChar32 start;
893      UChar32 end;
894      int32_t value;
895    } ranges[] =
896    {
897#if 0
898      {0xAC00, 0xD7AF, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) },  //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
899      {0xD800, 0xDBFF, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24)  },  //1 LEAD_SURROGATE_TAG,  /* D800-DBFF*/
900      {0xDC00, 0xDFFF, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) },  //2 TRAIL_SURROGATE DC00-DFFF
901      {0x3400, 0x4DB5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //3 CJK_IMPLICIT_TAG,   /* 0x3400-0x4DB5*/
902      {0x4E00, 0x9FA5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //4 CJK_IMPLICIT_TAG,   /* 0x4E00-0x9FA5*/
903      {0xF900, 0xFA2D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //5 CJK_IMPLICIT_TAG,   /* 0xF900-0xFA2D*/
904      {0x20000, 0x2A6D6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //6 CJK_IMPLICIT_TAG,   /* 0x20000-0x2A6D6*/
905      {0x2F800, 0x2FA1D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //7 CJK_IMPLICIT_TAG,   /* 0x2F800-0x2FA1D*/
906#endif
907      {0xAC00, 0xD7B0, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) },  //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
908      //{0xD800, 0xDC00, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24)  },  //1 LEAD_SURROGATE_TAG,  /* D800-DBFF*/
909      {0xDC00, 0xE000, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) },  //2 TRAIL_SURROGATE DC00-DFFF
910      // Now directly handled in the collation code by the swapCJK function.
911      //{0x3400, 0x4DB6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //3 CJK_IMPLICIT_TAG,   /* 0x3400-0x4DB5*/
912      //{0x4E00, 0x9FA6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //4 CJK_IMPLICIT_TAG,   /* 0x4E00-0x9FA5*/
913      //{0xF900, 0xFA2E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //5 CJK_IMPLICIT_TAG,   /* 0xF900-0xFA2D*/
914      //{0x20000, 0x2A6D7, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //6 CJK_IMPLICIT_TAG,   /* 0x20000-0x2A6D6*/
915      //{0x2F800, 0x2FA1E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //7 CJK_IMPLICIT_TAG,   /* 0x2F800-0x2FA1D*/
916    };
917    uint32_t i = 0;
918
919    for(i = 0; i<sizeof(ranges)/sizeof(ranges[0]); i++) {
920      /*ucmpe32_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value); */
921      utrie_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value, TRUE);
922    }
923
924
925    int32_t surrogateCount = 0;
926    while(!feof(data)) {
927        if(U_FAILURE(*status)) {
928            fprintf(stderr, "Something returned an error %i (%s) while processing line %u of %s. Exiting...\n",
929                *status, u_errorName(*status), (int)line, filename);
930            exit(*status);
931        }
932
933        element = readAnElement(data, t, &consts, status);
934        line++;
935        if(VERBOSE) {
936          fprintf(stdout, "%u ", (int)line);
937        }
938        if(element != NULL) {
939            // we have read the line, now do something sensible with the read data!
940
941            // Below stuff was taken care of in readAnElement
942            //if(element->variableTop == TRUE && variableTopValue == 0) {
943            //    t->options->variableTopValue = element->cPoints[0];
944            //}
945
946            // if element is a contraction, we want to add it to contractions
947            if(element->cSize > 1 && element->cPoints[0] != 0xFDD0) { // this is a contraction
948              if(UTF_IS_LEAD(element->cPoints[0]) && UTF_IS_TRAIL(element->cPoints[1]) && element->cSize == 2) {
949                surrogateCount++;
950              } else {
951                contractionCEs[noOfContractions][0] = element->cPoints[0];
952                contractionCEs[noOfContractions][1] = element->cPoints[1];
953                if(element->cSize > 2) { // the third one
954                  contractionCEs[noOfContractions][2] = element->cPoints[2];
955                } else {
956                  contractionCEs[noOfContractions][2] = 0;
957                }
958                noOfContractions++;
959              }
960            }
961            else {
962                // TODO (claireho): does this work? Need more tests
963                // The following code is to handle the UCA pre-context rules
964                // for L/l with middle dot. We share the structures for contractionCombos.
965                // The format for pre-context character is
966                // contractionCEs[0]: codepoint in element->cPoints[0]
967                // contractionCEs[1]: '\0' to differentiate with contractions.
968                // contractionCEs[2]: prefix char
969                if (element->prefixSize>0) {
970                    contractionCEs[noOfContractions][0]=element->cPoints[0];
971                    contractionCEs[noOfContractions][1]='\0';
972                    contractionCEs[noOfContractions][2]=element->prefixChars[0];
973                    noOfContractions++;
974                }
975
976            }
977
978            /* we're first adding to inverse, because addAnElement will reverse the order */
979            /* of code points and stuff... we don't want that to happen */
980            addToInverse(element, status);
981            if(!(element->cSize > 1 && element->cPoints[0] == 0xFDD0)) {
982              uprv_uca_addAnElement(t, element, status);
983            }
984        }
985    }
986
987    if(UCAVersion[0] == 0 && UCAVersion[1] == 0 && UCAVersion[2] == 0 && UCAVersion[3] == 0) {
988        fprintf(stderr, "UCA version not specified. Cannot create data file!\n");
989        uprv_uca_closeTempTable(t);
990        uprv_free(opts);
991        uprv_free(myD);
992        fclose(data);
993        return -1;
994    }
995/*    {
996        uint32_t trieWord = utrie_get32(t->mapping, 0xDC01, NULL);
997    }*/
998
999    if (VERBOSE) {
1000        fprintf(stdout, "\nLines read: %u\n", (int)line);
1001        fprintf(stdout, "Surrogate count: %i\n", (int)surrogateCount);
1002        fprintf(stdout, "Raw data breakdown:\n");
1003        /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
1004        fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
1005        fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
1006        fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
1007    }
1008
1009
1010    /* produce canonical closure for table */
1011    /* first set up constants for implicit calculation */
1012    uprv_uca_initImplicitConstants(status);
1013    /* do the closure */
1014    int32_t noOfClosures = uprv_uca_canonicalClosure(t, NULL, status);
1015    if(noOfClosures != 0) {
1016      fprintf(stderr, "Warning: %i canonical closures occured!\n", (int)noOfClosures);
1017    }
1018
1019    /* test */
1020    UCATableHeader *myData = uprv_uca_assembleTable(t, status);
1021
1022    if (VERBOSE) {
1023        fprintf(stdout, "Compacted data breakdown:\n");
1024        /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
1025        fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
1026        fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
1027        fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
1028    }
1029
1030    if(U_FAILURE(*status)) {
1031        fprintf(stderr, "Error creating table: %s\n", u_errorName(*status));
1032        uprv_uca_closeTempTable(t);
1033        uprv_free(opts);
1034        uprv_free(myD);
1035        fclose(data);
1036        return -1;
1037    }
1038
1039    /* populate the version info struct with version info*/
1040    myData->version[0] = UCOL_BUILDER_VERSION;
1041    myData->version[1] = UCAVersion[0];
1042    myData->version[2] = UCAVersion[1];
1043    myData->version[3] = UCAVersion[2];
1044    /*TODO:The fractional rules version should be taken from FractionalUCA.txt*/
1045    // Removed this macro. Instead, we use the fields below
1046    //myD->version[1] = UCOL_FRACTIONAL_UCA_VERSION;
1047    //myD->UCAVersion = UCAVersion; // out of FractionalUCA.txt
1048    uprv_memcpy(myData->UCAVersion, UCAVersion, sizeof(UVersionInfo));
1049    u_getUnicodeVersion(myData->UCDVersion);
1050
1051    writeOutData(myData, &consts, contractionCEs, noOfContractions, outputDir, copyright, status);
1052
1053    InverseUCATableHeader *inverse = assembleInverseTable(status);
1054    uprv_memcpy(inverse->UCAVersion, UCAVersion, sizeof(UVersionInfo));
1055    writeOutInverseData(inverse, outputDir, copyright, status);
1056
1057    uprv_uca_closeTempTable(t);
1058    uprv_free(myD);
1059    uprv_free(opts);
1060
1061
1062    uprv_free(myData);
1063    uprv_free(inverse);
1064    fclose(data);
1065
1066    return 0;
1067}
1068
1069#endif /* #if !UCONFIG_NO_COLLATION */
1070
1071static UOption options[]={
1072    UOPTION_HELP_H,              /* 0  Numbers for those who*/
1073    UOPTION_HELP_QUESTION_MARK,  /* 1   can't count. */
1074    UOPTION_COPYRIGHT,           /* 2 */
1075    UOPTION_VERSION,             /* 3 */
1076    UOPTION_DESTDIR,             /* 4 */
1077    UOPTION_SOURCEDIR,           /* 5 */
1078    UOPTION_VERBOSE,             /* 6 */
1079    UOPTION_ICUDATADIR           /* 7 */
1080    /* weiv can't count :))))) */
1081};
1082
1083int main(int argc, char* argv[]) {
1084    UErrorCode status = U_ZERO_ERROR;
1085    const char* destdir = NULL;
1086    const char* srcDir = NULL;
1087    char filename[300];
1088    char *basename = NULL;
1089    const char *copyright = NULL;
1090    uprv_memset(&UCAVersion, 0, 4);
1091
1092    U_MAIN_INIT_ARGS(argc, argv);
1093
1094    /* preset then read command line options */
1095    options[4].value=u_getDataDirectory();
1096    options[5].value="";
1097    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1098
1099    /* error handling, printing usage message */
1100    if(argc<0) {
1101        fprintf(stderr,
1102            "error in command line argument \"%s\"\n",
1103            argv[-argc]);
1104    } else if(argc<2) {
1105        argc=-1;
1106    }
1107    if(options[0].doesOccur || options[1].doesOccur) {
1108        fprintf(stderr,
1109            "usage: %s [-options] file\n"
1110            "\tRead in UCA collation text data and write out the binary collation data\n"
1111            "options:\n"
1112            "\t-h or -? or --help  this usage text\n"
1113            "\t-V or --version     show a version message\n"
1114            "\t-c or --copyright   include a copyright notice\n"
1115            "\t-d or --destdir     destination directory, followed by the path\n"
1116            "\t-s or --sourcedir   source directory, followed by the path\n"
1117            "\t-v or --verbose     turn on verbose output\n"
1118            "\t-i or --icudatadir  directory for locating any needed intermediate data files,\n"
1119            "\t                    followed by path, defaults to %s\n",
1120            argv[0], u_getDataDirectory());
1121        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1122    }
1123    if(options[3].doesOccur) {
1124        fprintf(stdout, "genuca version %hu.%hu, ICU tool to read UCA text data and create UCA data tables for collation.\n",
1125#if UCONFIG_NO_COLLATION
1126            0, 0
1127#else
1128            UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1
1129#endif
1130            );
1131        fprintf(stdout, U_COPYRIGHT_STRING"\n");
1132        exit(0);
1133    }
1134
1135    /* get the options values */
1136    destdir = options[4].value;
1137    srcDir = options[5].value;
1138    VERBOSE = options[6].doesOccur;
1139
1140    if (options[2].doesOccur) {
1141        copyright = U_COPYRIGHT_STRING;
1142    }
1143
1144    if (options[7].doesOccur) {
1145        u_setDataDirectory(options[7].value);
1146    }
1147    /* Initialize ICU */
1148    u_init(&status);
1149    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1150        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1151            argv[0], u_errorName(status));
1152        exit(1);
1153    }
1154    status = U_ZERO_ERROR;
1155
1156
1157    /* prepare the filename beginning with the source dir */
1158    uprv_strcpy(filename, srcDir);
1159    basename=filename+uprv_strlen(filename);
1160
1161    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
1162        *basename++ = U_FILE_SEP_CHAR;
1163    }
1164
1165    if(argc < 0) {
1166      uprv_strcpy(basename, "FractionalUCA.txt");
1167    } else {
1168      argv++;
1169      uprv_strcpy(basename, getLongPathname(*argv));
1170    }
1171
1172#if 0
1173    if(u_getCombiningClass(0x0053) == 0)
1174    {
1175        fprintf(stderr, "SEVERE ERROR: Normalization data is not functioning! Bailing out.  Was not able to load unorm.dat.\n");
1176        exit(1);
1177    }
1178#endif
1179
1180#if UCONFIG_NO_COLLATION
1181
1182    UNewDataMemory *pData;
1183    const char *msg;
1184
1185    msg = "genuca writes dummy " UCA_DATA_NAME "." UCA_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
1186    fprintf(stderr, "%s\n", msg);
1187    pData = udata_create(destdir, UCA_DATA_TYPE, UCA_DATA_NAME, &dummyDataInfo,
1188                         NULL, &status);
1189    udata_writeBlock(pData, msg, strlen(msg));
1190    udata_finish(pData, &status);
1191
1192    msg = "genuca writes dummy " INVC_DATA_NAME "." INVC_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
1193    fprintf(stderr, "%s\n", msg);
1194    pData = udata_create(destdir, INVC_DATA_TYPE, INVC_DATA_NAME, &dummyDataInfo,
1195                         NULL, &status);
1196    udata_writeBlock(pData, msg, strlen(msg));
1197    udata_finish(pData, &status);
1198
1199    return (int)status;
1200
1201#else
1202
1203    return write_uca_table(filename, destdir, copyright, &status);
1204
1205#endif
1206}
1207
1208/*
1209 * Hey, Emacs, please set the following:
1210 *
1211 * Local Variables:
1212 * indent-tabs-mode: nil
1213 * End:
1214 *
1215 */
1216