1/*
2 ********************************************************************************
3 *
4 *   Copyright (C) 1998-2012, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
10 *  makeconv.c:
11 *  tool creating a binary (compressed) representation of the conversion mapping
12 *  table (IBM NLTC ucmap format).
13 *
14 *  05/04/2000    helena     Added fallback mapping into the picture...
15 *  06/29/2000  helena      Major rewrite of the callback APIs.
16 */
17
18#include <stdio.h>
19#include "unicode/putil.h"
20#include "unicode/ucnv_err.h"
21#include "ucnv_bld.h"
22#include "ucnv_imp.h"
23#include "ucnv_cnv.h"
24#include "cstring.h"
25#include "cmemory.h"
26#include "uinvchar.h"
27#include "filestrm.h"
28#include "toolutil.h"
29#include "uoptions.h"
30#include "unicode/udata.h"
31#include "unewdata.h"
32#include "uparse.h"
33#include "ucm.h"
34#include "makeconv.h"
35#include "genmbcs.h"
36
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used (e.g. as an operand of sizeof
 * or next to a postfix/unary operator).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
38
39#define DEBUG 0
40
41typedef struct ConvData {
42    UCMFile *ucm;
43    NewConverter *cnvData, *extData;
44    UConverterSharedData sharedData;
45    UConverterStaticData staticData;
46} ConvData;
47
48static void
49initConvData(ConvData *data) {
50    uprv_memset(data, 0, sizeof(ConvData));
51    data->sharedData.structSize=sizeof(UConverterSharedData);
52    data->staticData.structSize=sizeof(UConverterStaticData);
53    data->sharedData.staticData=&data->staticData;
54}
55
56static void
57cleanupConvData(ConvData *data) {
58    if(data!=NULL) {
59        if(data->cnvData!=NULL) {
60            data->cnvData->close(data->cnvData);
61            data->cnvData=NULL;
62        }
63        if(data->extData!=NULL) {
64            data->extData->close(data->extData);
65            data->extData=NULL;
66        }
67        ucm_close(data->ucm);
68        data->ucm=NULL;
69    }
70}
71
/*
 * from ucnvstat.c - static prototypes of data-based converters
 *
 * readHeader() indexes this table by conversion type to fill in header
 * fields that the .ucm file left unset; it tolerates NULL entries.
 */
extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
76
/*
 * Global tool settings; main() sets them from the command line.
 */
/* --verbose: print progress messages (see writeConverterData()) */
UBool VERBOSE = FALSE;
/* --small: only set in this file; presumably read by the table writers - confirm */
UBool SMALL = FALSE;
/* --ignore-siso-check: handed to ucm_processStates() by readFile() */
UBool IGNORE_SISO_CHECK = FALSE;
83
/*
 * Read the .ucm file converterName and build the converter table(s)
 * into *data; failures are reported through *pErrorCode.
 */
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);

/*
 * Set up the UNewData and write the converter.
 */
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);

/*
 * Whether writeConverterData() passes U_COPYRIGHT_STRING (rather than NULL)
 * to udata_create(). main() overwrites the initial value with the state of
 * the --copyright option.
 */
UBool haveCopyright=TRUE;
94
/*
 * UDataInfo passed to udata_create() by writeConverterData().
 * The initializer is positional, so the order of the entries matters.
 * main() overwrites dataVersion with the ICU version from u_getVersion()
 * and prints formatVersion[0..1] as the tool version.
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),            /* size of this struct */
    0,                            /* presumably a reserved field - confirm against udata.h */

    U_IS_BIG_ENDIAN,              /* byte order of the build platform */
    U_CHARSET_FAMILY,             /* charset family of the build platform */
    sizeof(UChar),                /* size of a UChar on the build platform */
    0,                            /* presumably a reserved field - confirm against udata.h */

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
108
109static void
110writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
111{
112    UNewDataMemory *mem = NULL;
113    uint32_t sz2;
114    uint32_t size = 0;
115    int32_t tableType;
116
117    if(U_FAILURE(*status))
118      {
119        return;
120      }
121
122    tableType=TABLE_NONE;
123    if(data->cnvData!=NULL) {
124        tableType|=TABLE_BASE;
125    }
126    if(data->extData!=NULL) {
127        tableType|=TABLE_EXT;
128    }
129
130    mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
131
132    if(U_FAILURE(*status))
133      {
134        fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
135                cnvName,
136                "cnv",
137                u_errorName(*status));
138        return;
139      }
140
141    if(VERBOSE)
142      {
143        printf("- Opened udata %s.%s\n", cnvName, "cnv");
144      }
145
146
147    /* all read only, clean, platform independent data.  Mmmm. :)  */
148    udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
149    size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
150    /* Now, write the table */
151    if(tableType&TABLE_BASE) {
152        size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
153    }
154    if(tableType&TABLE_EXT) {
155        size += data->extData->write(data->extData, &data->staticData, mem, tableType);
156    }
157
158    sz2 = udata_finish(mem, status);
159    if(size != sz2)
160    {
161        fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
162        *status=U_INTERNAL_PROGRAM_ERROR;
163    }
164    if(VERBOSE)
165    {
166      printf("- Wrote %u bytes to the udata.\n", (int)sz2);
167    }
168}
169
/*
 * Indexes into options[] below. The two lists must stay in the same order
 * because main() looks options up as options[OPT_...].
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_COUNT               /* one past the last option index */
};

/*
 * Command-line option descriptors handed to u_parseArgs() in main().
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    /*
     * Long-form-only options (the usage text lists no short form for them);
     * '\1' presumably stands in for "no short option" - confirm against uoptions.h.
     */
    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
};
192
193int main(int argc, char* argv[])
194{
195    ConvData data;
196    UErrorCode err = U_ZERO_ERROR, localError;
197    char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
198    const char* destdir, *arg;
199    size_t destdirlen;
200    char* dot = NULL, *outBasename;
201    char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
202    char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
203    UVersionInfo icuVersion;
204    UBool printFilename;
205
206    err = U_ZERO_ERROR;
207
208    U_MAIN_INIT_ARGS(argc, argv);
209
210    /* Set up the ICU version number */
211    u_getVersion(icuVersion);
212    uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
213
214    /* preset then read command line options */
215    options[OPT_DESTDIR].value=u_getDataDirectory();
216    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
217
218    /* error handling, printing usage message */
219    if(argc<0) {
220        fprintf(stderr,
221            "error in command line argument \"%s\"\n",
222            argv[-argc]);
223    } else if(argc<2) {
224        argc=-1;
225    }
226    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
227        FILE *stdfile=argc<0 ? stderr : stdout;
228        fprintf(stdfile,
229            "usage: %s [-options] files...\n"
230            "\tread .ucm codepage mapping files and write .cnv files\n"
231            "options:\n"
232            "\t-h or -? or --help  this usage text\n"
233            "\t-V or --version     show a version message\n"
234            "\t-c or --copyright   include a copyright notice\n"
235            "\t-d or --destdir     destination directory, followed by the path\n"
236            "\t-v or --verbose     Turn on verbose output\n",
237            argv[0]);
238        fprintf(stdfile,
239            "\t      --small       Generate smaller .cnv files. They will be\n"
240            "\t                    significantly smaller but may not be compatible with\n"
241            "\t                    older versions of ICU and will require heap memory\n"
242            "\t                    allocation when loaded.\n"
243            "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
244        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
245    }
246
247    if(options[OPT_VERSION].doesOccur) {
248        printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
249               dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
250        printf("%s\n", U_COPYRIGHT_STRING);
251        exit(0);
252    }
253
254    /* get the options values */
255    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
256    destdir = options[OPT_DESTDIR].value;
257    VERBOSE = options[OPT_VERBOSE].doesOccur;
258    SMALL = options[OPT_SMALL].doesOccur;
259
260    if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
261        IGNORE_SISO_CHECK = TRUE;
262    }
263
264    if (destdir != NULL && *destdir != 0) {
265        uprv_strcpy(outFileName, destdir);
266        destdirlen = uprv_strlen(destdir);
267        outBasename = outFileName + destdirlen;
268        if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
269            *outBasename++ = U_FILE_SEP_CHAR;
270            ++destdirlen;
271        }
272    } else {
273        destdirlen = 0;
274        outBasename = outFileName;
275    }
276
277#if DEBUG
278    {
279      int i;
280      printf("makeconv: processing %d files...\n", argc - 1);
281      for(i=1; i<argc; ++i) {
282        printf("%s ", argv[i]);
283      }
284      printf("\n");
285      fflush(stdout);
286    }
287#endif
288
289    err = U_ZERO_ERROR;
290    printFilename = (UBool) (argc > 2 || VERBOSE);
291    for (++argv; --argc; ++argv)
292    {
293        arg = getLongPathname(*argv);
294
295        /* Check for potential buffer overflow */
296        if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
297        {
298            fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
299            return U_BUFFER_OVERFLOW_ERROR;
300        }
301
302        /*produces the right destination path for display*/
303        if (destdirlen != 0)
304        {
305            const char *basename;
306
307            /* find the last file sepator */
308            basename = findBasename(arg);
309            uprv_strcpy(outBasename, basename);
310        }
311        else
312        {
313            uprv_strcpy(outFileName, arg);
314        }
315
316        /*removes the extension if any is found*/
317        dot = uprv_strrchr(outBasename, '.');
318        if (dot)
319        {
320            *dot = '\0';
321        }
322
323        /* the basename without extension is the converter name */
324        uprv_strcpy(cnvName, outBasename);
325
326        /*Adds the target extension*/
327        uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
328
329#if DEBUG
330        printf("makeconv: processing %s  ...\n", arg);
331        fflush(stdout);
332#endif
333        localError = U_ZERO_ERROR;
334        initConvData(&data);
335        createConverter(&data, arg, &localError);
336
337        if (U_FAILURE(localError))
338        {
339            /* if an error is found, print out an error msg and keep going */
340            fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
341                u_errorName(localError));
342            if(U_SUCCESS(err)) {
343                err = localError;
344            }
345        }
346        else
347        {
348            /* Insure the static data name matches the  file name */
349            /* Changed to ignore directory and only compare base name
350             LDH 1/2/08*/
351            char *p;
352            p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
353
354            if(p == NULL)            /* OK, try alternate */
355            {
356                p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
357                if(p == NULL)
358                {
359                    p=cnvName; /* If no separators, no problem */
360                }
361            }
362            else
363            {
364                p++;   /* If found separtor, don't include it in compare */
365            }
366            if(uprv_stricmp(p,data.staticData.name))
367            {
368                fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
369                    cnvName,  CONVERTER_FILE_EXTENSION,
370                    data.staticData.name);
371            }
372
373            uprv_strcpy((char*)data.staticData.name, cnvName);
374
375            if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
376                fprintf(stderr,
377                    "Error: A converter name must contain only invariant characters.\n"
378                    "%s is not a valid converter name.\n",
379                    data.staticData.name);
380                if(U_SUCCESS(err)) {
381                    err = U_INVALID_TABLE_FORMAT;
382                }
383            }
384
385            uprv_strcpy(cnvNameWithPkg, cnvName);
386
387            localError = U_ZERO_ERROR;
388            writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
389
390            if(U_FAILURE(localError))
391            {
392                /* if an error is found, print out an error msg and keep going*/
393                fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
394                    u_errorName(localError));
395                if(U_SUCCESS(err)) {
396                    err = localError;
397                }
398            }
399            else if (printFilename)
400            {
401                puts(outBasename);
402            }
403        }
404        fflush(stdout);
405        fflush(stderr);
406
407        cleanupConvData(&data);
408    }
409
410    return err;
411}
412
413static void
414getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
415    if( (name[0]=='i' || name[0]=='I') &&
416        (name[1]=='b' || name[1]=='B') &&
417        (name[2]=='m' || name[2]=='M')
418    ) {
419        name+=3;
420        if(*name=='-') {
421            ++name;
422        }
423        *pPlatform=UCNV_IBM;
424        *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
425    } else {
426        *pPlatform=UCNV_UNKNOWN;
427        *pCCSID=0;
428    }
429}
430
431static void
432readHeader(ConvData *data,
433           FileStream* convFile,
434           const char* converterName,
435           UErrorCode *pErrorCode) {
436    char line[1024];
437    char *s, *key, *value;
438    const UConverterStaticData *prototype;
439    UConverterStaticData *staticData;
440
441    if(U_FAILURE(*pErrorCode)) {
442        return;
443    }
444
445    staticData=&data->staticData;
446    staticData->platform=UCNV_IBM;
447    staticData->subCharLen=0;
448
449    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
450        /* basic parsing and handling of state-related items */
451        if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
452            continue;
453        }
454
455        /* stop at the beginning of the mapping section */
456        if(uprv_strcmp(line, "CHARMAP")==0) {
457            break;
458        }
459
460        /* collect the information from the header field, ignore unknown keys */
461        if(uprv_strcmp(key, "code_set_name")==0) {
462            if(*value!=0) {
463                uprv_strcpy((char *)staticData->name, value);
464                getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
465            }
466        } else if(uprv_strcmp(key, "subchar")==0) {
467            uint8_t bytes[UCNV_EXT_MAX_BYTES];
468            int8_t length;
469
470            s=value;
471            length=ucm_parseBytes(bytes, line, (const char **)&s);
472            if(1<=length && length<=4 && *s==0) {
473                staticData->subCharLen=length;
474                uprv_memcpy(staticData->subChar, bytes, length);
475            } else {
476                fprintf(stderr, "error: illegal <subchar> %s\n", value);
477                *pErrorCode=U_INVALID_TABLE_FORMAT;
478                return;
479            }
480        } else if(uprv_strcmp(key, "subchar1")==0) {
481            uint8_t bytes[UCNV_EXT_MAX_BYTES];
482
483            s=value;
484            if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
485                staticData->subChar1=bytes[0];
486            } else {
487                fprintf(stderr, "error: illegal <subchar1> %s\n", value);
488                *pErrorCode=U_INVALID_TABLE_FORMAT;
489                return;
490            }
491        }
492    }
493
494    /* copy values from the UCMFile to the static data */
495    staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
496    staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
497    staticData->conversionType=data->ucm->states.conversionType;
498
499    if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
500        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
501        *pErrorCode=U_INVALID_TABLE_FORMAT;
502        return;
503    }
504
505    /*
506     * Now that we know the type, copy any 'default' values from the table.
507     * We need not check the type any further because the parser only
508     * recognizes what we have prototypes for.
509     *
510     * For delta (extension-only) tables, copy values from the base file
511     * instead, see createConverter().
512     */
513    if(data->ucm->baseName[0]==0) {
514        prototype=ucnv_converterStaticData[staticData->conversionType];
515        if(prototype!=NULL) {
516            if(staticData->name[0]==0) {
517                uprv_strcpy((char *)staticData->name, prototype->name);
518            }
519
520            if(staticData->codepage==0) {
521                staticData->codepage=prototype->codepage;
522            }
523
524            if(staticData->platform==0) {
525                staticData->platform=prototype->platform;
526            }
527
528            if(staticData->minBytesPerChar==0) {
529                staticData->minBytesPerChar=prototype->minBytesPerChar;
530            }
531
532            if(staticData->maxBytesPerChar==0) {
533                staticData->maxBytesPerChar=prototype->maxBytesPerChar;
534            }
535
536            if(staticData->subCharLen==0) {
537                staticData->subCharLen=prototype->subCharLen;
538                if(prototype->subCharLen>0) {
539                    uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
540                }
541            }
542        }
543    }
544
545    if(data->ucm->states.outputType<0) {
546        data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
547    }
548
549    if( staticData->subChar1!=0 &&
550            (staticData->minBytesPerChar>1 ||
551                (staticData->conversionType!=UCNV_MBCS &&
552                 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
553    ) {
554        fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
555        *pErrorCode=U_INVALID_TABLE_FORMAT;
556    }
557}
558
559/* return TRUE if a base table was read, FALSE for an extension table */
560static UBool
561readFile(ConvData *data, const char* converterName,
562         UErrorCode *pErrorCode) {
563    char line[1024];
564    char *end;
565    FileStream *convFile;
566
567    UCMStates *baseStates;
568    UBool dataIsBase;
569
570    if(U_FAILURE(*pErrorCode)) {
571        return FALSE;
572    }
573
574    data->ucm=ucm_open();
575
576    convFile=T_FileStream_open(converterName, "r");
577    if(convFile==NULL) {
578        *pErrorCode=U_FILE_ACCESS_ERROR;
579        return FALSE;
580    }
581
582    readHeader(data, convFile, converterName, pErrorCode);
583    if(U_FAILURE(*pErrorCode)) {
584        return FALSE;
585    }
586
587    if(data->ucm->baseName[0]==0) {
588        dataIsBase=TRUE;
589        baseStates=&data->ucm->states;
590        ucm_processStates(baseStates, IGNORE_SISO_CHECK);
591    } else {
592        dataIsBase=FALSE;
593        baseStates=NULL;
594    }
595
596    /* read the base table */
597    ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
598    if(U_FAILURE(*pErrorCode)) {
599        return FALSE;
600    }
601
602    /* read an extension table if there is one */
603    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
604        end=uprv_strchr(line, 0);
605        while(line<end &&
606              (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
607            --end;
608        }
609        *end=0;
610
611        if(line[0]=='#' || u_skipWhitespace(line)==end) {
612            continue; /* ignore empty and comment lines */
613        }
614
615        if(0==uprv_strcmp(line, "CHARMAP")) {
616            /* read the extension table */
617            ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
618        } else {
619            fprintf(stderr, "unexpected text after the base mapping table\n");
620        }
621        break;
622    }
623
624    T_FileStream_close(convFile);
625
626    if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
627        fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
628        *pErrorCode=U_INVALID_TABLE_FORMAT;
629    }
630
631    return dataIsBase;
632}
633
634static void
635createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
636    ConvData baseData;
637    UBool dataIsBase;
638
639    UConverterStaticData *staticData;
640    UCMStates *states, *baseStates;
641
642    if(U_FAILURE(*pErrorCode)) {
643        return;
644    }
645
646    initConvData(data);
647
648    dataIsBase=readFile(data, converterName, pErrorCode);
649    if(U_FAILURE(*pErrorCode)) {
650        return;
651    }
652
653    staticData=&data->staticData;
654    states=&data->ucm->states;
655
656    if(dataIsBase) {
657        /*
658         * Build a normal .cnv file with a base table
659         * and an optional extension table.
660         */
661        data->cnvData=MBCSOpen(data->ucm);
662        if(data->cnvData==NULL) {
663            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
664
665        } else if(!data->cnvData->isValid(data->cnvData,
666                            staticData->subChar, staticData->subCharLen)
667        ) {
668            fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
669            *pErrorCode=U_INVALID_TABLE_FORMAT;
670
671        } else if(staticData->subChar1!=0 &&
672                    !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
673        ) {
674            fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
675            *pErrorCode=U_INVALID_TABLE_FORMAT;
676
677        } else if(
678            data->ucm->ext->mappingsLength>0 &&
679            !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
680        ) {
681            *pErrorCode=U_INVALID_TABLE_FORMAT;
682        } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
683            /* sort the table so that it can be turned into UTF-8-friendly data */
684            ucm_sortTable(data->ucm->base);
685        }
686
687        if(U_SUCCESS(*pErrorCode)) {
688            if(
689                /* add the base table after ucm_checkBaseExt()! */
690                !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
691            ) {
692                *pErrorCode=U_INVALID_TABLE_FORMAT;
693            } else {
694                /*
695                 * addTable() may have requested moving more mappings to the extension table
696                 * if they fit into the base toUnicode table but not into the
697                 * base fromUnicode table.
698                 * (Especially for UTF-8-friendly fromUnicode tables.)
699                 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
700                 * to be excluded from the extension toUnicode data.
701                 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
702                 * the base fromUnicode table.
703                 */
704                ucm_moveMappings(data->ucm->base, data->ucm->ext);
705                ucm_sortTable(data->ucm->ext);
706                if(data->ucm->ext->mappingsLength>0) {
707                    /* prepare the extension table, if there is one */
708                    data->extData=CnvExtOpen(data->ucm);
709                    if(data->extData==NULL) {
710                        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
711                    } else if(
712                        !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
713                    ) {
714                        *pErrorCode=U_INVALID_TABLE_FORMAT;
715                    }
716                }
717            }
718        }
719    } else {
720        /* Build an extension-only .cnv file. */
721        char baseFilename[500];
722        char *basename;
723
724        initConvData(&baseData);
725
726        /* assemble a path/filename for data->ucm->baseName */
727        uprv_strcpy(baseFilename, converterName);
728        basename=(char *)findBasename(baseFilename);
729        uprv_strcpy(basename, data->ucm->baseName);
730        uprv_strcat(basename, ".ucm");
731
732        /* read the base table */
733        dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
734        if(U_FAILURE(*pErrorCode)) {
735            return;
736        } else if(!dataIsBase) {
737            fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
738            *pErrorCode=U_INVALID_TABLE_FORMAT;
739        } else {
740            /* prepare the extension table */
741            data->extData=CnvExtOpen(data->ucm);
742            if(data->extData==NULL) {
743                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
744            } else {
745                /* fill in gaps in extension file header fields */
746                UCMapping *m, *mLimit;
747                uint8_t fallbackFlags;
748
749                baseStates=&baseData.ucm->states;
750                if(states->conversionType==UCNV_DBCS) {
751                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
752                } else if(states->minCharLength==0) {
753                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
754                }
755                if(states->maxCharLength<states->minCharLength) {
756                    staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
757                }
758
759                if(staticData->subCharLen==0) {
760                    uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
761                    staticData->subCharLen=baseData.staticData.subCharLen;
762                }
763                /*
764                 * do not copy subChar1 -
765                 * only use what is explicitly specified
766                 * because it cannot be unset in the extension file header
767                 */
768
769                /* get the fallback flags */
770                fallbackFlags=0;
771                for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
772                    m<mLimit && fallbackFlags!=3;
773                    ++m
774                ) {
775                    if(m->f==1) {
776                        fallbackFlags|=1;
777                    } else if(m->f==3) {
778                        fallbackFlags|=2;
779                    }
780                }
781
782                if(fallbackFlags&1) {
783                    staticData->hasFromUnicodeFallback=TRUE;
784                }
785                if(fallbackFlags&2) {
786                    staticData->hasToUnicodeFallback=TRUE;
787                }
788
789                if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
790                    fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
791                    *pErrorCode=U_INVALID_TABLE_FORMAT;
792
793                } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
794                    fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
795                    *pErrorCode=U_INVALID_TABLE_FORMAT;
796
797                } else if(
798                    !ucm_checkValidity(data->ucm->ext, baseStates) ||
799                    !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
800                ) {
801                    *pErrorCode=U_INVALID_TABLE_FORMAT;
802                } else {
803                    if(states->maxCharLength>1) {
804                        /*
805                         * When building a normal .cnv file with a base table
806                         * for an MBCS (not SBCS) table with explicit precision flags,
807                         * the MBCSAddTable() function marks some mappings for moving
808                         * to the extension table.
809                         * They fit into the base toUnicode table but not into the
810                         * base fromUnicode table.
811                         * (Note: We do have explicit precision flags because they are
812                         * required for extension table generation, and
813                         * ucm_checkBaseExt() verified it.)
814                         *
815                         * We do not call MBCSAddTable() here (we probably could)
816                         * so we need to do the analysis before building the extension table.
817                         * We assume that MBCSAddTable() will build a UTF-8-friendly table.
818                         * Redundant mappings in the extension table are ok except they cost some size.
819                         *
820                         * Do this after ucm_checkBaseExt().
821                         */
822                        const MBCSData *mbcsData=MBCSGetDummy();
823                        int32_t needsMove=0;
824                        for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
825                            m<mLimit;
826                            ++m
827                        ) {
828                            if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
829                                m->f|=MBCS_FROM_U_EXT_FLAG;
830                                m->moveFlag=UCM_MOVE_TO_EXT;
831                                ++needsMove;
832                            }
833                        }
834
835                        if(needsMove!=0) {
836                            ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
837                            ucm_sortTable(data->ucm->ext);
838                        }
839                    }
840                    if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
841                        *pErrorCode=U_INVALID_TABLE_FORMAT;
842                    }
843                }
844            }
845        }
846
847        cleanupConvData(&baseData);
848    }
849}
850
851/*
852 * Hey, Emacs, please set the following:
853 *
854 * Local Variables:
855 * indent-tabs-mode: nil
856 * End:
857 *
858 */
859