1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 ********************************************************************************
5 *
6 *   Copyright (C) 1998-2015, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ********************************************************************************
10 *
11 *
12 *  makeconv.cpp:
13 *  tool creating a binary (compressed) representation of the conversion mapping
14 *  table (IBM NLTC ucmap format).
15 *
16 *  05/04/2000    helena     Added fallback mapping into the picture...
17 *  06/29/2000  helena      Major rewrite of the callback APIs.
18 */
19
20#include <stdio.h>
21#include "unicode/putil.h"
22#include "unicode/ucnv_err.h"
23#include "charstr.h"
24#include "ucnv_bld.h"
25#include "ucnv_imp.h"
26#include "ucnv_cnv.h"
27#include "cstring.h"
28#include "cmemory.h"
29#include "uinvchar.h"
30#include "filestrm.h"
31#include "toolutil.h"
32#include "uoptions.h"
33#include "unicode/udata.h"
34#include "unewdata.h"
35#include "uparse.h"
36#include "ucm.h"
37#include "makeconv.h"
38#include "genmbcs.h"
39
40#define DEBUG 0
41
42typedef struct ConvData {
43    UCMFile *ucm;
44    NewConverter *cnvData, *extData;
45    UConverterSharedData sharedData;
46    UConverterStaticData staticData;
47} ConvData;
48
49static void
50initConvData(ConvData *data) {
51    uprv_memset(data, 0, sizeof(ConvData));
52    data->sharedData.structSize=sizeof(UConverterSharedData);
53    data->staticData.structSize=sizeof(UConverterStaticData);
54    data->sharedData.staticData=&data->staticData;
55}
56
57static void
58cleanupConvData(ConvData *data) {
59    if(data!=NULL) {
60        if(data->cnvData!=NULL) {
61            data->cnvData->close(data->cnvData);
62            data->cnvData=NULL;
63        }
64        if(data->extData!=NULL) {
65            data->extData->close(data->extData);
66            data->extData=NULL;
67        }
68        ucm_close(data->ucm);
69        data->ucm=NULL;
70    }
71}
72
/*
 * From ucnvstat.c: prototype static data for the data-based converters.
 * readHeader() indexes this table with the conversion type and uses a
 * non-NULL entry to fill in header fields that the .ucm file left unset.
 */
U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
77
/*
 * Global option flags, set in main() from the command line.
 * They have external linkage; presumably other makeconv sources read them
 * as well (SMALL, for example, is not read anywhere in this file).
 */
UBool VERBOSE = FALSE;              /* --verbose: print progress messages and output file names */
UBool QUIET = FALSE;                /* --quiet: suppresses the name-mismatch warning in main() */
UBool SMALL = FALSE;                /* --small: request smaller .cnv files (see the usage text) */
UBool IGNORE_SISO_CHECK = FALSE;    /* --ignore-siso-check: passed to ucm_processStates() */
85
/*
 * Read the .ucm file named by converterName and build the table data
 * for it in *data. Defined near the end of this file.
 */
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);

/*
 * Set up the UNewData and write the converter.
 * Does nothing if *status already indicates a failure.
 */
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
94
/*
 * Whether writeConverterData() passes U_COPYRIGHT_STRING to udata_create().
 * The initial value is only a default: main() overwrites it with whether
 * the copyright option occurred on the command line.
 */
UBool haveCopyright=TRUE;
96
/*
 * Data header description that is passed to udata_create() for every .cnv file.
 * Not const: main() copies the ICU version into dataVersion at startup.
 * The first two formatVersion fields are also what --version prints.
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
110
111static void
112writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
113{
114    UNewDataMemory *mem = NULL;
115    uint32_t sz2;
116    uint32_t size = 0;
117    int32_t tableType;
118
119    if(U_FAILURE(*status))
120      {
121        return;
122      }
123
124    tableType=TABLE_NONE;
125    if(data->cnvData!=NULL) {
126        tableType|=TABLE_BASE;
127    }
128    if(data->extData!=NULL) {
129        tableType|=TABLE_EXT;
130    }
131
132    mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
133
134    if(U_FAILURE(*status))
135      {
136        fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
137                cnvName,
138                "cnv",
139                u_errorName(*status));
140        return;
141      }
142
143    if(VERBOSE)
144      {
145        printf("- Opened udata %s.%s\n", cnvName, "cnv");
146      }
147
148
149    /* all read only, clean, platform independent data.  Mmmm. :)  */
150    udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
151    size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
152    /* Now, write the table */
153    if(tableType&TABLE_BASE) {
154        size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
155    }
156    if(tableType&TABLE_EXT) {
157        size += data->extData->write(data->extData, &data->staticData, mem, tableType);
158    }
159
160    sz2 = udata_finish(mem, status);
161    if(size != sz2)
162    {
163        fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
164        *status=U_INTERNAL_PROGRAM_ERROR;
165    }
166    if(VERBOSE)
167    {
168      printf("- Wrote %u bytes to the udata.\n", (int)sz2);
169    }
170}
171
/*
 * Indexes into options[] below; main() uses them as options[OPT_...].
 * The order here must therefore match the order of the options[] initializers.
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_QUIET,

    /* number of options; not referenced in this file (main() uses UPRV_LENGTHOF(options)) */
    OPT_COUNT
};
185
/*
 * Command line option table for u_parseArgs(), in the order of the OPT_ constants above.
 * Not const: main() presets the destination directory value before parsing,
 * and reads the doesOccur/value fields after parsing.
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    /* long-only options without arguments; '\1' is presumably a placeholder short name - TODO confirm */
    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    UOPTION_QUIET,
};
197
198int main(int argc, char* argv[])
199{
200    ConvData data;
201    char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
202
203    U_MAIN_INIT_ARGS(argc, argv);
204
205    /* Set up the ICU version number */
206    UVersionInfo icuVersion;
207    u_getVersion(icuVersion);
208    uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
209
210    /* preset then read command line options */
211    options[OPT_DESTDIR].value=u_getDataDirectory();
212    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
213
214    /* error handling, printing usage message */
215    if(argc<0) {
216        fprintf(stderr,
217            "error in command line argument \"%s\"\n",
218            argv[-argc]);
219    } else if(argc<2) {
220        argc=-1;
221    }
222    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
223        FILE *stdfile=argc<0 ? stderr : stdout;
224        fprintf(stdfile,
225            "usage: %s [-options] files...\n"
226            "\tread .ucm codepage mapping files and write .cnv files\n"
227            "options:\n"
228            "\t-h or -? or --help  this usage text\n"
229            "\t-V or --version     show a version message\n"
230            "\t-c or --copyright   include a copyright notice\n"
231            "\t-d or --destdir     destination directory, followed by the path\n"
232            "\t-v or --verbose     Turn on verbose output\n"
233            "\t-q or --quiet       do not display warnings and progress\n",
234            argv[0]);
235        fprintf(stdfile,
236            "\t      --small       Generate smaller .cnv files. They will be\n"
237            "\t                    significantly smaller but may not be compatible with\n"
238            "\t                    older versions of ICU and will require heap memory\n"
239            "\t                    allocation when loaded.\n"
240            "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
241        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
242    }
243
244    if(options[OPT_VERSION].doesOccur) {
245        printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
246               dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
247        printf("%s\n", U_COPYRIGHT_STRING);
248        exit(0);
249    }
250
251    /* get the options values */
252    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
253    const char *destdir = options[OPT_DESTDIR].value;
254    VERBOSE = options[OPT_VERBOSE].doesOccur;
255    QUIET = options[OPT_QUIET].doesOccur;
256    SMALL = options[OPT_SMALL].doesOccur;
257
258    if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
259        IGNORE_SISO_CHECK = TRUE;
260    }
261
262    icu::CharString outFileName;
263    UErrorCode err = U_ZERO_ERROR;
264    if (destdir != NULL && *destdir != 0) {
265        outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
266        if (U_FAILURE(err)) {
267            return err;
268        }
269    }
270    int32_t outBasenameStart = outFileName.length();
271
272#if DEBUG
273    {
274      int i;
275      printf("makeconv: processing %d files...\n", argc - 1);
276      for(i=1; i<argc; ++i) {
277        printf("%s ", argv[i]);
278      }
279      printf("\n");
280      fflush(stdout);
281    }
282#endif
283
284    UBool printFilename = (UBool) (argc > 2 || VERBOSE);
285    for (++argv; --argc; ++argv)
286    {
287        UErrorCode localError = U_ZERO_ERROR;
288        const char *arg = getLongPathname(*argv);
289
290        /*produces the right destination path for display*/
291        outFileName.truncate(outBasenameStart);
292        if (outBasenameStart != 0)
293        {
294            /* find the last file sepator */
295            const char *basename = findBasename(arg);
296            outFileName.append(basename, localError);
297        }
298        else
299        {
300            outFileName.append(arg, localError);
301        }
302        if (U_FAILURE(localError)) {
303            return localError;
304        }
305
306        /*removes the extension if any is found*/
307        int32_t lastDotIndex = outFileName.lastIndexOf('.');
308        if (lastDotIndex >= outBasenameStart) {
309            outFileName.truncate(lastDotIndex);
310        }
311
312        /* the basename without extension is the converter name */
313        if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
314            fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
315            return U_BUFFER_OVERFLOW_ERROR;
316        }
317        uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
318
319        /*Adds the target extension*/
320        outFileName.append(CONVERTER_FILE_EXTENSION, localError);
321        if (U_FAILURE(localError)) {
322            return localError;
323        }
324
325#if DEBUG
326        printf("makeconv: processing %s  ...\n", arg);
327        fflush(stdout);
328#endif
329        initConvData(&data);
330        createConverter(&data, arg, &localError);
331
332        if (U_FAILURE(localError))
333        {
334            /* if an error is found, print out an error msg and keep going */
335            fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
336                    outFileName.data(), arg, u_errorName(localError));
337            if(U_SUCCESS(err)) {
338                err = localError;
339            }
340        }
341        else
342        {
343            /* Insure the static data name matches the  file name */
344            /* Changed to ignore directory and only compare base name
345             LDH 1/2/08*/
346            char *p;
347            p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
348
349            if(p == NULL)            /* OK, try alternate */
350            {
351                p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
352                if(p == NULL)
353                {
354                    p=cnvName; /* If no separators, no problem */
355                }
356            }
357            else
358            {
359                p++;   /* If found separator, don't include it in compare */
360            }
361            if(uprv_stricmp(p,data.staticData.name) && !QUIET)
362            {
363                fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
364                    cnvName,  CONVERTER_FILE_EXTENSION,
365                    data.staticData.name);
366            }
367
368            uprv_strcpy((char*)data.staticData.name, cnvName);
369
370            if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
371                fprintf(stderr,
372                    "Error: A converter name must contain only invariant characters.\n"
373                    "%s is not a valid converter name.\n",
374                    data.staticData.name);
375                if(U_SUCCESS(err)) {
376                    err = U_INVALID_TABLE_FORMAT;
377                }
378            }
379
380            localError = U_ZERO_ERROR;
381            writeConverterData(&data, cnvName, destdir, &localError);
382
383            if(U_FAILURE(localError))
384            {
385                /* if an error is found, print out an error msg and keep going*/
386                fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
387                    u_errorName(localError));
388                if(U_SUCCESS(err)) {
389                    err = localError;
390                }
391            }
392            else if (printFilename)
393            {
394                puts(outFileName.data() + outBasenameStart);
395            }
396        }
397        fflush(stdout);
398        fflush(stderr);
399
400        cleanupConvData(&data);
401    }
402
403    return err;
404}
405
406static void
407getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
408    if( (name[0]=='i' || name[0]=='I') &&
409        (name[1]=='b' || name[1]=='B') &&
410        (name[2]=='m' || name[2]=='M')
411    ) {
412        name+=3;
413        if(*name=='-') {
414            ++name;
415        }
416        *pPlatform=UCNV_IBM;
417        *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
418    } else {
419        *pPlatform=UCNV_UNKNOWN;
420        *pCCSID=0;
421    }
422}
423
424static void
425readHeader(ConvData *data,
426           FileStream* convFile,
427           UErrorCode *pErrorCode) {
428    char line[1024];
429    char *s, *key, *value;
430    const UConverterStaticData *prototype;
431    UConverterStaticData *staticData;
432
433    if(U_FAILURE(*pErrorCode)) {
434        return;
435    }
436
437    staticData=&data->staticData;
438    staticData->platform=UCNV_IBM;
439    staticData->subCharLen=0;
440
441    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
442        /* basic parsing and handling of state-related items */
443        if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
444            continue;
445        }
446
447        /* stop at the beginning of the mapping section */
448        if(uprv_strcmp(line, "CHARMAP")==0) {
449            break;
450        }
451
452        /* collect the information from the header field, ignore unknown keys */
453        if(uprv_strcmp(key, "code_set_name")==0) {
454            if(*value!=0) {
455                uprv_strcpy((char *)staticData->name, value);
456                getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
457            }
458        } else if(uprv_strcmp(key, "subchar")==0) {
459            uint8_t bytes[UCNV_EXT_MAX_BYTES];
460            int8_t length;
461
462            s=value;
463            length=ucm_parseBytes(bytes, line, (const char **)&s);
464            if(1<=length && length<=4 && *s==0) {
465                staticData->subCharLen=length;
466                uprv_memcpy(staticData->subChar, bytes, length);
467            } else {
468                fprintf(stderr, "error: illegal <subchar> %s\n", value);
469                *pErrorCode=U_INVALID_TABLE_FORMAT;
470                return;
471            }
472        } else if(uprv_strcmp(key, "subchar1")==0) {
473            uint8_t bytes[UCNV_EXT_MAX_BYTES];
474
475            s=value;
476            if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
477                staticData->subChar1=bytes[0];
478            } else {
479                fprintf(stderr, "error: illegal <subchar1> %s\n", value);
480                *pErrorCode=U_INVALID_TABLE_FORMAT;
481                return;
482            }
483        }
484    }
485
486    /* copy values from the UCMFile to the static data */
487    staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
488    staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
489    staticData->conversionType=data->ucm->states.conversionType;
490
491    if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
492        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
493        *pErrorCode=U_INVALID_TABLE_FORMAT;
494        return;
495    }
496
497    /*
498     * Now that we know the type, copy any 'default' values from the table.
499     * We need not check the type any further because the parser only
500     * recognizes what we have prototypes for.
501     *
502     * For delta (extension-only) tables, copy values from the base file
503     * instead, see createConverter().
504     */
505    if(data->ucm->baseName[0]==0) {
506        prototype=ucnv_converterStaticData[staticData->conversionType];
507        if(prototype!=NULL) {
508            if(staticData->name[0]==0) {
509                uprv_strcpy((char *)staticData->name, prototype->name);
510            }
511
512            if(staticData->codepage==0) {
513                staticData->codepage=prototype->codepage;
514            }
515
516            if(staticData->platform==0) {
517                staticData->platform=prototype->platform;
518            }
519
520            if(staticData->minBytesPerChar==0) {
521                staticData->minBytesPerChar=prototype->minBytesPerChar;
522            }
523
524            if(staticData->maxBytesPerChar==0) {
525                staticData->maxBytesPerChar=prototype->maxBytesPerChar;
526            }
527
528            if(staticData->subCharLen==0) {
529                staticData->subCharLen=prototype->subCharLen;
530                if(prototype->subCharLen>0) {
531                    uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
532                }
533            }
534        }
535    }
536
537    if(data->ucm->states.outputType<0) {
538        data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
539    }
540
541    if( staticData->subChar1!=0 &&
542            (staticData->minBytesPerChar>1 ||
543                (staticData->conversionType!=UCNV_MBCS &&
544                 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
545    ) {
546        fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
547        *pErrorCode=U_INVALID_TABLE_FORMAT;
548    }
549}
550
551/* return TRUE if a base table was read, FALSE for an extension table */
552static UBool
553readFile(ConvData *data, const char* converterName,
554         UErrorCode *pErrorCode) {
555    char line[1024];
556    char *end;
557    FileStream *convFile;
558
559    UCMStates *baseStates;
560    UBool dataIsBase;
561
562    if(U_FAILURE(*pErrorCode)) {
563        return FALSE;
564    }
565
566    data->ucm=ucm_open();
567
568    convFile=T_FileStream_open(converterName, "r");
569    if(convFile==NULL) {
570        *pErrorCode=U_FILE_ACCESS_ERROR;
571        return FALSE;
572    }
573
574    readHeader(data, convFile, pErrorCode);
575    if(U_FAILURE(*pErrorCode)) {
576        return FALSE;
577    }
578
579    if(data->ucm->baseName[0]==0) {
580        dataIsBase=TRUE;
581        baseStates=&data->ucm->states;
582        ucm_processStates(baseStates, IGNORE_SISO_CHECK);
583    } else {
584        dataIsBase=FALSE;
585        baseStates=NULL;
586    }
587
588    /* read the base table */
589    ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
590    if(U_FAILURE(*pErrorCode)) {
591        return FALSE;
592    }
593
594    /* read an extension table if there is one */
595    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
596        end=uprv_strchr(line, 0);
597        while(line<end &&
598              (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
599            --end;
600        }
601        *end=0;
602
603        if(line[0]=='#' || u_skipWhitespace(line)==end) {
604            continue; /* ignore empty and comment lines */
605        }
606
607        if(0==uprv_strcmp(line, "CHARMAP")) {
608            /* read the extension table */
609            ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
610        } else {
611            fprintf(stderr, "unexpected text after the base mapping table\n");
612        }
613        break;
614    }
615
616    T_FileStream_close(convFile);
617
618    if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
619        fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
620        *pErrorCode=U_INVALID_TABLE_FORMAT;
621    }
622
623    return dataIsBase;
624}
625
626static void
627createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
628    ConvData baseData;
629    UBool dataIsBase;
630
631    UConverterStaticData *staticData;
632    UCMStates *states, *baseStates;
633
634    if(U_FAILURE(*pErrorCode)) {
635        return;
636    }
637
638    initConvData(data);
639
640    dataIsBase=readFile(data, converterName, pErrorCode);
641    if(U_FAILURE(*pErrorCode)) {
642        return;
643    }
644
645    staticData=&data->staticData;
646    states=&data->ucm->states;
647
648    if(dataIsBase) {
649        /*
650         * Build a normal .cnv file with a base table
651         * and an optional extension table.
652         */
653        data->cnvData=MBCSOpen(data->ucm);
654        if(data->cnvData==NULL) {
655            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
656
657        } else if(!data->cnvData->isValid(data->cnvData,
658                            staticData->subChar, staticData->subCharLen)
659        ) {
660            fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
661            *pErrorCode=U_INVALID_TABLE_FORMAT;
662
663        } else if(staticData->subChar1!=0 &&
664                    !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
665        ) {
666            fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
667            *pErrorCode=U_INVALID_TABLE_FORMAT;
668
669        } else if(
670            data->ucm->ext->mappingsLength>0 &&
671            !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
672        ) {
673            *pErrorCode=U_INVALID_TABLE_FORMAT;
674        } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
675            /* sort the table so that it can be turned into UTF-8-friendly data */
676            ucm_sortTable(data->ucm->base);
677        }
678
679        if(U_SUCCESS(*pErrorCode)) {
680            if(
681                /* add the base table after ucm_checkBaseExt()! */
682                !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
683            ) {
684                *pErrorCode=U_INVALID_TABLE_FORMAT;
685            } else {
686                /*
687                 * addTable() may have requested moving more mappings to the extension table
688                 * if they fit into the base toUnicode table but not into the
689                 * base fromUnicode table.
690                 * (Especially for UTF-8-friendly fromUnicode tables.)
691                 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
692                 * to be excluded from the extension toUnicode data.
693                 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
694                 * the base fromUnicode table.
695                 */
696                ucm_moveMappings(data->ucm->base, data->ucm->ext);
697                ucm_sortTable(data->ucm->ext);
698                if(data->ucm->ext->mappingsLength>0) {
699                    /* prepare the extension table, if there is one */
700                    data->extData=CnvExtOpen(data->ucm);
701                    if(data->extData==NULL) {
702                        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
703                    } else if(
704                        !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
705                    ) {
706                        *pErrorCode=U_INVALID_TABLE_FORMAT;
707                    }
708                }
709            }
710        }
711    } else {
712        /* Build an extension-only .cnv file. */
713        char baseFilename[500];
714        char *basename;
715
716        initConvData(&baseData);
717
718        /* assemble a path/filename for data->ucm->baseName */
719        uprv_strcpy(baseFilename, converterName);
720        basename=(char *)findBasename(baseFilename);
721        uprv_strcpy(basename, data->ucm->baseName);
722        uprv_strcat(basename, ".ucm");
723
724        /* read the base table */
725        dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
726        if(U_FAILURE(*pErrorCode)) {
727            return;
728        } else if(!dataIsBase) {
729            fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
730            *pErrorCode=U_INVALID_TABLE_FORMAT;
731        } else {
732            /* prepare the extension table */
733            data->extData=CnvExtOpen(data->ucm);
734            if(data->extData==NULL) {
735                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
736            } else {
737                /* fill in gaps in extension file header fields */
738                UCMapping *m, *mLimit;
739                uint8_t fallbackFlags;
740
741                baseStates=&baseData.ucm->states;
742                if(states->conversionType==UCNV_DBCS) {
743                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
744                } else if(states->minCharLength==0) {
745                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
746                }
747                if(states->maxCharLength<states->minCharLength) {
748                    staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
749                }
750
751                if(staticData->subCharLen==0) {
752                    uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
753                    staticData->subCharLen=baseData.staticData.subCharLen;
754                }
755                /*
756                 * do not copy subChar1 -
757                 * only use what is explicitly specified
758                 * because it cannot be unset in the extension file header
759                 */
760
761                /* get the fallback flags */
762                fallbackFlags=0;
763                for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
764                    m<mLimit && fallbackFlags!=3;
765                    ++m
766                ) {
767                    if(m->f==1) {
768                        fallbackFlags|=1;
769                    } else if(m->f==3) {
770                        fallbackFlags|=2;
771                    }
772                }
773
774                if(fallbackFlags&1) {
775                    staticData->hasFromUnicodeFallback=TRUE;
776                }
777                if(fallbackFlags&2) {
778                    staticData->hasToUnicodeFallback=TRUE;
779                }
780
781                if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
782                    fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
783                    *pErrorCode=U_INVALID_TABLE_FORMAT;
784
785                } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
786                    fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
787                    *pErrorCode=U_INVALID_TABLE_FORMAT;
788
789                } else if(
790                    !ucm_checkValidity(data->ucm->ext, baseStates) ||
791                    !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
792                ) {
793                    *pErrorCode=U_INVALID_TABLE_FORMAT;
794                } else {
795                    if(states->maxCharLength>1) {
796                        /*
797                         * When building a normal .cnv file with a base table
798                         * for an MBCS (not SBCS) table with explicit precision flags,
799                         * the MBCSAddTable() function marks some mappings for moving
800                         * to the extension table.
801                         * They fit into the base toUnicode table but not into the
802                         * base fromUnicode table.
803                         * (Note: We do have explicit precision flags because they are
804                         * required for extension table generation, and
805                         * ucm_checkBaseExt() verified it.)
806                         *
807                         * We do not call MBCSAddTable() here (we probably could)
808                         * so we need to do the analysis before building the extension table.
809                         * We assume that MBCSAddTable() will build a UTF-8-friendly table.
810                         * Redundant mappings in the extension table are ok except they cost some size.
811                         *
812                         * Do this after ucm_checkBaseExt().
813                         */
814                        const MBCSData *mbcsData=MBCSGetDummy();
815                        int32_t needsMove=0;
816                        for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
817                            m<mLimit;
818                            ++m
819                        ) {
820                            if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
821                                m->f|=MBCS_FROM_U_EXT_FLAG;
822                                m->moveFlag=UCM_MOVE_TO_EXT;
823                                ++needsMove;
824                            }
825                        }
826
827                        if(needsMove!=0) {
828                            ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
829                            ucm_sortTable(data->ucm->ext);
830                        }
831                    }
832                    if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
833                        *pErrorCode=U_INVALID_TABLE_FORMAT;
834                    }
835                }
836            }
837        }
838
839        cleanupConvData(&baseData);
840    }
841}
842
843/*
844 * Hey, Emacs, please set the following:
845 *
846 * Local Variables:
847 * indent-tabs-mode: nil
848 * End:
849 *
850 */
851