1/*
2 ********************************************************************************
3 *
4 *   Copyright (C) 1998-2015, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
10 *  makeconv.cpp:
11 *  tool creating a binary (compressed) representation of the conversion mapping
12 *  table (IBM NLTC ucmap format).
13 *
14 *  05/04/2000    helena     Added fallback mapping into the picture...
15 *  06/29/2000  helena      Major rewrite of the callback APIs.
16 */
17
18#include <stdio.h>
19#include "unicode/putil.h"
20#include "unicode/ucnv_err.h"
21#include "charstr.h"
22#include "ucnv_bld.h"
23#include "ucnv_imp.h"
24#include "ucnv_cnv.h"
25#include "cstring.h"
26#include "cmemory.h"
27#include "uinvchar.h"
28#include "filestrm.h"
29#include "toolutil.h"
30#include "uoptions.h"
31#include "unicode/udata.h"
32#include "unewdata.h"
33#include "uparse.h"
34#include "ucm.h"
35#include "makeconv.h"
36#include "genmbcs.h"
37
38#define DEBUG 0
39
40typedef struct ConvData {
41    UCMFile *ucm;
42    NewConverter *cnvData, *extData;
43    UConverterSharedData sharedData;
44    UConverterStaticData staticData;
45} ConvData;
46
47static void
48initConvData(ConvData *data) {
49    uprv_memset(data, 0, sizeof(ConvData));
50    data->sharedData.structSize=sizeof(UConverterSharedData);
51    data->staticData.structSize=sizeof(UConverterStaticData);
52    data->sharedData.staticData=&data->staticData;
53}
54
55static void
56cleanupConvData(ConvData *data) {
57    if(data!=NULL) {
58        if(data->cnvData!=NULL) {
59            data->cnvData->close(data->cnvData);
60            data->cnvData=NULL;
61        }
62        if(data->extData!=NULL) {
63            data->extData->close(data->extData);
64            data->extData=NULL;
65        }
66        ucm_close(data->ucm);
67        data->ucm=NULL;
68    }
69}
70
/*
 * from ucnvstat.c - static prototypes of data-based converters
 *
 * readHeader() indexes this array with the conversion type found in the
 * .ucm header and copies default values from the entry (when it is non-NULL).
 */
U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
75
/*
 * Global - verbosity and other command-line switches.
 * All of them are set from the parsed options in main().
 */
UBool VERBOSE = FALSE;            /* --verbose: print progress messages while writing */
UBool QUIET = FALSE;              /* --quiet: suppress the "claims to be" name warning */
UBool SMALL = FALSE;              /* --small: see the usage text in main(); not read in this file */
UBool IGNORE_SISO_CHECK = FALSE;  /* --ignore-siso-check: passed to ucm_processStates() */
83
/*
 * Read the .ucm file converterName and build the converter tables in *data.
 * Defined further down in this file.
 */
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);

/*
 * Set up the UNewData and write the converter.
 */
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
92
/*
 * Whether writeConverterData() passes U_COPYRIGHT_STRING to udata_create().
 * main() overwrites this with the state of the copyright option.
 */
UBool haveCopyright=TRUE;
94
/*
 * UDataInfo header passed to udata_create() for every .cnv file written.
 * The dataVersion entry is filled in by main() with the ICU version;
 * formatVersion[0] and [1] are also what the version option prints.
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
108
109static void
110writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
111{
112    UNewDataMemory *mem = NULL;
113    uint32_t sz2;
114    uint32_t size = 0;
115    int32_t tableType;
116
117    if(U_FAILURE(*status))
118      {
119        return;
120      }
121
122    tableType=TABLE_NONE;
123    if(data->cnvData!=NULL) {
124        tableType|=TABLE_BASE;
125    }
126    if(data->extData!=NULL) {
127        tableType|=TABLE_EXT;
128    }
129
130    mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
131
132    if(U_FAILURE(*status))
133      {
134        fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
135                cnvName,
136                "cnv",
137                u_errorName(*status));
138        return;
139      }
140
141    if(VERBOSE)
142      {
143        printf("- Opened udata %s.%s\n", cnvName, "cnv");
144      }
145
146
147    /* all read only, clean, platform independent data.  Mmmm. :)  */
148    udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
149    size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
150    /* Now, write the table */
151    if(tableType&TABLE_BASE) {
152        size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
153    }
154    if(tableType&TABLE_EXT) {
155        size += data->extData->write(data->extData, &data->staticData, mem, tableType);
156    }
157
158    sz2 = udata_finish(mem, status);
159    if(size != sz2)
160    {
161        fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
162        *status=U_INTERNAL_PROGRAM_ERROR;
163    }
164    if(VERBOSE)
165    {
166      printf("- Wrote %u bytes to the udata.\n", (int)sz2);
167    }
168}
169
/*
 * Indexes into the options[] array below; main() uses them as
 * options[OPT_...]. The order here must match the order of the
 * initializers in options[].
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_QUIET,

    OPT_COUNT
};
183
/*
 * Command-line option table handed to u_parseArgs() in main().
 * Entries are in the same order as the OPT_... enum constants above.
 * "small" and "ignore-siso-check" are listed in the usage text with a
 * long form only.
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    UOPTION_QUIET,
};
195
196int main(int argc, char* argv[])
197{
198    ConvData data;
199    char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
200
201    U_MAIN_INIT_ARGS(argc, argv);
202
203    /* Set up the ICU version number */
204    UVersionInfo icuVersion;
205    u_getVersion(icuVersion);
206    uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
207
208    /* preset then read command line options */
209    options[OPT_DESTDIR].value=u_getDataDirectory();
210    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
211
212    /* error handling, printing usage message */
213    if(argc<0) {
214        fprintf(stderr,
215            "error in command line argument \"%s\"\n",
216            argv[-argc]);
217    } else if(argc<2) {
218        argc=-1;
219    }
220    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
221        FILE *stdfile=argc<0 ? stderr : stdout;
222        fprintf(stdfile,
223            "usage: %s [-options] files...\n"
224            "\tread .ucm codepage mapping files and write .cnv files\n"
225            "options:\n"
226            "\t-h or -? or --help  this usage text\n"
227            "\t-V or --version     show a version message\n"
228            "\t-c or --copyright   include a copyright notice\n"
229            "\t-d or --destdir     destination directory, followed by the path\n"
230            "\t-v or --verbose     Turn on verbose output\n"
231            "\t-q or --quiet       do not display warnings and progress\n",
232            argv[0]);
233        fprintf(stdfile,
234            "\t      --small       Generate smaller .cnv files. They will be\n"
235            "\t                    significantly smaller but may not be compatible with\n"
236            "\t                    older versions of ICU and will require heap memory\n"
237            "\t                    allocation when loaded.\n"
238            "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
239        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
240    }
241
242    if(options[OPT_VERSION].doesOccur) {
243        printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
244               dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
245        printf("%s\n", U_COPYRIGHT_STRING);
246        exit(0);
247    }
248
249    /* get the options values */
250    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
251    const char *destdir = options[OPT_DESTDIR].value;
252    VERBOSE = options[OPT_VERBOSE].doesOccur;
253    QUIET = options[OPT_QUIET].doesOccur;
254    SMALL = options[OPT_SMALL].doesOccur;
255
256    if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
257        IGNORE_SISO_CHECK = TRUE;
258    }
259
260    icu::CharString outFileName;
261    UErrorCode err = U_ZERO_ERROR;
262    if (destdir != NULL && *destdir != 0) {
263        outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
264        if (U_FAILURE(err)) {
265            return err;
266        }
267    }
268    int32_t outBasenameStart = outFileName.length();
269
270#if DEBUG
271    {
272      int i;
273      printf("makeconv: processing %d files...\n", argc - 1);
274      for(i=1; i<argc; ++i) {
275        printf("%s ", argv[i]);
276      }
277      printf("\n");
278      fflush(stdout);
279    }
280#endif
281
282    UBool printFilename = (UBool) (argc > 2 || VERBOSE);
283    for (++argv; --argc; ++argv)
284    {
285        UErrorCode localError = U_ZERO_ERROR;
286        const char *arg = getLongPathname(*argv);
287
288        /*produces the right destination path for display*/
289        outFileName.truncate(outBasenameStart);
290        if (outBasenameStart != 0)
291        {
292            /* find the last file sepator */
293            const char *basename = findBasename(arg);
294            outFileName.append(basename, localError);
295        }
296        else
297        {
298            outFileName.append(arg, localError);
299        }
300        if (U_FAILURE(localError)) {
301            return localError;
302        }
303
304        /*removes the extension if any is found*/
305        int32_t lastDotIndex = outFileName.lastIndexOf('.');
306        if (lastDotIndex >= outBasenameStart) {
307            outFileName.truncate(lastDotIndex);
308        }
309
310        /* the basename without extension is the converter name */
311        if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
312            fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
313            return U_BUFFER_OVERFLOW_ERROR;
314        }
315        uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
316
317        /*Adds the target extension*/
318        outFileName.append(CONVERTER_FILE_EXTENSION, localError);
319        if (U_FAILURE(localError)) {
320            return localError;
321        }
322
323#if DEBUG
324        printf("makeconv: processing %s  ...\n", arg);
325        fflush(stdout);
326#endif
327        initConvData(&data);
328        createConverter(&data, arg, &localError);
329
330        if (U_FAILURE(localError))
331        {
332            /* if an error is found, print out an error msg and keep going */
333            fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
334                    outFileName.data(), arg, u_errorName(localError));
335            if(U_SUCCESS(err)) {
336                err = localError;
337            }
338        }
339        else
340        {
341            /* Insure the static data name matches the  file name */
342            /* Changed to ignore directory and only compare base name
343             LDH 1/2/08*/
344            char *p;
345            p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
346
347            if(p == NULL)            /* OK, try alternate */
348            {
349                p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
350                if(p == NULL)
351                {
352                    p=cnvName; /* If no separators, no problem */
353                }
354            }
355            else
356            {
357                p++;   /* If found separator, don't include it in compare */
358            }
359            if(uprv_stricmp(p,data.staticData.name) && !QUIET)
360            {
361                fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
362                    cnvName,  CONVERTER_FILE_EXTENSION,
363                    data.staticData.name);
364            }
365
366            uprv_strcpy((char*)data.staticData.name, cnvName);
367
368            if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
369                fprintf(stderr,
370                    "Error: A converter name must contain only invariant characters.\n"
371                    "%s is not a valid converter name.\n",
372                    data.staticData.name);
373                if(U_SUCCESS(err)) {
374                    err = U_INVALID_TABLE_FORMAT;
375                }
376            }
377
378            localError = U_ZERO_ERROR;
379            writeConverterData(&data, cnvName, destdir, &localError);
380
381            if(U_FAILURE(localError))
382            {
383                /* if an error is found, print out an error msg and keep going*/
384                fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
385                    u_errorName(localError));
386                if(U_SUCCESS(err)) {
387                    err = localError;
388                }
389            }
390            else if (printFilename)
391            {
392                puts(outFileName.data() + outBasenameStart);
393            }
394        }
395        fflush(stdout);
396        fflush(stderr);
397
398        cleanupConvData(&data);
399    }
400
401    return err;
402}
403
404static void
405getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
406    if( (name[0]=='i' || name[0]=='I') &&
407        (name[1]=='b' || name[1]=='B') &&
408        (name[2]=='m' || name[2]=='M')
409    ) {
410        name+=3;
411        if(*name=='-') {
412            ++name;
413        }
414        *pPlatform=UCNV_IBM;
415        *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
416    } else {
417        *pPlatform=UCNV_UNKNOWN;
418        *pCCSID=0;
419    }
420}
421
422static void
423readHeader(ConvData *data,
424           FileStream* convFile,
425           UErrorCode *pErrorCode) {
426    char line[1024];
427    char *s, *key, *value;
428    const UConverterStaticData *prototype;
429    UConverterStaticData *staticData;
430
431    if(U_FAILURE(*pErrorCode)) {
432        return;
433    }
434
435    staticData=&data->staticData;
436    staticData->platform=UCNV_IBM;
437    staticData->subCharLen=0;
438
439    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
440        /* basic parsing and handling of state-related items */
441        if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
442            continue;
443        }
444
445        /* stop at the beginning of the mapping section */
446        if(uprv_strcmp(line, "CHARMAP")==0) {
447            break;
448        }
449
450        /* collect the information from the header field, ignore unknown keys */
451        if(uprv_strcmp(key, "code_set_name")==0) {
452            if(*value!=0) {
453                uprv_strcpy((char *)staticData->name, value);
454                getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
455            }
456        } else if(uprv_strcmp(key, "subchar")==0) {
457            uint8_t bytes[UCNV_EXT_MAX_BYTES];
458            int8_t length;
459
460            s=value;
461            length=ucm_parseBytes(bytes, line, (const char **)&s);
462            if(1<=length && length<=4 && *s==0) {
463                staticData->subCharLen=length;
464                uprv_memcpy(staticData->subChar, bytes, length);
465            } else {
466                fprintf(stderr, "error: illegal <subchar> %s\n", value);
467                *pErrorCode=U_INVALID_TABLE_FORMAT;
468                return;
469            }
470        } else if(uprv_strcmp(key, "subchar1")==0) {
471            uint8_t bytes[UCNV_EXT_MAX_BYTES];
472
473            s=value;
474            if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
475                staticData->subChar1=bytes[0];
476            } else {
477                fprintf(stderr, "error: illegal <subchar1> %s\n", value);
478                *pErrorCode=U_INVALID_TABLE_FORMAT;
479                return;
480            }
481        }
482    }
483
484    /* copy values from the UCMFile to the static data */
485    staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
486    staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
487    staticData->conversionType=data->ucm->states.conversionType;
488
489    if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
490        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
491        *pErrorCode=U_INVALID_TABLE_FORMAT;
492        return;
493    }
494
495    /*
496     * Now that we know the type, copy any 'default' values from the table.
497     * We need not check the type any further because the parser only
498     * recognizes what we have prototypes for.
499     *
500     * For delta (extension-only) tables, copy values from the base file
501     * instead, see createConverter().
502     */
503    if(data->ucm->baseName[0]==0) {
504        prototype=ucnv_converterStaticData[staticData->conversionType];
505        if(prototype!=NULL) {
506            if(staticData->name[0]==0) {
507                uprv_strcpy((char *)staticData->name, prototype->name);
508            }
509
510            if(staticData->codepage==0) {
511                staticData->codepage=prototype->codepage;
512            }
513
514            if(staticData->platform==0) {
515                staticData->platform=prototype->platform;
516            }
517
518            if(staticData->minBytesPerChar==0) {
519                staticData->minBytesPerChar=prototype->minBytesPerChar;
520            }
521
522            if(staticData->maxBytesPerChar==0) {
523                staticData->maxBytesPerChar=prototype->maxBytesPerChar;
524            }
525
526            if(staticData->subCharLen==0) {
527                staticData->subCharLen=prototype->subCharLen;
528                if(prototype->subCharLen>0) {
529                    uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
530                }
531            }
532        }
533    }
534
535    if(data->ucm->states.outputType<0) {
536        data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
537    }
538
539    if( staticData->subChar1!=0 &&
540            (staticData->minBytesPerChar>1 ||
541                (staticData->conversionType!=UCNV_MBCS &&
542                 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
543    ) {
544        fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
545        *pErrorCode=U_INVALID_TABLE_FORMAT;
546    }
547}
548
549/* return TRUE if a base table was read, FALSE for an extension table */
550static UBool
551readFile(ConvData *data, const char* converterName,
552         UErrorCode *pErrorCode) {
553    char line[1024];
554    char *end;
555    FileStream *convFile;
556
557    UCMStates *baseStates;
558    UBool dataIsBase;
559
560    if(U_FAILURE(*pErrorCode)) {
561        return FALSE;
562    }
563
564    data->ucm=ucm_open();
565
566    convFile=T_FileStream_open(converterName, "r");
567    if(convFile==NULL) {
568        *pErrorCode=U_FILE_ACCESS_ERROR;
569        return FALSE;
570    }
571
572    readHeader(data, convFile, pErrorCode);
573    if(U_FAILURE(*pErrorCode)) {
574        return FALSE;
575    }
576
577    if(data->ucm->baseName[0]==0) {
578        dataIsBase=TRUE;
579        baseStates=&data->ucm->states;
580        ucm_processStates(baseStates, IGNORE_SISO_CHECK);
581    } else {
582        dataIsBase=FALSE;
583        baseStates=NULL;
584    }
585
586    /* read the base table */
587    ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
588    if(U_FAILURE(*pErrorCode)) {
589        return FALSE;
590    }
591
592    /* read an extension table if there is one */
593    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
594        end=uprv_strchr(line, 0);
595        while(line<end &&
596              (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
597            --end;
598        }
599        *end=0;
600
601        if(line[0]=='#' || u_skipWhitespace(line)==end) {
602            continue; /* ignore empty and comment lines */
603        }
604
605        if(0==uprv_strcmp(line, "CHARMAP")) {
606            /* read the extension table */
607            ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
608        } else {
609            fprintf(stderr, "unexpected text after the base mapping table\n");
610        }
611        break;
612    }
613
614    T_FileStream_close(convFile);
615
616    if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
617        fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
618        *pErrorCode=U_INVALID_TABLE_FORMAT;
619    }
620
621    return dataIsBase;
622}
623
624static void
625createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
626    ConvData baseData;
627    UBool dataIsBase;
628
629    UConverterStaticData *staticData;
630    UCMStates *states, *baseStates;
631
632    if(U_FAILURE(*pErrorCode)) {
633        return;
634    }
635
636    initConvData(data);
637
638    dataIsBase=readFile(data, converterName, pErrorCode);
639    if(U_FAILURE(*pErrorCode)) {
640        return;
641    }
642
643    staticData=&data->staticData;
644    states=&data->ucm->states;
645
646    if(dataIsBase) {
647        /*
648         * Build a normal .cnv file with a base table
649         * and an optional extension table.
650         */
651        data->cnvData=MBCSOpen(data->ucm);
652        if(data->cnvData==NULL) {
653            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
654
655        } else if(!data->cnvData->isValid(data->cnvData,
656                            staticData->subChar, staticData->subCharLen)
657        ) {
658            fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
659            *pErrorCode=U_INVALID_TABLE_FORMAT;
660
661        } else if(staticData->subChar1!=0 &&
662                    !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
663        ) {
664            fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
665            *pErrorCode=U_INVALID_TABLE_FORMAT;
666
667        } else if(
668            data->ucm->ext->mappingsLength>0 &&
669            !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
670        ) {
671            *pErrorCode=U_INVALID_TABLE_FORMAT;
672        } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
673            /* sort the table so that it can be turned into UTF-8-friendly data */
674            ucm_sortTable(data->ucm->base);
675        }
676
677        if(U_SUCCESS(*pErrorCode)) {
678            if(
679                /* add the base table after ucm_checkBaseExt()! */
680                !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
681            ) {
682                *pErrorCode=U_INVALID_TABLE_FORMAT;
683            } else {
684                /*
685                 * addTable() may have requested moving more mappings to the extension table
686                 * if they fit into the base toUnicode table but not into the
687                 * base fromUnicode table.
688                 * (Especially for UTF-8-friendly fromUnicode tables.)
689                 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
690                 * to be excluded from the extension toUnicode data.
691                 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
692                 * the base fromUnicode table.
693                 */
694                ucm_moveMappings(data->ucm->base, data->ucm->ext);
695                ucm_sortTable(data->ucm->ext);
696                if(data->ucm->ext->mappingsLength>0) {
697                    /* prepare the extension table, if there is one */
698                    data->extData=CnvExtOpen(data->ucm);
699                    if(data->extData==NULL) {
700                        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
701                    } else if(
702                        !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
703                    ) {
704                        *pErrorCode=U_INVALID_TABLE_FORMAT;
705                    }
706                }
707            }
708        }
709    } else {
710        /* Build an extension-only .cnv file. */
711        char baseFilename[500];
712        char *basename;
713
714        initConvData(&baseData);
715
716        /* assemble a path/filename for data->ucm->baseName */
717        uprv_strcpy(baseFilename, converterName);
718        basename=(char *)findBasename(baseFilename);
719        uprv_strcpy(basename, data->ucm->baseName);
720        uprv_strcat(basename, ".ucm");
721
722        /* read the base table */
723        dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
724        if(U_FAILURE(*pErrorCode)) {
725            return;
726        } else if(!dataIsBase) {
727            fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
728            *pErrorCode=U_INVALID_TABLE_FORMAT;
729        } else {
730            /* prepare the extension table */
731            data->extData=CnvExtOpen(data->ucm);
732            if(data->extData==NULL) {
733                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
734            } else {
735                /* fill in gaps in extension file header fields */
736                UCMapping *m, *mLimit;
737                uint8_t fallbackFlags;
738
739                baseStates=&baseData.ucm->states;
740                if(states->conversionType==UCNV_DBCS) {
741                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
742                } else if(states->minCharLength==0) {
743                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
744                }
745                if(states->maxCharLength<states->minCharLength) {
746                    staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
747                }
748
749                if(staticData->subCharLen==0) {
750                    uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
751                    staticData->subCharLen=baseData.staticData.subCharLen;
752                }
753                /*
754                 * do not copy subChar1 -
755                 * only use what is explicitly specified
756                 * because it cannot be unset in the extension file header
757                 */
758
759                /* get the fallback flags */
760                fallbackFlags=0;
761                for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
762                    m<mLimit && fallbackFlags!=3;
763                    ++m
764                ) {
765                    if(m->f==1) {
766                        fallbackFlags|=1;
767                    } else if(m->f==3) {
768                        fallbackFlags|=2;
769                    }
770                }
771
772                if(fallbackFlags&1) {
773                    staticData->hasFromUnicodeFallback=TRUE;
774                }
775                if(fallbackFlags&2) {
776                    staticData->hasToUnicodeFallback=TRUE;
777                }
778
779                if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
780                    fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
781                    *pErrorCode=U_INVALID_TABLE_FORMAT;
782
783                } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
784                    fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
785                    *pErrorCode=U_INVALID_TABLE_FORMAT;
786
787                } else if(
788                    !ucm_checkValidity(data->ucm->ext, baseStates) ||
789                    !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
790                ) {
791                    *pErrorCode=U_INVALID_TABLE_FORMAT;
792                } else {
793                    if(states->maxCharLength>1) {
794                        /*
795                         * When building a normal .cnv file with a base table
796                         * for an MBCS (not SBCS) table with explicit precision flags,
797                         * the MBCSAddTable() function marks some mappings for moving
798                         * to the extension table.
799                         * They fit into the base toUnicode table but not into the
800                         * base fromUnicode table.
801                         * (Note: We do have explicit precision flags because they are
802                         * required for extension table generation, and
803                         * ucm_checkBaseExt() verified it.)
804                         *
805                         * We do not call MBCSAddTable() here (we probably could)
806                         * so we need to do the analysis before building the extension table.
807                         * We assume that MBCSAddTable() will build a UTF-8-friendly table.
808                         * Redundant mappings in the extension table are ok except they cost some size.
809                         *
810                         * Do this after ucm_checkBaseExt().
811                         */
812                        const MBCSData *mbcsData=MBCSGetDummy();
813                        int32_t needsMove=0;
814                        for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
815                            m<mLimit;
816                            ++m
817                        ) {
818                            if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
819                                m->f|=MBCS_FROM_U_EXT_FLAG;
820                                m->moveFlag=UCM_MOVE_TO_EXT;
821                                ++needsMove;
822                            }
823                        }
824
825                        if(needsMove!=0) {
826                            ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
827                            ucm_sortTable(data->ucm->ext);
828                        }
829                    }
830                    if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
831                        *pErrorCode=U_INVALID_TABLE_FORMAT;
832                    }
833                }
834            }
835        }
836
837        cleanupConvData(&baseData);
838    }
839}
840
841/*
842 * Hey, Emacs, please set the following:
843 *
844 * Local Variables:
845 * indent-tabs-mode: nil
846 * End:
847 *
848 */
849