/*
*******************************************************************************
*
*   Copyright (C) 2009-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  gennorm2.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov25
*   created by: Markus W. Scherer
*
*   This program reads text files that define Unicode normalization,
*   parses them, and builds a binary data file.
*/
1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h"
2127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "n2builder.h"
2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdio.h>
2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdlib.h>
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <string.h>
2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/errorcode.h"
2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/localpointer.h"
2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/putil.h"
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uchar.h"
3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h"
3127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "charstr.h"
3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h"
3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "toolutil.h"
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uoptions.h"
3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uparse.h"
3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_NORMALIZATION
3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unewdata.h"
3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN
4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool beVerbose=FALSE, haveCopyright=TRUE;
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid parseFile(FILE *f, Normalizer2DataBuilder &builder);
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* -------------------------------------------------------------------------- */
5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenum {
5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    HELP_H,
5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    HELP_QUESTION_MARK,
5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    VERBOSE,
5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    COPYRIGHT,
6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    SOURCEDIR,
6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    OUTPUT_FILENAME,
6250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UNICODE_VERSION,
6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    OPT_FAST
6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UOption options[]={
6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UOPTION_HELP_H,
6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UOPTION_HELP_QUESTION_MARK,
6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UOPTION_VERBOSE,
7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UOPTION_COPYRIGHT,
7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UOPTION_SOURCEDIR,
7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),
7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UOPTION_DEF("fast", '\1', UOPT_NO_ARG)
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoextern "C" int
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehomain(int argc, char* argv[]) {
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_MAIN_INIT_ARGS(argc, argv);
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* preset then read command line options */
8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    options[SOURCEDIR].value="";
8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), options);
8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* error handling, printing usage message */
8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(argc<0) {
8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "error in command line argument \"%s\"\n",
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            argv[-argc]);
9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!options[OUTPUT_FILENAME].doesOccur) {
9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        argc=-1;
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( argc<2 ||
9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Broken into chunks because the C89 standard says the minimum
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * required supported string length is 509 bytes.
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "Usage: %s [-options] infiles+ -o outputfilename\n"
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\n"
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "Reads the infiles with normalization data and\n"
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "creates a binary file (outputfilename) with the data.\n"
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\n",
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            argv[0]);
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "Options:\n"
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t-h or -? or --help  this usage text\n"
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t-v or --verbose     verbose output\n"
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t-c or --copyright   include a copyright notice\n"
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t-u or --unicode     Unicode version, followed by the version like 5.2.0\n");
11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t-s or --sourcedir   source directory, followed by the path\n"
11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t-o or --output      output filename\n");
11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t      --fast        optimize the .nrm file for fast normalization,\n"
11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t                    which might increase its size  (Writes fully decomposed\n"
12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t                    regular mappings instead of delta mappings.\n"
12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t                    You should measure the runtime speed to make sure that\n"
12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            "\t                    this is a good trade-off.)\n");
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    beVerbose=options[VERBOSE].doesOccur;
12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    haveCopyright=options[COPYRIGHT].doesOccur;
12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IcuToolErrorCode errorCode("gennorm2/main()");
13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_NORMALIZATION
13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fprintf(stderr,
13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        "gennorm2 writes a dummy binary data file "
13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        "because UCONFIG_NO_NORMALIZATION is set, \n"
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        "see icu/source/common/unicode/uconfig.h\n");
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    udata_createDummy(NULL, NULL, options[OUTPUT_FILENAME].value, errorCode);
13827f654740f2a26ad62a5c155af9199af9e69b889claireho    // Should not return an error since this is the expected behaviour if UCONFIG_NO_NORMALIZATION is on.
13927f654740f2a26ad62a5c155af9199af9e69b889claireho    // return U_UNSUPPORTED_ERROR;
14027f654740f2a26ad62a5c155af9199af9e69b889claireho    return 0;
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalPointer<Normalizer2DataBuilder> builder(new Normalizer2DataBuilder(errorCode));
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    errorCode.assertSuccess();
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
14783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    if(options[UNICODE_VERSION].doesOccur) {
14883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        builder->setUnicodeVersion(options[UNICODE_VERSION].value);
14983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    }
15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(options[OPT_FAST].doesOccur) {
15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // prepare the filename beginning with the source dir
15627f654740f2a26ad62a5c155af9199af9e69b889claireho    CharString filename(options[SOURCEDIR].value, errorCode);
15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t pathLength=filename.length();
15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( pathLength>0 &&
15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        filename[pathLength-1]!=U_FILE_SEP_CHAR &&
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        filename[pathLength-1]!=U_FILE_ALT_SEP_CHAR
16150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
16227f654740f2a26ad62a5c155af9199af9e69b889claireho        filename.append(U_FILE_SEP_CHAR, errorCode);
16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pathLength=filename.length();
16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(int i=1; i<argc; ++i) {
16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("gennorm2: processing %s\n", argv[i]);
16827f654740f2a26ad62a5c155af9199af9e69b889claireho        filename.append(argv[i], errorCode);
16927f654740f2a26ad62a5c155af9199af9e69b889claireho        LocalStdioFilePointer f(fopen(filename.data(), "r"));
17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(f==NULL) {
17127f654740f2a26ad62a5c155af9199af9e69b889claireho            fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.data());
17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            exit(U_FILE_ACCESS_ERROR);
17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS);
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        parseFile(f.getAlias(), *builder);
17627f654740f2a26ad62a5c155af9199af9e69b889claireho        filename.truncate(pathLength);
17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return errorCode.get();
18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
18850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid parseFile(FILE *f, Normalizer2DataBuilder &builder) {
18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IcuToolErrorCode errorCode("gennorm2/parseFile()");
19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    char line[300];
19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t startCP, endCP;
19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(NULL!=fgets(line, (int)sizeof(line), f)) {
19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        char *comment=(char *)strchr(line, '#');
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(comment!=NULL) {
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *comment=0;
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        u_rtrim(line);
19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(line[0]==0) {
19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;  // skip empty and comment-only lines
20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(line[0]=='*') {
20283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            const char *s=u_skipWhitespace(line+1);
20383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            if(0==strncmp(s, "Unicode", 7)) {
20483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                s=u_skipWhitespace(s+7);
20583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                builder.setUnicodeVersion(s);
20683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            }
20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;  // reserved syntax
20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const char *delimiter;
21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t rangeLength=
21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            u_parseCodePointRangeAnyTerminator(line, &startCP, &endCP, &delimiter, errorCode);
21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(errorCode.isFailure()) {
21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line);
21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            exit(errorCode.reset());
21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delimiter=u_skipWhitespace(delimiter);
21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(*delimiter==':') {
21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const char *s=u_skipWhitespace(delimiter+1);
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            char *end;
22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            unsigned long value=strtoul(s, &end, 10);
22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(end<=s || *u_skipWhitespace(end)!=0 || value>=0xff) {
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fprintf(stderr, "gennorm2 error: parsing ccc from %s\n", line);
22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                exit(U_PARSE_ERROR);
22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                builder.setCC(c, (uint8_t)value);
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(*delimiter=='-') {
23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(*u_skipWhitespace(delimiter+1)!=0) {
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fprintf(stderr, "gennorm2 error: parsing remove-mapping %s\n", line);
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                exit(U_PARSE_ERROR);
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                builder.removeMapping(c);
23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(*delimiter=='=' || *delimiter=='>') {
24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK];
24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t length=u_parseString(delimiter+1, uchars, LENGTHOF(uchars), NULL, errorCode);
24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(errorCode.isFailure()) {
24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fprintf(stderr, "gennorm2 error: parsing mapping string from %s\n", line);
24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                exit(errorCode.reset());
24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
24750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeString mapping(FALSE, uchars, length);
24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(*delimiter=='=') {
24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(rangeLength!=1) {
25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fprintf(stderr,
25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            "gennorm2 error: round-trip mapping for more than 1 code point on %s\n",
25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            line);
25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    exit(U_PARSE_ERROR);
25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                builder.setRoundTripMapping((UChar32)startCP, mapping);
25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    builder.setOneWayMapping(c, mapping);
25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr, "gennorm2 error: unrecognized data line %s\n", line);
26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_PARSE_ERROR);
26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif // !UCONFIG_NO_NORMALIZATION
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */
280