150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 2009-2012, International Business Machines 550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Corporation and others. All Rights Reserved. 650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* file name: gennorm2.cpp 950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* encoding: US-ASCII 1050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* tab size: 8 (not used) 1150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* indentation:4 1250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created on: 2009nov25 1450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created by: Markus W. Scherer 1550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* This program reads text files that define Unicode normalization, 1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* parses them, and builds a binary data file. 1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*/ 1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h" 2127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "n2builder.h" 2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdio.h> 2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdlib.h> 2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <string.h> 2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/errorcode.h" 2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/localpointer.h" 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/putil.h" 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uchar.h" 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h" 3127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "charstr.h" 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h" 3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "toolutil.h" 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uoptions.h" 3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uparse.h" 3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_NORMALIZATION 3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unewdata.h" 3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN 4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool beVerbose=FALSE, haveCopyright=TRUE; 4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid parseFile(FILE *f, Normalizer2DataBuilder &builder); 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* -------------------------------------------------------------------------- */ 5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenum { 5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho HELP_H, 5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho HELP_QUESTION_MARK, 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho VERBOSE, 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho COPYRIGHT, 6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho SOURCEDIR, 6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho OUTPUT_FILENAME, 6250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNICODE_VERSION, 6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho OPT_FAST 6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UOption options[]={ 6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UOPTION_HELP_H, 6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UOPTION_HELP_QUESTION_MARK, 6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UOPTION_VERBOSE, 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UOPTION_COPYRIGHT, 7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UOPTION_SOURCEDIR, 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG), 7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG), 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UOPTION_DEF("fast", '\1', UOPT_NO_ARG) 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoextern "C" int 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehomain(int argc, char* argv[]) { 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_MAIN_INIT_ARGS(argc, argv); 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* preset then read command line options */ 8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho options[SOURCEDIR].value=""; 8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), options); 8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* error handling, printing usage message */ 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(argc<0) { 8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "error in command line argument \"%s\"\n", 8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho argv[-argc]); 9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!options[OUTPUT_FILENAME].doesOccur) { 9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho argc=-1; 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( argc<2 || 9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur 9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Broken into chunks because the C89 standard says the minimum 9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * required supported string length is 509 bytes. 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Usage: %s [-options] infiles+ -o outputfilename\n" 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\n" 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Reads the infiles with normalization data and\n" 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "creates a binary file (outputfilename) with the data.\n" 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\n", 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho argv[0]); 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Options:\n" 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t-h or -? or --help this usage text\n" 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t-v or --verbose verbose output\n" 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t-c or --copyright include a copyright notice\n" 11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t-u or --unicode Unicode version, followed by the version like 5.2.0\n"); 11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t-s or --sourcedir source directory, followed by the path\n" 11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t-o or --output output filename\n"); 11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t --fast optimize the .nrm file for fast normalization,\n" 11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t which might increase its size (Writes fully decomposed\n" 12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t regular mappings instead of delta mappings.\n" 12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t You should measure the runtime speed to make sure that\n" 12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\t this is a good trade-off.)\n"); 12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; 12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho beVerbose=options[VERBOSE].doesOccur; 12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho haveCopyright=options[COPYRIGHT].doesOccur; 12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IcuToolErrorCode errorCode("gennorm2/main()"); 13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_NORMALIZATION 13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 writes a dummy binary data file " 13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "because UCONFIG_NO_NORMALIZATION is set, \n" 13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "see icu/source/common/unicode/uconfig.h\n"); 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho udata_createDummy(NULL, NULL, options[OUTPUT_FILENAME].value, errorCode); 13827f654740f2a26ad62a5c155af9199af9e69b889claireho // Should not return an error since this is the expected behaviour if UCONFIG_NO_NORMALIZATION is on. 13927f654740f2a26ad62a5c155af9199af9e69b889claireho // return U_UNSUPPORTED_ERROR; 14027f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalPointer<Normalizer2DataBuilder> builder(new Normalizer2DataBuilder(errorCode)); 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode.assertSuccess(); 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 14783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(options[UNICODE_VERSION].doesOccur) { 14883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius builder->setUnicodeVersion(options[UNICODE_VERSION].value); 14983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(options[OPT_FAST].doesOccur) { 15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST); 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // prepare the filename beginning with the source dir 15627f654740f2a26ad62a5c155af9199af9e69b889claireho CharString filename(options[SOURCEDIR].value, errorCode); 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t pathLength=filename.length(); 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( pathLength>0 && 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho filename[pathLength-1]!=U_FILE_SEP_CHAR && 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho filename[pathLength-1]!=U_FILE_ALT_SEP_CHAR 16150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 16227f654740f2a26ad62a5c155af9199af9e69b889claireho filename.append(U_FILE_SEP_CHAR, errorCode); 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pathLength=filename.length(); 16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int i=1; i<argc; ++i) { 16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("gennorm2: processing %s\n", argv[i]); 16827f654740f2a26ad62a5c155af9199af9e69b889claireho filename.append(argv[i], errorCode); 16927f654740f2a26ad62a5c155af9199af9e69b889claireho LocalStdioFilePointer f(fopen(filename.data(), "r")); 17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(f==NULL) { 17127f654740f2a26ad62a5c155af9199af9e69b889claireho fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.data()); 17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_FILE_ACCESS_ERROR); 17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS); 17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho parseFile(f.getAlias(), *builder); 17627f654740f2a26ad62a5c155af9199af9e69b889claireho filename.truncate(pathLength); 17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder->writeBinaryFile(options[OUTPUT_FILENAME].value); 18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return errorCode.get(); 18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 18850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid parseFile(FILE *f, Normalizer2DataBuilder &builder) { 18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IcuToolErrorCode errorCode("gennorm2/parseFile()"); 19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char line[300]; 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t startCP, endCP; 19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(NULL!=fgets(line, (int)sizeof(line), f)) { 19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *comment=(char *)strchr(line, '#'); 19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(comment!=NULL) { 19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *comment=0; 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_rtrim(line); 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(line[0]==0) { 19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; // skip empty and comment-only lines 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(line[0]=='*') { 20283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const char *s=u_skipWhitespace(line+1); 20383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(0==strncmp(s, "Unicode", 7)) { 20483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius s=u_skipWhitespace(s+7); 20583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius builder.setUnicodeVersion(s); 20683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; // reserved syntax 20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *delimiter; 21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t rangeLength= 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_parseCodePointRangeAnyTerminator(line, &startCP, &endCP, &delimiter, errorCode); 21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(errorCode.isFailure()) { 21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line); 21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(errorCode.reset()); 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delimiter=u_skipWhitespace(delimiter); 21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(*delimiter==':') { 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *s=u_skipWhitespace(delimiter+1); 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *end; 22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unsigned long value=strtoul(s, &end, 10); 22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(end<=s || *u_skipWhitespace(end)!=0 || value>=0xff) { 22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "gennorm2 error: parsing ccc from %s\n", line); 22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_PARSE_ERROR); 22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) { 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder.setCC(c, (uint8_t)value); 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(*delimiter=='-') { 23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(*u_skipWhitespace(delimiter+1)!=0) { 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "gennorm2 error: parsing remove-mapping %s\n", line); 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_PARSE_ERROR); 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) { 23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder.removeMapping(c); 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(*delimiter=='=' || *delimiter=='>') { 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK]; 24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length=u_parseString(delimiter+1, uchars, LENGTHOF(uchars), NULL, errorCode); 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(errorCode.isFailure()) { 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "gennorm2 error: parsing mapping string from %s\n", line); 24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(errorCode.reset()); 24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 24750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString mapping(FALSE, uchars, length); 24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(*delimiter=='=') { 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(rangeLength!=1) { 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: round-trip mapping for more than 1 code point on %s\n", 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho line); 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_PARSE_ERROR); 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder.setRoundTripMapping((UChar32)startCP, mapping); 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) { 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder.setOneWayMapping(c, mapping); 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "gennorm2 error: unrecognized data line %s\n", line); 26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_PARSE_ERROR); 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif // !UCONFIG_NO_NORMALIZATION 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Hey, Emacs, please set the following: 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Local Variables: 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * indent-tabs-mode: nil 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * End: 27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 280