1103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/* 2103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius******************************************************************************* 359d709d503bab6e2b61931737e662dd293b40578ccornelius* Copyright (C) 2011-2013, International Business Machines 4103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* Corporation and others. All Rights Reserved. 5103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius******************************************************************************* 6103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* file name: ppucd.cpp 7103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* encoding: US-ASCII 8103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* tab size: 8 (not used) 9103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* indentation:4 10103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* 11103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* created on: 2011dec11 12103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* created by: Markus W. Scherer 13103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius*/ 14103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 15103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utypes.h" 16103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/uchar.h" 17103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "charstr.h" 18103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "cstring.h" 19103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "ppucd.h" 20103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uassert.h" 21103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uparse.h" 22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 23103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include <stdio.h> 24103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include <string.h> 25103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 26103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 27103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 28103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusU_NAMESPACE_BEGIN 29103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 30103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPropertyNames::~PropertyNames() {} 31103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 32103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusint32_t 33103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPropertyNames::getPropertyEnum(const char *name) const { 34103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return u_getPropertyEnum(name); 35103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 36103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 37103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusint32_t 38103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPropertyNames::getPropertyValueEnum(int32_t property, const char *name) const { 39103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return u_getPropertyValueEnum((UProperty)property, name); 40103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 41103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 42103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUniProps::UniProps() 43103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius : start(U_SENTINEL), end(U_SENTINEL), 4459d709d503bab6e2b61931737e662dd293b40578ccornelius bmg(U_SENTINEL), bpb(U_SENTINEL), 45103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL), 46103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius digitValue(-1), numericValue(NULL), 47103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius name(NULL), nameAlias(NULL) { 48103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius memset(binProps, 0, sizeof(binProps)); 49103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius memset(intProps, 0, sizeof(intProps)); 50103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius memset(age, 0, 4); 51103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 52103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 53103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUniProps::~UniProps() {} 54103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 55103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst int32_t PreparsedUCD::kNumLineBuffers; 56103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 57103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode) 58103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius : icuPnames(new PropertyNames()), pnames(icuPnames), 59103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius file(NULL), 60103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0), 61103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius lineNumber(0), 62103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius lineType(NO_LINE), 63103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fieldLimit(NULL), lineLimit(NULL) { 64103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U_FAILURE(errorCode)) { return; } 65103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 66103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) { 67103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius filename=NULL; 68103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius file=stdin; 69103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 70103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius file=fopen(filename, "r"); 71103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 72103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(file==NULL) { 73103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius perror("error opening preparsed UCD"); 7454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fprintf(stderr, "error opening preparsed UCD file %s\n", filename ? filename : "\"no file name given\""); 75103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_FILE_ACCESS_ERROR; 76103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return; 77103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 78103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 79103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius memset(ucdVersion, 0, 4); 80103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius lines[0][0]=0; 81103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 82103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 83103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::~PreparsedUCD() { 84103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(file!=stdin) { 85103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fclose(file); 86103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 87103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius delete icuPnames; 88103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 89103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 90103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// Same order as the LineType values. 91103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic const char *lineTypeStrings[]={ 92103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius NULL, 93103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius NULL, 94103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "ucd", 95103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "property", 96103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "binary", 97103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "value", 98103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "defaults", 99103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "block", 100103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "cp", 101103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "algnamesrange" 102103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}; 103103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 104103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::LineType 105103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::readLine(UErrorCode &errorCode) { 106103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U_FAILURE(errorCode)) { return NO_LINE; } 107103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Select the next available line buffer. 108103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while(!isLineBufferAvailable(lineIndex)) { 109103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius ++lineIndex; 110103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (lineIndex == kNumLineBuffers) { 111103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius lineIndex = 0; 112103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 113103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 114103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char *line=lines[lineIndex]; 115103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *line=0; 116103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius lineLimit=fieldLimit=line; 117103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius lineType=NO_LINE; 118103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char *result=fgets(line, sizeof(lines[0]), file); 119103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(result==NULL) { 120103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(ferror(file)) { 121103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius perror("error reading preparsed UCD"); 122103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, "error reading preparsed UCD before line %ld\n", (long)lineNumber); 123103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_FILE_ACCESS_ERROR; 124103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 125103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return NO_LINE; 126103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 127103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius ++lineNumber; 128103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(*line=='#') { 129103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fieldLimit=strchr(line, 0); 130103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return lineType=EMPTY_LINE; 131103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 132103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Remove trailing /r/n. 133103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char c; 134103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char *limit=strchr(line, 0); 135103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; } 136103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Remove trailing white space. 137103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; } 138103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *limit=0; 139103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius lineLimit=limit; 140103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(line==limit) { 141103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fieldLimit=limit; 142103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return lineType=EMPTY_LINE; 143103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 144103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Split by ';'. 145103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char *semi=line; 146103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while((semi=strchr(semi, ';'))!=NULL) { *semi++=0; } 147103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fieldLimit=strchr(line, 0); 148103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Determine the line type. 149103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t type; 150103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for(type=EMPTY_LINE+1;; ++type) { 151103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(type==LINE_TYPE_COUNT) { 152103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 153103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n", 154103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius line, (long)lineNumber); 155103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 156103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return NO_LINE; 157103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 158103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(0==strcmp(line, lineTypeStrings[type])) { 159103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 160103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 161103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 162103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius lineType=(LineType)type; 163103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) { 164103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius u_versionFromString(ucdVersion, fieldLimit+1); 165103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 166103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return lineType; 167103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 168103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 169103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst char * 170103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::firstField() { 171103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char *field=lines[lineIndex]; 172103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fieldLimit=strchr(field, 0); 173103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return field; 174103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 175103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 176103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst char * 177103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::nextField() { 178103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(fieldLimit==lineLimit) { return NULL; } 179103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char *field=fieldLimit+1; 180103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fieldLimit=strchr(field, 0); 181103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return field; 182103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 183103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 184103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst UniProps * 185103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) { 186103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U_FAILURE(errorCode)) { return NULL; } 187103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius newValues.clear(); 188103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(!lineHasPropertyValues()) { 189103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_ILLEGAL_ARGUMENT_ERROR; 190103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return NULL; 191103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 192103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius firstField(); 193103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const char *field=nextField(); 194103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(field==NULL) { 195103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // No range field after the type. 196103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 197103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: missing default/block/cp range field " 198103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "(no second field) on line %ld\n", 199103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (long)lineNumber); 200103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 201103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return NULL; 202103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 203103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 start, end; 204103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(!parseCodePointRange(field, start, end, errorCode)) { return NULL; } 205103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UniProps *props; 206103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius switch(lineType) { 207103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case DEFAULTS_LINE: 208103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(defaultLineIndex>=0) { 209103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 210103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: second line with default properties on line %ld\n", 211103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (long)lineNumber); 212103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 213103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return NULL; 214103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 215103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(start!=0 || end!=0x10ffff) { 216103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 217103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n", 218103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius field, (long)lineNumber); 219103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 220103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return NULL; 221103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 222103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props=&defaultProps; 223103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius defaultLineIndex=lineIndex; 224103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 225103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case BLOCK_LINE: 226103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius blockProps=defaultProps; // Block inherits default properties. 227103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props=&blockProps; 228103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius blockLineIndex=lineIndex; 229103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 230103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case CP_LINE: 231103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(blockProps.start<=start && end<=blockProps.end) { 232103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Code point range fully inside the last block inherits the block properties. 233103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius cpProps=blockProps; 234103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(start>blockProps.end || end<blockProps.start) { 235103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Code point range fully outside the last block inherits the default properties. 236103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius cpProps=defaultProps; 237103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 238103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Code point range partially overlapping with the last block is illegal. 239103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 240103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: cp range %s on line %ld only " 241103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "partially overlaps with block range %04lX..%04lX\n", 242103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius field, (long)lineNumber, (long)blockProps.start, (long)blockProps.end); 243103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 244103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return NULL; 245103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 246103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props=&cpProps; 247103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 248103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius default: 249103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Will not occur because of the range check above. 250103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_ILLEGAL_ARGUMENT_ERROR; 251103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return NULL; 252103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 253103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props->start=start; 254103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props->end=end; 255103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while((field=nextField())!=NULL) { 256103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(!parseProperty(*props, field, newValues, errorCode)) { return NULL; } 257103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 258103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return props; 259103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 260103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 261103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic const struct { 262103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const char *name; 263103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t prop; 264103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} ppucdProperties[]={ 265103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius { "Name_Alias", PPUCD_NAME_ALIAS }, 266103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius { "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS }, 267103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius { "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING } 268103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}; 269103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 270103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// Returns TRUE for "ok to continue parsing fields". 271103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUBool 272103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, 273103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UErrorCode &errorCode) { 274103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius CharString pBuffer; 275103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const char *p=field; 276103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const char *v=strchr(p, '='); 277103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int binaryValue; 278103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(*p=='-') { 279103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(v!=NULL) { 280103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 281103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: mix of binary-property-no and " 282103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "enum-property syntax '%s' on line %ld\n", 283103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius field, (long)lineNumber); 284103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 285103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return FALSE; 286103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 287103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius binaryValue=0; 288103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius ++p; 289103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(v==NULL) { 290103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius binaryValue=1; 291103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 292103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius binaryValue=-1; 293103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Copy out the property name rather than modifying the field (writing a NUL). 294103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius pBuffer.append(p, (int32_t)(v-p), errorCode); 295103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius p=pBuffer.data(); 296103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius ++v; 297103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 298103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t prop=pnames->getPropertyEnum(p); 299103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(prop<0) { 300103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for(int32_t i=0;; ++i) { 301103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(i==LENGTHOF(ppucdProperties)) { 302103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Ignore unknown property names. 303103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return TRUE; 304103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 305103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(0==uprv_stricmp(p, ppucdProperties[i].name)) { 306103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius prop=ppucdProperties[i].prop; 307103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(prop>=0); 308103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 309103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 310103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 311103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 312103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(prop<UCHAR_BINARY_LIMIT) { 313103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(binaryValue>=0) { 314103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.binProps[prop]=(UBool)binaryValue; 315103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 316103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // No binary value for a binary property. 317103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 318103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: enum-property syntax '%s' " 319103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "for binary property on line %ld\n", 320103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius field, (long)lineNumber); 321103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 322103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 323103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(binaryValue>=0) { 324103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Binary value for a non-binary property. 325103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 326103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: binary-property syntax '%s' " 327103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "for non-binary property on line %ld\n", 328103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius field, (long)lineNumber); 329103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 33054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } else if (prop < UCHAR_INT_START) { 33154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fprintf(stderr, 33254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n", 33354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius prop, (long)lineNumber); 33454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius errorCode=U_PARSE_ERROR; 335103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(prop<UCHAR_INT_LIMIT) { 336103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t value=pnames->getPropertyValueEnum(prop, v); 337103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) { 338103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work. 339103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char *end; 340103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius unsigned long ccc=uprv_strtoul(v, &end, 10); 341103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(v<end && *end==0 && ccc<=254) { 342103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius value=(int32_t)ccc; 343103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 344103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 345103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(value==UCHAR_INVALID_CODE) { 346103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 347103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: '%s' is not a valid value on line %ld\n", 348103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius field, (long)lineNumber); 349103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 350103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 351103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.intProps[prop-UCHAR_INT_START]=value; 352103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 353103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(*v=='<') { 354103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Do not parse default values like <code point>, just set null values. 355103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius switch(prop) { 356103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_BIDI_MIRRORING_GLYPH: 357103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.bmg=U_SENTINEL; 358103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 35959d709d503bab6e2b61931737e662dd293b40578ccornelius case UCHAR_BIDI_PAIRED_BRACKET: 36059d709d503bab6e2b61931737e662dd293b40578ccornelius props.bpb=U_SENTINEL; 36159d709d503bab6e2b61931737e662dd293b40578ccornelius break; 362103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SIMPLE_CASE_FOLDING: 363103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.scf=U_SENTINEL; 364103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 365103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SIMPLE_LOWERCASE_MAPPING: 366103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.slc=U_SENTINEL; 367103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 368103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SIMPLE_TITLECASE_MAPPING: 369103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.stc=U_SENTINEL; 370103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 371103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SIMPLE_UPPERCASE_MAPPING: 372103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.suc=U_SENTINEL; 373103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 374103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_CASE_FOLDING: 375103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.cf.remove(); 376103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 377103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_LOWERCASE_MAPPING: 378103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.lc.remove(); 379103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 380103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_TITLECASE_MAPPING: 381103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.tc.remove(); 382103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 383103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_UPPERCASE_MAPPING: 384103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.uc.remove(); 385103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 386103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SCRIPT_EXTENSIONS: 387103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.scx.clear(); 388103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 389103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius default: 390103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 391103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: '%s' is not a valid default value on line %ld\n", 392103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius field, (long)lineNumber); 393103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 394103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 395103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 396103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char c; 397103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius switch(prop) { 398103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_NUMERIC_VALUE: 399103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.numericValue=v; 400103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius c=*v; 401103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if('0'<=c && c<='9' && v[1]==0) { 402103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.digitValue=c-'0'; 403103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 404103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.digitValue=-1; 405103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 406103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 407103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_NAME: 408103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.name=v; 409103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 410103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_AGE: 411103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius u_versionFromString(props.age, v); // Writes 0.0.0.0 if v is not numeric. 412103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 413103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_BIDI_MIRRORING_GLYPH: 414103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.bmg=parseCodePoint(v, errorCode); 415103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 41659d709d503bab6e2b61931737e662dd293b40578ccornelius case UCHAR_BIDI_PAIRED_BRACKET: 41759d709d503bab6e2b61931737e662dd293b40578ccornelius props.bpb=parseCodePoint(v, errorCode); 41859d709d503bab6e2b61931737e662dd293b40578ccornelius break; 419103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SIMPLE_CASE_FOLDING: 420103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.scf=parseCodePoint(v, errorCode); 421103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 422103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SIMPLE_LOWERCASE_MAPPING: 423103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.slc=parseCodePoint(v, errorCode); 424103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 425103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SIMPLE_TITLECASE_MAPPING: 426103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.stc=parseCodePoint(v, errorCode); 427103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 428103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SIMPLE_UPPERCASE_MAPPING: 429103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.suc=parseCodePoint(v, errorCode); 430103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 431103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_CASE_FOLDING: 432103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius parseString(v, props.cf, errorCode); 433103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 434103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_LOWERCASE_MAPPING: 435103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius parseString(v, props.lc, errorCode); 436103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 437103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_TITLECASE_MAPPING: 438103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius parseString(v, props.tc, errorCode); 439103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 440103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_UPPERCASE_MAPPING: 441103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius parseString(v, props.uc, errorCode); 442103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 443103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case PPUCD_NAME_ALIAS: 444103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius props.nameAlias=v; 445103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 446103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case PPUCD_CONDITIONAL_CASE_MAPPINGS: 447103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case PPUCD_TURKIC_CASE_FOLDING: 448103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // No need to parse their values: They are hardcoded in the runtime library. 449103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 450103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case UCHAR_SCRIPT_EXTENSIONS: 451103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius parseScriptExtensions(v, props.scx, errorCode); 452103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 453103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius default: 454103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Ignore unhandled properties. 455103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return TRUE; 456103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 457103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 458103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U_SUCCESS(errorCode)) { 459103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius newValues.add((UChar32)prop); 460103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return TRUE; 461103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 462103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return FALSE; 463103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 464103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 465103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 466103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUBool 467103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) { 468103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U_FAILURE(errorCode)) { return FALSE; } 469103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(lineType!=ALG_NAMES_RANGE_LINE) { 470103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_ILLEGAL_ARGUMENT_ERROR; 471103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return FALSE; 472103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 473103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius firstField(); 474103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const char *field=nextField(); 475103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(field==NULL) { 476103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // No range field after the type. 477103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 478103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: missing algnamesrange range field " 479103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "(no second field) on line %ld\n", 480103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (long)lineNumber); 481103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 482103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return FALSE; 483103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 484103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return parseCodePointRange(field, start, end, errorCode); 485103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 486103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 487103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUChar32 488103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) { 489103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius char *end; 490103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius uint32_t value=(uint32_t)uprv_strtoul(s, &end, 16); 491103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(end<=s || *end!=0 || value>=0x110000) { 492103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 493103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: '%s' is not a valid code point on line %ld\n", 494103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius s, (long)lineNumber); 495103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 496103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return U_SENTINEL; 497103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 498103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return (UChar32)value; 499103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 500103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 501103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUBool 502103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) { 503103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius uint32_t st, e; 504103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius u_parseCodePointRange(s, &st, &e, &errorCode); 505103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U_FAILURE(errorCode)) { 506103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 507103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n", 508103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius s, (long)lineNumber); 509103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return FALSE; 510103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 511103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius start=(UChar32)st; 512103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius end=(UChar32)e; 513103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return TRUE; 514103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 515103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 516103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusvoid 517103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) { 518103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar *buffer=uni.getBuffer(-1); 519103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode); 520103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 521103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_ZERO_ERROR; 522103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius uni.releaseBuffer(0); 523103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius buffer=uni.getBuffer(length); 524103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode); 525103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 526103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius uni.releaseBuffer(length); 527103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U_FAILURE(errorCode)) { 528103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 529103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n", 530103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius s, (long)lineNumber); 531103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 532103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 533103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 534103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusvoid 535103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) { 536103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U_FAILURE(errorCode)) { return; } 537103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius scx.clear(); 538103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius CharString scString; 539103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for(;;) { 540103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const char *scs; 541103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const char *scLimit=strchr(s, ' '); 542103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(scLimit!=NULL) { 543103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data(); 544103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U_FAILURE(errorCode)) { return; } 545103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 546103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius scs=s; 547103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 548103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs); 549103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(script==UCHAR_INVALID_CODE) { 550103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 551103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: '%s' is not a valid script code on line %ld\n", 552103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius scs, (long)lineNumber); 553103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 554103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return; 555103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(scx.contains(script)) { 556103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, 557103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n", 558103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius scs, (long)lineNumber); 559103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 560103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return; 561103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 562103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius scx.add(script); 563103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 564103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(scLimit!=NULL) { 565103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius s=scLimit+1; 566103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 567103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 568103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 569103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 570103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(scx.isEmpty()) { 571103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber); 572103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errorCode=U_PARSE_ERROR; 573103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 574103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 575103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 576103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusU_NAMESPACE_END 577