/*
*******************************************************************************
*   Copyright (C) 2011-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ppucd.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2011dec11
*   created by: Markus W. Scherer
*/
14103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
15103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utypes.h"
16103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/uchar.h"
17103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "charstr.h"
18103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "cstring.h"
19103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "ppucd.h"
20103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uassert.h"
21103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uparse.h"
22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
23103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include <stdio.h>
24103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include <string.h>
25103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
// Number of elements in a fixed-size array, as an int32_t.
// The whole expansion is parenthesized so that the cast and its operand stay
// one unit in any surrounding expression (e.g. after sizeof or next to
// postfix operators).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
27103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
28103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusU_NAMESPACE_BEGIN
29103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
30103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPropertyNames::~PropertyNames() {}
31103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
32103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusint32_t
33103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPropertyNames::getPropertyEnum(const char *name) const {
34103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return u_getPropertyEnum(name);
35103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
36103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
37103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusint32_t
38103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
39103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return u_getPropertyValueEnum((UProperty)property, name);
40103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
41103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
42103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUniProps::UniProps()
43103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        : start(U_SENTINEL), end(U_SENTINEL),
4459d709d503bab6e2b61931737e662dd293b40578ccornelius          bmg(U_SENTINEL), bpb(U_SENTINEL),
45103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius          scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL),
46103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius          digitValue(-1), numericValue(NULL),
47103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius          name(NULL), nameAlias(NULL) {
48103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    memset(binProps, 0, sizeof(binProps));
49103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    memset(intProps, 0, sizeof(intProps));
50103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    memset(age, 0, 4);
51103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
52103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
53103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUniProps::~UniProps() {}
54103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
55103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst int32_t PreparsedUCD::kNumLineBuffers;
56103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
57103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode)
58103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        : icuPnames(new PropertyNames()), pnames(icuPnames),
59103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius          file(NULL),
60103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius          defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0),
61103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius          lineNumber(0),
62103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius          lineType(NO_LINE),
63103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius          fieldLimit(NULL), lineLimit(NULL) {
64103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U_FAILURE(errorCode)) { return; }
65103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
66103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) {
67103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        filename=NULL;
68103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        file=stdin;
69103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else {
70103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        file=fopen(filename, "r");
71103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
72103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(file==NULL) {
73103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        perror("error opening preparsed UCD");
7454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        fprintf(stderr, "error opening preparsed UCD file %s\n", filename ? filename : "\"no file name given\"");
75103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_FILE_ACCESS_ERROR;
76103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return;
77103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
78103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
79103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    memset(ucdVersion, 0, 4);
80103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    lines[0][0]=0;
81103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
82103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
83103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::~PreparsedUCD() {
84103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(file!=stdin) {
85103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fclose(file);
86103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
87103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    delete icuPnames;
88103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
89103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
// Line type keywords as they appear in the first field of a line.
// Same order as the LineType values; indexed by LineType.
// The first two slots carry no keyword (readLine() starts matching at
// EMPTY_LINE+1, so presumably they correspond to NO_LINE and EMPTY_LINE).
static const char *lineTypeStrings[]={
    NULL, NULL,
    "ucd", "property", "binary", "value",
    "defaults", "block", "cp", "algnamesrange"
};
103103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
104103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::LineType
105103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::readLine(UErrorCode &errorCode) {
106103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U_FAILURE(errorCode)) { return NO_LINE; }
107103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // Select the next available line buffer.
108103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    while(!isLineBufferAvailable(lineIndex)) {
109103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        ++lineIndex;
110103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if (lineIndex == kNumLineBuffers) {
111103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            lineIndex = 0;
112103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
113103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
114103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    char *line=lines[lineIndex];
115103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    *line=0;
116103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    lineLimit=fieldLimit=line;
117103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    lineType=NO_LINE;
118103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    char *result=fgets(line, sizeof(lines[0]), file);
119103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(result==NULL) {
120103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(ferror(file)) {
121103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            perror("error reading preparsed UCD");
122103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr, "error reading preparsed UCD before line %ld\n", (long)lineNumber);
123103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_FILE_ACCESS_ERROR;
124103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
125103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return NO_LINE;
126103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
127103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    ++lineNumber;
128103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(*line=='#') {
129103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fieldLimit=strchr(line, 0);
130103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return lineType=EMPTY_LINE;
131103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
132103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // Remove trailing /r/n.
133103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    char c;
134103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    char *limit=strchr(line, 0);
135103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; }
136103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // Remove trailing white space.
137103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; }
138103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    *limit=0;
139103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    lineLimit=limit;
140103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(line==limit) {
141103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fieldLimit=limit;
142103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return lineType=EMPTY_LINE;
143103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
144103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // Split by ';'.
145103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    char *semi=line;
146103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    while((semi=strchr(semi, ';'))!=NULL) { *semi++=0; }
147103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    fieldLimit=strchr(line, 0);
148103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // Determine the line type.
149103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    int32_t type;
150103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    for(type=EMPTY_LINE+1;; ++type) {
151103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(type==LINE_TYPE_COUNT) {
152103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
153103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n",
154103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    line, (long)lineNumber);
155103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
156103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return NO_LINE;
157103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
158103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(0==strcmp(line, lineTypeStrings[type])) {
159103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
160103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
161103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
162103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    lineType=(LineType)type;
163103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) {
164103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        u_versionFromString(ucdVersion, fieldLimit+1);
165103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
166103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return lineType;
167103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
168103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
169103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst char *
170103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::firstField() {
171103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    char *field=lines[lineIndex];
172103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    fieldLimit=strchr(field, 0);
173103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return field;
174103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
175103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
176103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst char *
177103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::nextField() {
178103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(fieldLimit==lineLimit) { return NULL; }
179103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    char *field=fieldLimit+1;
180103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    fieldLimit=strchr(field, 0);
181103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return field;
182103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
183103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
184103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst UniProps *
185103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) {
186103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U_FAILURE(errorCode)) { return NULL; }
187103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    newValues.clear();
188103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(!lineHasPropertyValues()) {
189103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
190103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return NULL;
191103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
192103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    firstField();
193103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const char *field=nextField();
194103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(field==NULL) {
195103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // No range field after the type.
196103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fprintf(stderr,
197103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                "error in preparsed UCD: missing default/block/cp range field "
198103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                "(no second field) on line %ld\n",
199103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                (long)lineNumber);
200103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_PARSE_ERROR;
201103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return NULL;
202103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
203103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    UChar32 start, end;
204103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(!parseCodePointRange(field, start, end, errorCode)) { return NULL; }
205103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    UniProps *props;
206103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    switch(lineType) {
207103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    case DEFAULTS_LINE:
208103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(defaultLineIndex>=0) {
209103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
210103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: second line with default properties on line %ld\n",
211103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    (long)lineNumber);
212103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
213103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return NULL;
214103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
215103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(start!=0 || end!=0x10ffff) {
216103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
217103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n",
218103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    field, (long)lineNumber);
219103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
220103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return NULL;
221103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
222103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        props=&defaultProps;
223103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        defaultLineIndex=lineIndex;
224103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        break;
225103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    case BLOCK_LINE:
226103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        blockProps=defaultProps;  // Block inherits default properties.
227103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        props=&blockProps;
228103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        blockLineIndex=lineIndex;
229103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        break;
230103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    case CP_LINE:
231103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(blockProps.start<=start && end<=blockProps.end) {
232103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Code point range fully inside the last block inherits the block properties.
233103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            cpProps=blockProps;
234103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else if(start>blockProps.end || end<blockProps.start) {
235103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Code point range fully outside the last block inherits the default properties.
236103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            cpProps=defaultProps;
237103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else {
238103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Code point range partially overlapping with the last block is illegal.
239103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
240103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: cp range %s on line %ld only "
241103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "partially overlaps with block range %04lX..%04lX\n",
242103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    field, (long)lineNumber, (long)blockProps.start, (long)blockProps.end);
243103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
244103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return NULL;
245103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
246103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        props=&cpProps;
247103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        break;
248103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    default:
249103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // Will not occur because of the range check above.
250103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
251103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return NULL;
252103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
253103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    props->start=start;
254103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    props->end=end;
255103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    while((field=nextField())!=NULL) {
256103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(!parseProperty(*props, field, newValues, errorCode)) { return NULL; }
257103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
258103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return props;
259103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
260103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
261103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic const struct {
262103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const char *name;
263103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    int32_t prop;
264103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} ppucdProperties[]={
265103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    { "Name_Alias", PPUCD_NAME_ALIAS },
266103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    { "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS },
267103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    { "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING }
268103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius};
269103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
270103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// Returns TRUE for "ok to continue parsing fields".
271103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUBool
272103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
273103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            UErrorCode &errorCode) {
274103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    CharString pBuffer;
275103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const char *p=field;
276103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const char *v=strchr(p, '=');
277103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    int binaryValue;
278103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(*p=='-') {
279103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(v!=NULL) {
280103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
281103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: mix of binary-property-no and "
282103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "enum-property syntax '%s' on line %ld\n",
283103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    field, (long)lineNumber);
284103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
285103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return FALSE;
286103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
287103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        binaryValue=0;
288103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        ++p;
289103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else if(v==NULL) {
290103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        binaryValue=1;
291103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else {
292103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        binaryValue=-1;
293103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // Copy out the property name rather than modifying the field (writing a NUL).
294103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        pBuffer.append(p, (int32_t)(v-p), errorCode);
295103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        p=pBuffer.data();
296103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        ++v;
297103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
298103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    int32_t prop=pnames->getPropertyEnum(p);
299103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(prop<0) {
300103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        for(int32_t i=0;; ++i) {
301103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(i==LENGTHOF(ppucdProperties)) {
302103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                // Ignore unknown property names.
303103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                return TRUE;
304103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
305103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(0==uprv_stricmp(p, ppucdProperties[i].name)) {
306103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                prop=ppucdProperties[i].prop;
307103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                U_ASSERT(prop>=0);
308103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                break;
309103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
310103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
311103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
312103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(prop<UCHAR_BINARY_LIMIT) {
313103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(binaryValue>=0) {
314103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.binProps[prop]=(UBool)binaryValue;
315103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else {
316103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // No binary value for a binary property.
317103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
318103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: enum-property syntax '%s' "
319103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "for binary property on line %ld\n",
320103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    field, (long)lineNumber);
321103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
322103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
323103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else if(binaryValue>=0) {
324103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // Binary value for a non-binary property.
325103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fprintf(stderr,
326103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                "error in preparsed UCD: binary-property syntax '%s' "
327103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                "for non-binary property on line %ld\n",
328103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                field, (long)lineNumber);
329103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_PARSE_ERROR;
33054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    } else if (prop < UCHAR_INT_START) {
33154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        fprintf(stderr,
33254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n",
33354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                prop, (long)lineNumber);
33454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        errorCode=U_PARSE_ERROR;
335103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else if(prop<UCHAR_INT_LIMIT) {
336103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        int32_t value=pnames->getPropertyValueEnum(prop, v);
337103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) {
338103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work.
339103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            char *end;
340103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            unsigned long ccc=uprv_strtoul(v, &end, 10);
341103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(v<end && *end==0 && ccc<=254) {
342103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                value=(int32_t)ccc;
343103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
344103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
345103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(value==UCHAR_INVALID_CODE) {
346103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
347103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: '%s' is not a valid value on line %ld\n",
348103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    field, (long)lineNumber);
349103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
350103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else {
351103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.intProps[prop-UCHAR_INT_START]=value;
352103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
353103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else if(*v=='<') {
354103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // Do not parse default values like <code point>, just set null values.
355103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        switch(prop) {
356103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_BIDI_MIRRORING_GLYPH:
357103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.bmg=U_SENTINEL;
358103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
35959d709d503bab6e2b61931737e662dd293b40578ccornelius        case UCHAR_BIDI_PAIRED_BRACKET:
36059d709d503bab6e2b61931737e662dd293b40578ccornelius            props.bpb=U_SENTINEL;
36159d709d503bab6e2b61931737e662dd293b40578ccornelius            break;
362103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SIMPLE_CASE_FOLDING:
363103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.scf=U_SENTINEL;
364103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
365103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
366103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.slc=U_SENTINEL;
367103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
368103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SIMPLE_TITLECASE_MAPPING:
369103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.stc=U_SENTINEL;
370103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
371103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
372103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.suc=U_SENTINEL;
373103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
374103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_CASE_FOLDING:
375103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.cf.remove();
376103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
377103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_LOWERCASE_MAPPING:
378103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.lc.remove();
379103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
380103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_TITLECASE_MAPPING:
381103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.tc.remove();
382103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
383103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_UPPERCASE_MAPPING:
384103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.uc.remove();
385103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
386103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SCRIPT_EXTENSIONS:
387103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.scx.clear();
388103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
389103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        default:
390103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
391103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: '%s' is not a valid default value on line %ld\n",
392103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    field, (long)lineNumber);
393103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
394103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
395103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else {
396103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        char c;
397103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        switch(prop) {
398103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_NUMERIC_VALUE:
399103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.numericValue=v;
400103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            c=*v;
401103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if('0'<=c && c<='9' && v[1]==0) {
402103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                props.digitValue=c-'0';
403103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            } else {
404103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                props.digitValue=-1;
405103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
406103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
407103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_NAME:
408103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.name=v;
409103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
410103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_AGE:
411103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            u_versionFromString(props.age, v);  // Writes 0.0.0.0 if v is not numeric.
412103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
413103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_BIDI_MIRRORING_GLYPH:
414103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.bmg=parseCodePoint(v, errorCode);
415103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
41659d709d503bab6e2b61931737e662dd293b40578ccornelius        case UCHAR_BIDI_PAIRED_BRACKET:
41759d709d503bab6e2b61931737e662dd293b40578ccornelius            props.bpb=parseCodePoint(v, errorCode);
41859d709d503bab6e2b61931737e662dd293b40578ccornelius            break;
419103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SIMPLE_CASE_FOLDING:
420103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.scf=parseCodePoint(v, errorCode);
421103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
422103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
423103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.slc=parseCodePoint(v, errorCode);
424103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
425103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SIMPLE_TITLECASE_MAPPING:
426103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.stc=parseCodePoint(v, errorCode);
427103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
428103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
429103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.suc=parseCodePoint(v, errorCode);
430103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
431103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_CASE_FOLDING:
432103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            parseString(v, props.cf, errorCode);
433103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
434103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_LOWERCASE_MAPPING:
435103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            parseString(v, props.lc, errorCode);
436103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
437103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_TITLECASE_MAPPING:
438103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            parseString(v, props.tc, errorCode);
439103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
440103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_UPPERCASE_MAPPING:
441103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            parseString(v, props.uc, errorCode);
442103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
443103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case PPUCD_NAME_ALIAS:
444103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            props.nameAlias=v;
445103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
446103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case PPUCD_CONDITIONAL_CASE_MAPPINGS:
447103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case PPUCD_TURKIC_CASE_FOLDING:
448103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // No need to parse their values: They are hardcoded in the runtime library.
449103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
450103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case UCHAR_SCRIPT_EXTENSIONS:
451103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            parseScriptExtensions(v, props.scx, errorCode);
452103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
453103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        default:
454103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Ignore unhandled properties.
455103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return TRUE;
456103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
457103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
458103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U_SUCCESS(errorCode)) {
459103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        newValues.add((UChar32)prop);
460103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return TRUE;
461103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else {
462103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return FALSE;
463103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
464103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
465103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
466103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUBool
467103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
468103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U_FAILURE(errorCode)) { return FALSE; }
469103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(lineType!=ALG_NAMES_RANGE_LINE) {
470103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
471103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return FALSE;
472103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
473103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    firstField();
474103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const char *field=nextField();
475103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(field==NULL) {
476103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // No range field after the type.
477103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fprintf(stderr,
478103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                "error in preparsed UCD: missing algnamesrange range field "
479103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                "(no second field) on line %ld\n",
480103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                (long)lineNumber);
481103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_PARSE_ERROR;
482103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return FALSE;
483103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
484103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return parseCodePointRange(field, start, end, errorCode);
485103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
486103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
487103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUChar32
488103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) {
489103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    char *end;
490103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uint32_t value=(uint32_t)uprv_strtoul(s, &end, 16);
491103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(end<=s || *end!=0 || value>=0x110000) {
492103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fprintf(stderr,
493103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                "error in preparsed UCD: '%s' is not a valid code point on line %ld\n",
494103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                s, (long)lineNumber);
495103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_PARSE_ERROR;
496103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return U_SENTINEL;
497103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
498103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return (UChar32)value;
499103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
500103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
501103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUBool
502103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
503103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uint32_t st, e;
504103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    u_parseCodePointRange(s, &st, &e, &errorCode);
505103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U_FAILURE(errorCode)) {
506103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fprintf(stderr,
507103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n",
508103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                s, (long)lineNumber);
509103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return FALSE;
510103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
511103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    start=(UChar32)st;
512103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    end=(UChar32)e;
513103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return TRUE;
514103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
515103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
516103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusvoid
517103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) {
518103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    UChar *buffer=uni.getBuffer(-1);
519103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
520103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
521103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_ZERO_ERROR;
522103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        uni.releaseBuffer(0);
523103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        buffer=uni.getBuffer(length);
524103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
525103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
526103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uni.releaseBuffer(length);
527103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U_FAILURE(errorCode)) {
528103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fprintf(stderr,
529103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n",
530103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                s, (long)lineNumber);
531103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
532103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
533103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
534103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusvoid
535103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusPreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
536103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U_FAILURE(errorCode)) { return; }
537103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    scx.clear();
538103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    CharString scString;
539103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    for(;;) {
540103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        const char *scs;
541103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        const char *scLimit=strchr(s, ' ');
542103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(scLimit!=NULL) {
543103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
544103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(U_FAILURE(errorCode)) { return; }
545103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else {
546103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            scs=s;
547103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
548103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
549103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(script==UCHAR_INVALID_CODE) {
550103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
551103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
552103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    scs, (long)lineNumber);
553103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
554103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return;
555103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else if(scx.contains(script)) {
556103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            fprintf(stderr,
557103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
558103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    scs, (long)lineNumber);
559103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            errorCode=U_PARSE_ERROR;
560103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return;
561103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else {
562103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            scx.add(script);
563103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
564103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(scLimit!=NULL) {
565103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            s=scLimit+1;
566103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else {
567103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
568103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
569103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
570103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(scx.isEmpty()) {
571103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
572103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        errorCode=U_PARSE_ERROR;
573103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
574103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
575103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
576103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusU_NAMESPACE_END
577