/*
*******************************************************************************
*   Copyright (C) 2011-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ppucd.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2011dec11
*   created by: Markus W. Scherer
*/
1483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
1583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#ifndef __PPUCD_H__
1683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define __PPUCD_H__
1783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
1883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utypes.h"
1983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/uniset.h"
2083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/unistr.h"
2183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
2283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include <stdio.h>
2383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
2483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius/** Additions to the uchar.h enum UProperty. */
2583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusenum {
2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /** Name_Alias */
2783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT,
2883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    PPUCD_CONDITIONAL_CASE_MAPPINGS,
2983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    PPUCD_TURKIC_CASE_FOLDING
3083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius};
3183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
3283a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_BEGIN
3383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
3483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusclass U_TOOLUTIL_API PropertyNames {
3583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliuspublic:
3683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    virtual ~PropertyNames();
3783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    virtual int32_t getPropertyEnum(const char *name) const;
3883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const;
3983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius};
4083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
4183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstruct U_TOOLUTIL_API UniProps {
4283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UniProps();
4383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    ~UniProps();
4483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
4583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; }
4683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
4783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UChar32 start, end;
4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UBool binProps[UCHAR_BINARY_LIMIT];
4983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START];
5083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UVersionInfo age;
5159d709d503bab6e2b61931737e662dd293b40578ccornelius    UChar32 bmg, bpb;
5283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UChar32 scf, slc, stc, suc;
5383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    int32_t digitValue;
5483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    const char *numericValue;
5583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    const char *name;
5683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    const char *nameAlias;
5783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UnicodeString cf, lc, tc, uc;
5883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UnicodeSet scx;
5983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius};
6083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusclass U_TOOLUTIL_API PreparsedUCD {
6283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliuspublic:
6383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    enum LineType {
6483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** No line, end of file. */
6583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        NO_LINE,
6683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** Empty line. (Might contain a comment.) */
6783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        EMPTY_LINE,
6883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
6983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** ucd;6.1.0 */
7083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        UNICODE_VERSION_LINE,
7183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
7283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** property;Binary;Alpha;Alphabetic */
7383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        PROPERTY_LINE,
7483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** binary;N;No;F;False */
7583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        BINARY_LINE,
7683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** value;gc;Zs;Space_Separator */
7783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        VALUE_LINE,
7883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
7983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** defaults;0000..10FFFF;age=NA;bc=L;... */
8083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        DEFAULTS_LINE,
8183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */
8283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        BLOCK_LINE,
8383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */
8483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        CP_LINE,
8583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
8683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */
8783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        ALG_NAMES_RANGE_LINE,
8883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
8983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        LINE_TYPE_COUNT
9083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    };
9183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
9283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /**
9383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Constructor.
9483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Prepare this object for a new, empty package.
9583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     */
9683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    PreparsedUCD(const char *filename, UErrorCode &errorCode);
9783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
9883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /** Destructor. */
9983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    ~PreparsedUCD();
10083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
10183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */
10283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    void setPropertyNames(const PropertyNames *pn) { pnames=pn; }
10383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
10483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /**
10583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Reads a line from the preparsed UCD file.
10683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Splits the line by replacing each ';' with a NUL.
10783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     */
10883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    LineType readLine(UErrorCode &errorCode);
10983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
11083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /** Returns the number of the line read by readLine(). */
11183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    int32_t getLineNumber() const { return lineNumber; }
11283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
11383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /** Returns the line's next field, or NULL. */
11483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    const char *nextField();
11583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
11683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */
11783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    const UVersionInfo &getUnicodeVersion() const { return ucdVersion; }
11883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
11983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /** Returns TRUE if the current line has property values. */
12083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UBool lineHasPropertyValues() const { return DEFAULTS_LINE<=lineType && lineType<=CP_LINE; }
12183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
12283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /**
12383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Parses properties from the current line.
12483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Clears newValues and sets UProperty codes for property values mentioned
12583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * on the current line (as opposed to being inherited).
12683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Returns a pointer to the filled-in UniProps, or NULL if something went wrong.
12783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * The returned UniProps are usable until the next line of the same type is read.
12883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     */
12983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode);
13083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
13183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /**
13283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Returns the code point range for the current algnamesrange line.
13383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Calls & parses nextField().
13483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Further nextField() calls will yield the range's type & prefix string.
13583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * Returns U_SUCCESS(errorCode).
13683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     */
13783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode);
13883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
13983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusprivate:
14083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UBool isLineBufferAvailable(int32_t i) {
14183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        return defaultLineIndex!=i && blockLineIndex!=i;
14283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    }
14383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
14483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    /** Resets the field iterator and returns the line's first field (the line type field). */
14583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    const char *firstField();
14683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
14783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
14883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                        UErrorCode &errorCode);
14983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UChar32 parseCodePoint(const char *s, UErrorCode &errorCode);
15083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode);
15183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode);
15283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode);
15383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
15483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    static const int32_t kNumLineBuffers=3;
15583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
15683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    PropertyNames *icuPnames;  // owned
15783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    const PropertyNames *pnames;  // aliased
15883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    FILE *file;
15983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    int32_t defaultLineIndex, blockLineIndex, lineIndex;
16083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    int32_t lineNumber;
16183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    LineType lineType;
16283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    char *fieldLimit;
16383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    char *lineLimit;
16483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
16583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UVersionInfo ucdVersion;
16683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UniProps defaultProps, blockProps, cpProps;
16783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    // Multiple lines so that default and block properties can maintain pointers
16883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    // into their line buffers.
16983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    char lines[kNumLineBuffers][4096];
17083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius};
17183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
17283a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_END
17383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
17483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif  // __PPUCD_H__
175