183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius/* 283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius******************************************************************************* 359d709d503bab6e2b61931737e662dd293b40578ccornelius* Copyright (C) 2011-2013, International Business Machines 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Corporation and others. All Rights Reserved. 583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius******************************************************************************* 683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* file name: ppucd.h 783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* encoding: US-ASCII 883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* tab size: 8 (not used) 983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* indentation:4 1083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* 1183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* created on: 2011dec11 1283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* created by: Markus W. Scherer 1383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius*/ 1483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 1583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#ifndef __PPUCD_H__ 1683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define __PPUCD_H__ 1783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 1883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utypes.h" 1983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/uniset.h" 2083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/unistr.h" 2183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 2283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include <stdio.h> 2383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 2483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius/** Additions to the uchar.h enum UProperty. */ 2583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusenum { 2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** Name_Alias */ 2783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT, 2883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius PPUCD_CONDITIONAL_CASE_MAPPINGS, 2983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius PPUCD_TURKIC_CASE_FOLDING 3083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}; 3183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 3283a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_BEGIN 3383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 3483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusclass U_TOOLUTIL_API PropertyNames { 3583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliuspublic: 3683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius virtual ~PropertyNames(); 3783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius virtual int32_t getPropertyEnum(const char *name) const; 3883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const; 3983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}; 4083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 4183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstruct U_TOOLUTIL_API UniProps { 4283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UniProps(); 4383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ~UniProps(); 4483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 4583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; } 4683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 4783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 start, end; 4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UBool binProps[UCHAR_BINARY_LIMIT]; 4983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]; 5083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UVersionInfo age; 5159d709d503bab6e2b61931737e662dd293b40578ccornelius UChar32 bmg, bpb; 5283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 scf, slc, stc, suc; 5383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t digitValue; 5483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const char *numericValue; 5583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const char *name; 5683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const char *nameAlias; 5783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeString cf, lc, tc, uc; 5883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeSet scx; 5983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}; 6083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusclass U_TOOLUTIL_API PreparsedUCD { 6283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliuspublic: 6383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius enum LineType { 6483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** No line, end of file. */ 6583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius NO_LINE, 6683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** Empty line. (Might contain a comment.) */ 6783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius EMPTY_LINE, 6883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 6983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** ucd;6.1.0 */ 7083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UNICODE_VERSION_LINE, 7183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 7283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** property;Binary;Alpha;Alphabetic */ 7383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius PROPERTY_LINE, 7483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** binary;N;No;F;False */ 7583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius BINARY_LINE, 7683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** value;gc;Zs;Space_Separator */ 7783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius VALUE_LINE, 7883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 7983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** defaults;0000..10FFFF;age=NA;bc=L;... */ 8083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius DEFAULTS_LINE, 8183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */ 8283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius BLOCK_LINE, 8383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */ 8483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius CP_LINE, 8583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 8683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */ 8783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ALG_NAMES_RANGE_LINE, 8883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 8983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius LINE_TYPE_COUNT 9083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius }; 9183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 9283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** 9383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Constructor. 9483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Prepare this object for a new, empty package. 9583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius */ 9683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius PreparsedUCD(const char *filename, UErrorCode &errorCode); 9783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 9883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** Destructor. */ 9983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ~PreparsedUCD(); 10083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 10183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */ 10283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius void setPropertyNames(const PropertyNames *pn) { pnames=pn; } 10383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 10483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** 10583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Reads a line from the preparsed UCD file. 10683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Splits the line by replacing each ';' with a NUL. 10783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius */ 10883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius LineType readLine(UErrorCode &errorCode); 10983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 11083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** Returns the number of the line read by readLine(). */ 11183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t getLineNumber() const { return lineNumber; } 11283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 11383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** Returns the line's next field, or NULL. */ 11483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const char *nextField(); 11583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 11683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */ 11783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UVersionInfo &getUnicodeVersion() const { return ucdVersion; } 11883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 11983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** Returns TRUE if the current line has property values. */ 12083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UBool lineHasPropertyValues() const { return DEFAULTS_LINE<=lineType && lineType<=CP_LINE; } 12183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 12283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** 12383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Parses properties from the current line. 12483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Clears newValues and sets UProperty codes for property values mentioned 12583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * on the current line (as opposed to being inherited). 12683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Returns a pointer to the filled-in UniProps, or NULL if something went wrong. 12783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * The returned UniProps are usable until the next line of the same type is read. 12883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius */ 12983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode); 13083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 13183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** 13283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Returns the code point range for the current algnamesrange line. 13383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Calls & parses nextField(). 13483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Further nextField() calls will yield the range's type & prefix string. 13583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Returns U_SUCCESS(errorCode). 13683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius */ 13783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode); 13883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 13983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusprivate: 14083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UBool isLineBufferAvailable(int32_t i) { 14183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return defaultLineIndex!=i && blockLineIndex!=i; 14283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 14383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 14483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /** Resets the field iterator and returns the line's first field (the line type field). */ 14583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const char *firstField(); 14683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 14783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, 14883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode &errorCode); 14983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 parseCodePoint(const char *s, UErrorCode &errorCode); 15083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode); 15183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode); 15283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode); 15383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 15483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius static const int32_t kNumLineBuffers=3; 15583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 15683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius PropertyNames *icuPnames; // owned 15783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const PropertyNames *pnames; // aliased 15883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius FILE *file; 15983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t defaultLineIndex, blockLineIndex, lineIndex; 16083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t lineNumber; 16183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius LineType lineType; 16283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius char *fieldLimit; 16383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius char *lineLimit; 16483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 16583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UVersionInfo ucdVersion; 16683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UniProps defaultProps, blockProps, cpProps; 16783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // Multiple lines so that default and block properties can maintain pointers 16883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // into their line buffers. 16983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius char lines[kNumLineBuffers][4096]; 17083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}; 17183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 17283a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_END 17383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 17483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif // __PPUCD_H__ 175