16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2011-2013, International Business Machines 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: ppucd.h 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2011dec11 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#ifndef __PPUCD_H__ 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define __PPUCD_H__ 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h" 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unistr.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdio.h> 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** Additions to the uchar.h enum UProperty. */ 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgenum { 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** Name_Alias */ 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT, 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org PPUCD_CONDITIONAL_CASE_MAPPINGS, 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org PPUCD_TURKIC_CASE_FOLDING 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass U_TOOLUTIL_API PropertyNames { 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpublic: 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org virtual ~PropertyNames(); 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org virtual int32_t getPropertyEnum(const char *name) const; 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const; 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct U_TOOLUTIL_API UniProps { 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UniProps(); 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ~UniProps(); 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; } 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 start, end; 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool binProps[UCHAR_BINARY_LIMIT]; 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]; 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVersionInfo age; 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 bmg, bpb; 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 scf, slc, stc, suc; 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t digitValue; 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *numericValue; 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *name; 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *nameAlias; 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString cf, lc, tc, uc; 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet scx; 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass U_TOOLUTIL_API PreparsedUCD { 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpublic: 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org enum LineType { 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** No line, end of file. */ 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NO_LINE, 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** Empty line. (Might contain a comment.) */ 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org EMPTY_LINE, 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** ucd;6.1.0 */ 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNICODE_VERSION_LINE, 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** property;Binary;Alpha;Alphabetic */ 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org PROPERTY_LINE, 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** binary;N;No;F;False */ 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org BINARY_LINE, 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** value;gc;Zs;Space_Separator */ 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org VALUE_LINE, 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** defaults;0000..10FFFF;age=NA;bc=L;... */ 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org DEFAULTS_LINE, 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */ 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org BLOCK_LINE, 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */ 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CP_LINE, 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */ 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ALG_NAMES_RANGE_LINE, 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org LINE_TYPE_COUNT 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }; 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Constructor. 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Prepare this object for a new, empty package. 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org PreparsedUCD(const char *filename, UErrorCode &errorCode); 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** Destructor. */ 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ~PreparsedUCD(); 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */ 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void setPropertyNames(const PropertyNames *pn) { pnames=pn; } 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Reads a line from the preparsed UCD file. 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Splits the line by replacing each ';' with a NUL. 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org LineType readLine(UErrorCode &errorCode); 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** Returns the number of the line read by readLine(). */ 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t getLineNumber() const { return lineNumber; } 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** Returns the line's next field, or NULL. */ 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *nextField(); 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */ 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UVersionInfo &getUnicodeVersion() const { return ucdVersion; } 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** Returns TRUE if the current line has property values. */ 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool lineHasPropertyValues() const { return DEFAULTS_LINE<=lineType && lineType<=CP_LINE; } 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Parses properties from the current line. 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Clears newValues and sets UProperty codes for property values mentioned 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * on the current line (as opposed to being inherited). 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Returns a pointer to the filled-in UniProps, or NULL if something went wrong. 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The returned UniProps are usable until the next line of the same type is read. 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode); 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Returns the code point range for the current algnamesrange line. 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Calls & parses nextField(). 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Further nextField() calls will yield the range's type & prefix string. 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Returns U_SUCCESS(errorCode). 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode); 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgprivate: 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isLineBufferAvailable(int32_t i) { 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return defaultLineIndex!=i && blockLineIndex!=i; 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** Resets the field iterator and returns the line's first field (the line type field). */ 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *firstField(); 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode); 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 parseCodePoint(const char *s, UErrorCode &errorCode); 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode); 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode); 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode); 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static const int32_t kNumLineBuffers=3; 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org PropertyNames *icuPnames; // owned 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const PropertyNames *pnames; // aliased 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FILE *file; 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t defaultLineIndex, blockLineIndex, lineIndex; 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t lineNumber; 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org LineType lineType; 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *fieldLimit; 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *lineLimit; 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVersionInfo ucdVersion; 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UniProps defaultProps, blockProps, cpProps; 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Multiple lines so that default and block properties can maintain pointers 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // into their line buffers. 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char lines[kNumLineBuffers][4096]; 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif // __PPUCD_H__ 175