185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho******************************************************************************* 385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* 427f654740f2a26ad62a5c155af9199af9e69b889claireho* Copyright (C) 2002-2010, International Business Machines 585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* Corporation and others. All Rights Reserved. 685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* 785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho******************************************************************************* 885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* file name: propsvec.h 985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* encoding: US-ASCII 1085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* tab size: 8 (not used) 1185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* indentation:4 1285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* 1385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* created on: 2002feb22 1485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* created by: Markus W. Scherer 1585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* 1685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* Store bits (Unicode character properties) in bit set vectors. 1785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*/ 1885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 1985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifndef __UPROPSVEC_H__ 2085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define __UPROPSVEC_H__ 2185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 2285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "unicode/utypes.h" 2385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "utrie.h" 2485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "utrie2.h" 2585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 2685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CDECL_BEGIN 2785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 2885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/** 2985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Unicode Properties Vectors associated with code point ranges. 3085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 3185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Rows of uint32_t integers in a contiguous array store 3285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * the range limits and the properties vectors. 3385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 3485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Logically, each row has a certain number of uint32_t values, 3585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * which is set via the upvec_open() "columns" parameter. 3685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 3785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Internally, two additional columns are stored. 3885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In each internal row, 3985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * row[0] contains the start code point and 4085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * row[1] contains the limit code point, 4185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * which is the start of the next range. 4285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 4385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Initially, there is only one "normal" row for 4485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * range [0..0x110000[ with values 0. 4585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP. 4685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 4785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * It would be possible to store only one range boundary per row, 4885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * but self-contained rows allow to later sort them by contents. 4985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 5085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostruct UPropsVectors; 5185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hotypedef struct UPropsVectors UPropsVectors; 5285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 5385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 5485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Special pseudo code points for storing the initialValue and the errorValue, 5585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * which are used to initialize a UTrie2 or similar. 5685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 5785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UPVEC_FIRST_SPECIAL_CP 0x110000 5885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UPVEC_INITIAL_VALUE_CP 0x110000 5985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UPVEC_ERROR_VALUE_CP 0x110001 6085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UPVEC_MAX_CP 0x110001 6185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 6285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 6385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Special pseudo code point used in upvec_compact() signalling the end of 6485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * delivering special values and the beginning of delivering real ones. 6585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Stable value, unlike UPVEC_MAX_CP which might grow over time. 6685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 6785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UPVEC_START_REAL_VALUES_CP 0x200000 6885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 6985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 7085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Open a UPropsVectors object. 7185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @param columns Number of value integers (uint32_t) per row. 7285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 7385bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UPropsVectors * U_EXPORT2 7485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_open(int32_t columns, UErrorCode *pErrorCode); 7585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 7685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2 7785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_close(UPropsVectors *pv); 7885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 7985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 8085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In rows for code points [start..end], select the column, 8185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * reset the mask bits and set the value bits (ANDed with the mask). 8285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 8385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). 8485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 8585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2 8685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_setValue(UPropsVectors *pv, 8785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 start, UChar32 end, 8885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t column, 8985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value, uint32_t mask, 9085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode); 9185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 9285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 9385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Logically const but must not be used on the same pv concurrently! 9485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Always returns 0 if called after upvec_compact(). 9585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 9685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI uint32_t U_EXPORT2 9785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); 9885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 9985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 10085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * pRangeStart and pRangeEnd can be NULL. 10185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @return NULL if rowIndex out of range and for illegal arguments, 10285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * or if called after upvec_compact() 10385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 10485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI uint32_t * U_EXPORT2 10585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_getRow(const UPropsVectors *pv, int32_t rowIndex, 10685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 *pRangeStart, UChar32 *pRangeEnd); 10785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 10885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 10985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Compact the vectors: 11085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - modify the memory 11185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - keep only unique vectors 11285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - store them contiguously from the beginning of the memory 11385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - for each (non-unique) row, call the handler function 11485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 11585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The handler's rowIndex is the index of the row in the compacted 11685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * memory block. 11785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (Therefore, it starts at 0 increases in increments of the columns value.) 11885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 11985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In a first phase, only special values are delivered (each exactly once), 12085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * with start==end both equalling a special pseudo code point. 12185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP 12285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * where rowIndex is the length of the compacted array, 12385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and the row is arbitrary (but not NULL). 12485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Then, in the second phase, the handler is called for each row of real values. 12585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 12685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hotypedef void U_CALLCONV 12785bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoUPVecCompactHandler(void *context, 12885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 start, UChar32 end, 12985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t rowIndex, uint32_t *row, int32_t columns, 13085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode); 13185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 13285bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2 13385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); 13485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 13585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 13685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Get the vectors array after calling upvec_compact(). 13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The caller must not modify nor release the returned array. 13885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Returns NULL if called before upvec_compact(). 13985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 14085bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI const uint32_t * U_EXPORT2 14185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); 14285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 14385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 14485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Get a clone of the vectors array after calling upvec_compact(). 14585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The caller owns the returned array and must uprv_free() it. 14685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Returns NULL if called before upvec_compact(). 14785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 14885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI uint32_t * U_EXPORT2 14985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_cloneArray(const UPropsVectors *pv, 15085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); 15185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 15285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 15385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted 15485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * vectors array, and freeze the trie. 15585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 15685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2 15785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); 15885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 15985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostruct UPVecToUTrie2Context { 16085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UTrie2 *trie; 16185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t initialValue; 16285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t errorValue; 16385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t maxValue; 16485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}; 16585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hotypedef struct UPVecToUTrie2Context UPVecToUTrie2Context; 16685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 16785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ 16885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_CALLCONV 16985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_compactToUTrie2Handler(void *context, 17085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 start, UChar32 end, 17185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t rowIndex, uint32_t *row, int32_t columns, 17285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode); 17385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 17485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CDECL_END 17585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 17685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 177