/*
*******************************************************************************
*
*   Copyright (C) 2002-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  propsvec.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb22
*   created by: Markus W. Scherer
*
*   Store bits (Unicode character properties) in bit set vectors.
*/

1985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifndef __UPROPSVEC_H__
2085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define __UPROPSVEC_H__
2185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
2285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "unicode/utypes.h"
2385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "utrie.h"
2485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "utrie2.h"
2585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
2685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CDECL_BEGIN
2785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
/**
 * Unicode Properties Vectors associated with code point ranges.
 *
 * Rows of uint32_t integers in a contiguous array store
 * the range limits and the properties vectors.
 *
 * Logically, each row has a certain number of uint32_t values,
 * which is set via the upvec_open() "columns" parameter.
 *
 * Internally, two additional columns are stored.
 * In each internal row,
 * row[0] contains the start code point and
 * row[1] contains the limit code point,
 * which is the start of the next range.
 *
 * Initially, there is only one "normal" row for
 * range [0..0x110000[ with values 0.
 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
 *
 * It would be possible to store only one range boundary per row,
 * but self-contained rows make it possible to sort them by contents later.
 *
 * The type is opaque in this header: only the forward declaration is
 * visible, so callers work with UPropsVectors pointers exclusively.
 */
struct UPropsVectors;
typedef struct UPropsVectors UPropsVectors;

/*
 * Special pseudo code points for storing the initialValue and the errorValue,
 * which are used to initialize a UTrie2 or similar.
 *
 * UPVEC_FIRST_SPECIAL_CP is the lowest pseudo code point and has the same
 * value as UPVEC_INITIAL_VALUE_CP; UPVEC_MAX_CP is the highest one and
 * currently has the same value as UPVEC_ERROR_VALUE_CP.
 */
#define UPVEC_FIRST_SPECIAL_CP 0x110000
#define UPVEC_INITIAL_VALUE_CP 0x110000
#define UPVEC_ERROR_VALUE_CP 0x110001
#define UPVEC_MAX_CP 0x110001

/*
 * Special pseudo code point used in upvec_compact() signalling the end of
 * delivering special values and the beginning of delivering real ones.
 * Stable value, unlike UPVEC_MAX_CP which might grow over time.
 */
#define UPVEC_START_REAL_VALUES_CP 0x200000

6985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
7085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Open a UPropsVectors object.
7185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @param columns Number of value integers (uint32_t) per row.
7285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
7385bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UPropsVectors * U_EXPORT2
7485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_open(int32_t columns, UErrorCode *pErrorCode);
7585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
7685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2
7785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_close(UPropsVectors *pv);
7885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
7985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
8085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In rows for code points [start..end], select the column,
8185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * reset the mask bits and set the value bits (ANDed with the mask).
8285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
8385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
8485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
8585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2
8685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_setValue(UPropsVectors *pv,
8785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho               UChar32 start, UChar32 end,
8885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho               int32_t column,
8985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho               uint32_t value, uint32_t mask,
9085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho               UErrorCode *pErrorCode);
9185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
9285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
9385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Logically const but must not be used on the same pv concurrently!
9485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Always returns 0 if called after upvec_compact().
9585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
9685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI uint32_t U_EXPORT2
9785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
9885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
9985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
10085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * pRangeStart and pRangeEnd can be NULL.
10185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @return NULL if rowIndex out of range and for illegal arguments,
10285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *         or if called after upvec_compact()
10385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
10485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI uint32_t * U_EXPORT2
10585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
10685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             UChar32 *pRangeStart, UChar32 *pRangeEnd);
10785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
10885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
10985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Compact the vectors:
11085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - modify the memory
11185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - keep only unique vectors
11285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - store them contiguously from the beginning of the memory
11385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - for each (non-unique) row, call the handler function
11485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
11585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The handler's rowIndex is the index of the row in the compacted
11685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * memory block.
11785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (Therefore, it starts at 0 increases in increments of the columns value.)
11885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
11985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In a first phase, only special values are delivered (each exactly once),
12085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * with start==end both equalling a special pseudo code point.
12185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
12285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * where rowIndex is the length of the compacted array,
12385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and the row is arbitrary (but not NULL).
12485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Then, in the second phase, the handler is called for each row of real values.
12585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
12685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hotypedef void U_CALLCONV
12785bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoUPVecCompactHandler(void *context,
12885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    UChar32 start, UChar32 end,
12985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    int32_t rowIndex, uint32_t *row, int32_t columns,
13085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    UErrorCode *pErrorCode);
13185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
13285bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2
13385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
13485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
13585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
13685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Get the vectors array after calling upvec_compact().
13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The caller must not modify nor release the returned array.
13885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Returns NULL if called before upvec_compact().
13985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
14085bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI const uint32_t * U_EXPORT2
14185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
14285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
14385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
14485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Get a clone of the vectors array after calling upvec_compact().
14585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The caller owns the returned array and must uprv_free() it.
14685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Returns NULL if called before upvec_compact().
14785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
14885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI uint32_t * U_EXPORT2
14985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_cloneArray(const UPropsVectors *pv,
15085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
15185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
15285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
15385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
15485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * vectors array, and freeze the trie.
15585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
15685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2
15785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
15885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
15985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostruct UPVecToUTrie2Context {
16085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UTrie2 *trie;
16185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t initialValue;
16285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t errorValue;
16385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t maxValue;
16485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho};
16585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hotypedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
16685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
16785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
16885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_CALLCONV
16985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houpvec_compactToUTrie2Handler(void *context,
17085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                             UChar32 start, UChar32 end,
17185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                             int32_t rowIndex, uint32_t *row, int32_t columns,
17285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                             UErrorCode *pErrorCode);
17385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
17485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CDECL_END
17585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
17685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif
177