1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*******************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Copyright (C) 2002-2010, International Business Machines
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Corporation and others.  All Rights Reserved.
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*******************************************************************************
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   file name:  propsvec.h
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   encoding:   US-ASCII
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   tab size:   8 (not used)
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   indentation:4
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created on: 2002feb22
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created by: Markus W. Scherer
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Store bits (Unicode character properties) in bit set vectors.
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef __UPROPSVEC_H__
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define __UPROPSVEC_H__
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie.h"
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie2.h"
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Unicode Properties Vectors associated with code point ranges.
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Rows of uint32_t integers in a contiguous array store
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the range limits and the properties vectors.
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Logically, each row has a certain number of uint32_t values,
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * which is set via the upvec_open() "columns" parameter.
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Internally, two additional columns are stored.
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * In each internal row,
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * row[0] contains the start code point and
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * row[1] contains the limit code point,
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * which is the start of the next range.
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Initially, there is only one "normal" row for
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * range [0..0x110000[ with values 0.
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * It would be possible to store only one range boundary per row,
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * but self-contained rows allow to later sort them by contents.
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)struct UPropsVectors;
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct UPropsVectors UPropsVectors;
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Special pseudo code points for storing the initialValue and the errorValue,
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * which are used to initialize a UTrie2 or similar.
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UPVEC_FIRST_SPECIAL_CP 0x110000
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UPVEC_INITIAL_VALUE_CP 0x110000
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UPVEC_ERROR_VALUE_CP 0x110001
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UPVEC_MAX_CP 0x110001
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Special pseudo code point used in upvec_compact() signalling the end of
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * delivering special values and the beginning of delivering real ones.
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Stable value, unlike UPVEC_MAX_CP which might grow over time.
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UPVEC_START_REAL_VALUES_CP 0x200000
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Open a UPropsVectors object.
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param columns Number of value integers (uint32_t) per row.
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UPropsVectors * U_EXPORT2
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_open(int32_t columns, UErrorCode *pErrorCode);
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_close(UPropsVectors *pv);
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * In rows for code points [start..end], select the column,
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * reset the mask bits and set the value bits (ANDed with the mask).
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_setValue(UPropsVectors *pv,
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               UChar32 start, UChar32 end,
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               int32_t column,
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               uint32_t value, uint32_t mask,
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               UErrorCode *pErrorCode);
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Logically const but must not be used on the same pv concurrently!
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Always returns 0 if called after upvec_compact().
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t U_EXPORT2
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * pRangeStart and pRangeEnd can be NULL.
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return NULL if rowIndex out of range and for illegal arguments,
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *         or if called after upvec_compact()
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t * U_EXPORT2
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             UChar32 *pRangeStart, UChar32 *pRangeEnd);
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Compact the vectors:
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - modify the memory
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - keep only unique vectors
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - store them contiguously from the beginning of the memory
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - for each (non-unique) row, call the handler function
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The handler's rowIndex is the index of the row in the compacted
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * memory block.
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (Therefore, it starts at 0 increases in increments of the columns value.)
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * In a first phase, only special values are delivered (each exactly once),
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * with start==end both equalling a special pseudo code point.
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * where rowIndex is the length of the compacted array,
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and the row is arbitrary (but not NULL).
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Then, in the second phase, the handler is called for each row of real values.
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef void U_CALLCONV
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UPVecCompactHandler(void *context,
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UChar32 start, UChar32 end,
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int32_t rowIndex, uint32_t *row, int32_t columns,
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UErrorCode *pErrorCode);
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the vectors array after calling upvec_compact().
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The caller must not modify nor release the returned array.
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns NULL if called before upvec_compact().
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const uint32_t * U_EXPORT2
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get a clone of the vectors array after calling upvec_compact().
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The caller owns the returned array and must uprv_free() it.
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns NULL if called before upvec_compact().
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t * U_EXPORT2
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_cloneArray(const UPropsVectors *pv,
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * vectors array, and freeze the trie.
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UTrie2 * U_EXPORT2
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)struct UPVecToUTrie2Context {
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTrie2 *trie;
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t initialValue;
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t errorValue;
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t maxValue;
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_CALLCONV
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)upvec_compactToUTrie2Handler(void *context,
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                             UChar32 start, UChar32 end,
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                             int32_t rowIndex, uint32_t *row, int32_t columns,
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                             UErrorCode *pErrorCode);
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
177