1/**
2 *******************************************************************************
3 * Copyright (C) 2006, International Business Machines Corporation and others. *
4 * All Rights Reserved.                                                        *
5 *******************************************************************************
6 */
7
8#ifndef TRIEDICT_H
9#define TRIEDICT_H
10
11#include "unicode/utypes.h"
12#include "unicode/uobject.h"
13#include "unicode/utext.h"
14
15struct UEnumeration;
16struct UDataSwapper;
17struct UDataMemory;
18
19 /**
20  * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
21  *
22  * @param ds Pointer to UDataSwapper containing global data about the
23  *           transformation and function pointers for handling primitive
24  *           types.
25  * @param inData Pointer to the input data to be transformed or examined.
26  * @param length Length of the data, counting bytes. May be -1 for preflighting.
27  *               If length>=0, then transform the data.
28  *               If length==-1, then only determine the length of the data.
29  *               The length cannot be determined from the data itself for all
30  *               types of data (e.g., not for simple arrays of integers).
31  * @param outData Pointer to the output data buffer.
32  *                If length>=0 (transformation), then the output buffer must
33  *                have a capacity of at least length.
34  *                If length==-1, then outData will not be used and can be NULL.
35  * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
36  *                   fulfill U_SUCCESS on input.
37  * @return The actual length of the data.
38  *
39  * @see UDataSwapper
40  */
41
42U_CAPI int32_t U_EXPORT2
43triedict_swap(const UDataSwapper *ds,
44            const void *inData, int32_t length, void *outData,
45            UErrorCode *pErrorCode);
46
47U_NAMESPACE_BEGIN
48
49class StringEnumeration;
50struct CompactTrieHeader;
51
52/*******************************************************************
53 * TrieWordDictionary
54 */
55
56/**
57 * <p>TrieWordDictionary is an abstract class that represents a word
58 * dictionary based on a trie. The base protocol is read-only.
59 * Subclasses may allow writing.</p>
60 */
61class U_COMMON_API TrieWordDictionary : public UMemory {
62 public:
63
64  /**
65   * <p>Default constructor.</p>
66   *
67   */
68  TrieWordDictionary();
69
70  /**
71   * <p>Virtual destructor.</p>
72   */
73  virtual ~TrieWordDictionary();
74
75 /**
76  * <p>Find dictionary words that match the text.</p>
77  *
78  * @param text A UText representing the text. The
79  * iterator is left after the longest prefix match in the dictionary.
80  * @param start The current position in text.
81  * @param maxLength The maximum number of code units to match.
82  * @param lengths An array that is filled with the lengths of words that matched.
83  * @param count Filled with the number of elements output in lengths.
84  * @param limit The size of the lengths array; this limits the number of words output.
85  * @return The number of characters in text that were matched.
86  */
87  virtual int32_t matches( UText *text,
88                              int32_t maxLength,
89                              int32_t *lengths,
90                              int &count,
91                              int limit ) const = 0;
92
93  /**
94   * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
95   *
96   * @param status A status code recording the success of the call.
97   * @return A StringEnumeration that will iterate through the whole dictionary.
98   * The caller is responsible for closing it. The order is unspecified.
99   */
100  virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
101
102};
103
104/*******************************************************************
105 * MutableTrieDictionary
106 */
107
108/**
109 * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
110 * added.</p>
111 */
112
113struct TernaryNode;             // Forwards declaration
114
115class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
116 private:
117    /**
118     * The root node of the trie
119     * @internal
120     */
121
122  TernaryNode               *fTrie;
123
124    /**
125     * A UText for internal use
126     * @internal
127     */
128
129  UText    *fIter;
130
131  friend class CompactTrieDictionary;   // For fast conversion
132
133 public:
134
135 /**
136  * <p>Constructor.</p>
137  *
138  * @param median A UChar around which to balance the trie. Ideally, it should
139  * begin at least one word that is near the median of the set in the dictionary
140  * @param status A status code recording the success of the call.
141  */
142  MutableTrieDictionary( UChar median, UErrorCode &status );
143
144  /**
145   * <p>Virtual destructor.</p>
146   */
147  virtual ~MutableTrieDictionary();
148
149 /**
150  * <p>Find dictionary words that match the text.</p>
151  *
152  * @param text A UText representing the text. The
153  * iterator is left after the longest prefix match in the dictionary.
154  * @param maxLength The maximum number of code units to match.
155  * @param lengths An array that is filled with the lengths of words that matched.
156  * @param count Filled with the number of elements output in lengths.
157  * @param limit The size of the lengths array; this limits the number of words output.
158  * @return The number of characters in text that were matched.
159  */
160  virtual int32_t matches( UText *text,
161                              int32_t maxLength,
162                              int32_t *lengths,
163                              int &count,
164                              int limit ) const;
165
166  /**
167   * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
168   *
169   * @param status A status code recording the success of the call.
170   * @return A StringEnumeration that will iterate through the whole dictionary.
171   * The caller is responsible for closing it. The order is unspecified.
172   */
173  virtual StringEnumeration *openWords( UErrorCode &status ) const;
174
175 /**
176  * <p>Add one word to the dictionary.</p>
177  *
178  * @param word A UChar buffer containing the word.
179  * @param length The length of the word.
180  * @param status The resultant status
181  */
182  virtual void addWord( const UChar *word,
183                        int32_t length,
184                        UErrorCode &status);
185
186#if 0
187 /**
188  * <p>Add all strings from a UEnumeration to the dictionary.</p>
189  *
190  * @param words A UEnumeration that will return the desired words.
191  * @param status The resultant status
192  */
193  virtual void addWords( UEnumeration *words, UErrorCode &status );
194#endif
195
196protected:
197 /**
198  * <p>Search the dictionary for matches.</p>
199  *
200  * @param text A UText representing the text. The
201  * iterator is left after the longest prefix match in the dictionary.
202  * @param maxLength The maximum number of code units to match.
203  * @param lengths An array that is filled with the lengths of words that matched.
204  * @param count Filled with the number of elements output in lengths.
205  * @param limit The size of the lengths array; this limits the number of words output.
206  * @param parent The parent of the current node
207  * @param pMatched The returned parent node matched the input
208  * @return The number of characters in text that were matched.
209  */
210  virtual int32_t search( UText *text,
211                              int32_t maxLength,
212                              int32_t *lengths,
213                              int &count,
214                              int limit,
215                              TernaryNode *&parent,
216                              UBool &pMatched ) const;
217
218private:
219 /**
220  * <p>Private constructor. The root node it not allocated.</p>
221  *
222  * @param status A status code recording the success of the call.
223  */
224  MutableTrieDictionary( UErrorCode &status );
225};
226
227/*******************************************************************
228 * CompactTrieDictionary
229 */
230
231/**
232 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
233 * to save space.</p>
234 */
235class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
236 private:
237    /**
238     * The root node of the trie
239     */
240
241  const CompactTrieHeader   *fData;
242
243    /**
244     * A UBool indicating whether or not we own the fData.
245     */
246
247  UBool                     fOwnData;
248
249    UDataMemory              *fUData;
250 public:
251  /**
252   * <p>Construct a dictionary from a UDataMemory.</p>
253   *
254   * @param data A pointer to a UDataMemory, which is adopted
255   * @param status A status code giving the result of the constructor
256   */
257  CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
258
259  /**
260   * <p>Construct a dictionary from raw saved data.</p>
261   *
262   * @param data A pointer to the raw data, which is still owned by the caller
263   * @param status A status code giving the result of the constructor
264   */
265  CompactTrieDictionary(const void *dataObj, UErrorCode &status);
266
267  /**
268   * <p>Construct a dictionary from a MutableTrieDictionary.</p>
269   *
270   * @param dict The dictionary to use as input.
271   * @param status A status code recording the success of the call.
272   */
273  CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
274
275  /**
276   * <p>Virtual destructor.</p>
277   */
278  virtual ~CompactTrieDictionary();
279
280 /**
281  * <p>Find dictionary words that match the text.</p>
282  *
283  * @param text A UText representing the text. The
284  * iterator is left after the longest prefix match in the dictionary.
285  * @param maxLength The maximum number of code units to match.
286  * @param lengths An array that is filled with the lengths of words that matched.
287  * @param count Filled with the number of elements output in lengths.
288  * @param limit The size of the lengths array; this limits the number of words output.
289  * @return The number of characters in text that were matched.
290  */
291  virtual int32_t matches( UText *text,
292                              int32_t rangeEnd,
293                              int32_t *lengths,
294                              int &count,
295                              int limit ) const;
296
297  /**
298   * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
299   *
300   * @param status A status code recording the success of the call.
301   * @return A StringEnumeration that will iterate through the whole dictionary.
302   * The caller is responsible for closing it. The order is unspecified.
303   */
304  virtual StringEnumeration *openWords( UErrorCode &status ) const;
305
306 /**
307  * <p>Return the size of the compact data.</p>
308  *
309  * @return The size of the dictionary's compact data.
310  */
311  virtual uint32_t dataSize() const;
312
313 /**
314  * <p>Return a void * pointer to the compact data, platform-endian.</p>
315  *
316  * @return The data for the compact dictionary, suitable for passing to the
317  * constructor.
318  */
319  virtual const void *data() const;
320
321 /**
322  * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
323  *
324  * @param status A status code recording the success of the call.
325  * @return A MutableTrieDictionary with the same data as this dictionary
326  */
327  virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
328
329 private:
330
331  /**
332   * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
333   *
334   * @param dict The dictionary to convert.
335   * @param status A status code recording the success of the call.
336   * @return A single data blob starting with a CompactTrieHeader.
337   */
338  static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
339                                                        UErrorCode &status );
340
341};
342
343U_NAMESPACE_END
344
345    /* TRIEDICT_H */
346#endif
347