1/*
2 ******************************************************************************
3 *   Copyright (C) 1996-2010, International Business Machines                 *
4 *   Corporation and others.  All Rights Reserved.                            *
5 ******************************************************************************
6 */
7
8/**
9 * \file
10 * \brief C++ API: Collation data used to compute minLengthInChars.
11 * \internal
12 */
13
14#ifndef COLL_DATA_H
15#define COLL_DATA_H
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_COLLATION
20
21#include "unicode/uobject.h"
22#include "unicode/ucol.h"
23
24U_NAMESPACE_BEGIN
25
26/**
27 * The size, in <code>char</code>s, of the internal buffer for the Collator's short description string.
28 * @internal ICU 4.0.1 technology preview
29 */
30#define KEY_BUFFER_SIZE 64
31
32 /**
33  * The size, in CEs, of the internal CE buffer in a <code>CEList</code> object.
34  * @internal ICU 4.0.1 technology preview
35  */
36#define CELIST_BUFFER_SIZE 4
37
38/**
39 * \def INSTRUMENT_CELIST
40 * Define this to enable the <code>CEList</code> objects to collect
41 * statistics. The definition below is commented out, so this is off by default.
42 * @internal ICU 4.0.1 technology preview
43 */
44//#define INSTRUMENT_CELIST
45
46 /**
47  * The size of the initial list in a <code>StringList</code> object.
48  * @internal ICU 4.0.1 technology preview
49  */
50#define STRING_LIST_BUFFER_SIZE 16
51
52/**
53 * \def INSTRUMENT_STRING_LIST
54 * Define this to enable the <code>StringList</code> objects to
55 * collect statistics. The definition below is commented out, so this is off by default.
56 * @internal ICU 4.0.1 technology preview
57 */
58//#define INSTRUMENT_STRING_LIST
59
60 /**
61  * This object holds a list of CEs generated from a particular <code>UnicodeString</code>.
62  * NOTE(review): it holds a raw pointer (<code>ces</code>) but declares no copy constructor or
63  * assignment operator - confirm that instances are never copied.
64  * @internal ICU 4.0.1 technology preview
65  */
66class U_I18N_API CEList : public UObject
67{
68public:
69    /**
70     * Construct a <code>CEList</code> object.
71     *
72     * @param coll - the Collator used to collect the CEs.
73     * @param string - the string for which to collect the CEs.
74     * @param status - will be set if any errors occur.
75     *
76     * Note: if on return, status is set to an error code,
77     * the only safe thing to do with this object is to call
78     * the destructor.
79     *
80     * @internal ICU 4.0.1 technology preview
81     */
82    CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
83
84    /**
85     * The destructor.
86     * @internal ICU 4.0.1 technology preview
87     */
88    ~CEList();
89
90    /**
91     * Return the number of CEs in the list.
92     *
93     * @return the number of CEs in the list.
94     *
95     * @internal ICU 4.0.1 technology preview
96     */
97    int32_t size() const;
98
99    /**
100     * Get a particular CE from the list.
101     *
102     * @param index - the index of the CE to return
103     *
104     * @return the CE, or <code>0</code> if <code>index</code> is out of range
105     *
106     * @internal ICU 4.0.1 technology preview
107     */
108    uint32_t get(int32_t index) const;
109
110    /**
111     * Check if the CEs in another <code>CEList</code> match the
112     * suffix of this list starting at a given offset.
113     *
114     * @param offset - the offset of the suffix
115     * @param other - the other <code>CEList</code>
116     *
117     * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
118     *
119     * @internal ICU 4.0.1 technology preview
120     */
121    UBool matchesAt(int32_t offset, const CEList *other) const;
122
123    /**
124     * The index operator. Unlike <code>get</code>, no out-of-range result is documented for it.
125     *
126     * @param index - the index
127     *
128     * @return a non-const reference to the given CE in the list, even though the operator itself is const
129     *
130     * @internal ICU 4.0.1 technology preview
131     */
132    uint32_t &operator[](int32_t index) const;
133
134    /**
135     * UObject glue...
136     * @internal ICU 4.0.1 technology preview
137     */
138    virtual UClassID getDynamicClassID() const;
139    /**
140     * UObject glue...
141     * @internal ICU 4.0.1 technology preview
142     */
143    static UClassID getStaticClassID();
144
145private:
146    void add(uint32_t ce, UErrorCode &status); // presumably appends ce to the list - confirm in the implementation
147
148    uint32_t ceBuffer[CELIST_BUFFER_SIZE]; // fixed-size internal CE buffer
149    uint32_t *ces;      // NOTE(review): presumably points at ceBuffer or at a larger allocation - confirm
150    int32_t listMax;    // presumably the capacity of ces - confirm
151    int32_t listSize;   // presumably the number of CEs stored, as reported by size() - confirm
152
153#ifdef INSTRUMENT_CELIST
154    static int32_t _active;        // statistics, only compiled in when INSTRUMENT_CELIST is defined
155    static int32_t _histogram[10]; // statistics, only compiled in when INSTRUMENT_CELIST is defined
156#endif
157};
158
159/**
160 * StringList
161 *
162 * This object holds a list of <code>UnicodeString</code> objects. NOTE(review): it holds a raw pointer
163 * (<code>strings</code>) but declares no copy constructor or assignment operator - confirm it is never copied.
164 * @internal ICU 4.0.1 technology preview
165 */
166class U_I18N_API StringList : public UObject
167{
168public:
169    /**
170     * Construct an empty <code>StringList</code>
171     *
172     * @param status - will be set if any errors occur.
173     *
174     * Note: if on return, status is set to an error code,
175     * the only safe thing to do with this object is to call
176     * the destructor.
177     *
178     * @internal ICU 4.0.1 technology preview
179     */
180    StringList(UErrorCode &status);
181
182    /**
183     * The destructor.
184     *
185     * @internal ICU 4.0.1 technology preview
186     */
187    ~StringList();
188
189    /**
190     * Add a string to the list.
191     *
192     * @param string - the string to add
193     * @param status - will be set if any errors occur.
194     *
195     * @internal ICU 4.0.1 technology preview
196     */
197    void add(const UnicodeString *string, UErrorCode &status);
198
199    /**
200     * Add an array of Unicode code points to the list.
201     *
202     * @param chars - the address of the array of code points
203     * @param count - the number of code points in the array
204     * @param status - will be set if any errors occur.
205     *
206     * @internal ICU 4.0.1 technology preview
207     */
208    void add(const UChar *chars, int32_t count, UErrorCode &status);
209
210    /**
211     * Get a particular string from the list.
212     *
213     * @param index - the index of the string
214     *
215     * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
216     *         if <code>index</code> is out of bounds.
217     *
218     * @internal ICU 4.0.1 technology preview
219     */
220    const UnicodeString *get(int32_t index) const;
221
222    /**
223     * Get the number of strings in the list.
224     *
225     * @return the number of strings in the list.
226     *
227     * @internal ICU 4.0.1 technology preview
228     */
229    int32_t size() const;
230
231    /**
232     * the UObject glue...
233     * @internal ICU 4.0.1 technology preview
234     */
235    virtual UClassID getDynamicClassID() const;
236    /**
237     * the UObject glue...
238     * @internal ICU 4.0.1 technology preview
239     */
240    static UClassID getStaticClassID();
241
242private:
243    UnicodeString *strings; // NOTE(review): presumably the backing array of the list - confirm ownership
244    int32_t listMax;        // presumably the capacity of strings - confirm
245    int32_t listSize;       // presumably the number of strings stored, as reported by size() - confirm
246
247#ifdef INSTRUMENT_STRING_LIST
248    static int32_t _lists;          // statistics, only compiled in when INSTRUMENT_STRING_LIST is defined
249    static int32_t _strings;        // statistics, only compiled in when INSTRUMENT_STRING_LIST is defined
250    static int32_t _histogram[101]; // statistics, only compiled in when INSTRUMENT_STRING_LIST is defined
251#endif
252};
253
254/*
255 * Forward declarations of internal classes; this header refers to them only through pointers or as friends.
256 */
257class StringToCEsMap;
258class CEToStringsMap;
259class CollDataCache;
260
261/**
262 * CollData
263 *
264 * This class holds the Collator-specific data needed to
265 * compute the length of the shortest string that can
266 * generate a particular list of CEs.
267 *
268 * <code>CollData</code> objects are quite expensive to compute. Because
269 * of this, they are cached. When you call <code>CollData::open</code> it
270 * returns a reference counted cached object. When you call <code>CollData::close</code>
271 * the reference count on the object is decremented but the object is not deleted.
272 *
273 * If you do not need to reuse any unreferenced objects in the cache, you can call
274 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
275 * objects, you can call <code>CollData::freeCollDataCache</code>.
276 *
277 * @internal ICU 4.0.1 technology preview
278 */
279class U_I18N_API CollData : public UObject
280{
281public:
282    /**
283     * Open a <code>CollData</code> object for a collator.
284     *
285     * @param collator - the collator
286     * @param status - will be set if any errors occur.
287     *
288     * @return the <code>CollData</code> object. You must call
289     *         <code>close</code> when you are done using the object.
290     *
291     * Note: if on return, status is set to an error code,
292     * the only safe thing to do with this object is to call
293     * <code>CollData::close</code>.
294     *
295     * @internal ICU 4.0.1 technology preview
296     */
297    static CollData *open(UCollator *collator, UErrorCode &status);
298
299    /**
300     * Release a <code>CollData</code> object.
301     *
302     * @param collData - the object
303     *
304     * @internal ICU 4.0.1 technology preview
305     */
306    static void close(CollData *collData);
307
308    /**
309     * Get the <code>UCollator</code> object used to create this object.
310     * The object returned may not be the exact object that was used to
311     * create this object, but it will have the same behavior.
312     * @internal ICU 4.0.1 technology preview
313     */
314    UCollator *getCollator() const;
315
316    /**
317     * Get a list of all the strings which generate a list
318     * of CEs starting with a given CE.
319     *
320     * @param ce - the CE (NOTE(review): taken as int32_t here, while <code>CEList</code> exposes CEs as uint32_t)
321     *
322     * @return a <code>StringList</code> object containing all
323     *        the strings, or <code>NULL</code> if there are
324     *        no such strings.
325     *
326     * @internal ICU 4.0.1 technology preview.
327     */
328    const StringList *getStringList(int32_t ce) const;
329
330    /**
331     * Get a list of the CEs generated by a particular string.
332     *
333     * @param string - the string
334     *
335     * @return a <code>CEList</code> object containing the CEs. You
336     *         must call <code>freeCEList</code> when you are finished
337     *         using the <code>CEList</code>.
338     *
339     * @internal ICU 4.0.1 technology preview.
340     */
341    const CEList *getCEList(const UnicodeString *string) const;
342
343    /**
344     * Release a <code>CEList</code> returned by <code>getCEList</code>.
345     *
346     * @param list - the <code>CEList</code> to free.
347     *
348     * @internal ICU 4.0.1 technology preview
349     */
350    void freeCEList(const CEList *list);
351
352    /**
353     * Return the length of the shortest string that will generate
354     * the given list of CEs.
355     *
356     * @param ces - the CEs
357     * @param offset - the offset of the first CE in the list to use.
358     *
359     * @return the length of the shortest string.
360     *
361     * @internal ICU 4.0.1 technology preview
362     */
363    int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
364
365
366    /**
367     * Return the length of the shortest string that will generate
368     * the given list of CEs.
369     *
370     * Note: the algorithm used to do this computation is recursive. To
371     * limit the amount of recursion, a "history" list is used to record
372     * the best answer starting at a particular offset in the list of CEs.
373     * If the same offset is visited again during the recursion, the answer
374     * in the history list is used.
375     *
376     * @param ces - the CEs
377     * @param offset - the offset of the first CE in the list to use.
378     * @param history - the history list. Must be at least as long as
379     *                 the number of CEs in the <code>CEList</code>
380     *
381     * @return the length of the shortest string.
382     *
383     * @internal ICU 4.0.1 technology preview
384     */
385   int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
386
387   /**
388    * UObject glue...
389    * @internal ICU 4.0.1 technology preview
390    */
391    virtual UClassID getDynamicClassID() const;
392   /**
393    * UObject glue...
394    * @internal ICU 4.0.1 technology preview
395    */
396    static UClassID getStaticClassID();
397
398    /**
399     * <code>CollData</code> objects are expensive to compute, and so
400     * may be cached. This routine will free the cached objects and delete
401     * the cache.
402     *
403     * WARNING: Don't call this until you have called <code>close</code>
404     * for each <code>CollData</code> object that you have used. Also,
405     * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
406     * at the same time.
407     *
408     * @internal ICU 4.0.1 technology preview
409     */
410    static void freeCollDataCache();
411
412    /**
413     * <code>CollData</code> objects are expensive to compute, and so
414     * may be cached. This routine will remove any unused <code>CollData</code>
415     * objects from the cache.
416     *
417     * @internal ICU 4.0.1 technology preview
418     */
419    static void flushCollDataCache();
420
421private:
422    friend class CollDataCache;
423    friend class CollDataCacheEntry;
424
425    CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); // private: public code uses open()
426    ~CollData(); // private: public code uses close()
427
428    CollData(); // private default constructor
429
430    static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength); // presumably builds the cache key for collator - confirm
431
432    static CollDataCache *getCollDataCache(); // presumably returns the shared cache (collDataCache) - confirm
433
434    UCollator      *coll;                  // presumably the collator returned by getCollator() - confirm
435    StringToCEsMap *charsToCEList;         // presumably the lookup behind getCEList() - confirm
436    CEToStringsMap *ceToCharsStartingWith; // presumably the lookup behind getStringList() - confirm
437
438    char keyBuffer[KEY_BUFFER_SIZE]; // fixed-size internal key buffer
439    char *key;                       // NOTE(review): presumably points at keyBuffer or at a larger allocation - confirm
440
441    static CollDataCache *collDataCache; // one pointer shared by all CollData objects (static member)
442
443    uint32_t minHan; // NOTE(review): meaning not visible in this header - presumably a Han CE range bound; confirm
444    uint32_t maxHan; // NOTE(review): meaning not visible in this header - presumably a Han CE range bound; confirm
445
446    uint32_t jamoLimits[4]; // NOTE(review): meaning not visible in this header - presumably Jamo CE boundaries; confirm
447};
448
449U_NAMESPACE_END
450
451#endif // #if !UCONFIG_NO_COLLATION
452#endif // #ifndef COLL_DATA_H
453