1/*
2*******************************************************************************
3*
4*   Copyright (C) 2008-2010, International Business Machines
5*   Corporation, Google and others.  All Rights Reserved.
6*
7*******************************************************************************
8*/
9/*
10 * Author : eldawy@google.com (Mohamed Eldawy)
11 * ucnvsel.h
12 *
13 * Purpose: To generate a list of encodings capable of handling
14 * a given Unicode text
15 *
16 * Started 09-April-2008
17 */
18
19#ifndef __ICU_UCNV_SEL_H__
20#define __ICU_UCNV_SEL_H__
21
22#include "unicode/uset.h"
23#include "unicode/utypes.h"
24#include "unicode/utf16.h"
25#include "unicode/uenum.h"
26#include "unicode/ucnv.h"
27#include "unicode/localpointer.h"
28
29/**
30 * \file
31 *
32 * A converter selector is built with a set of encoding/charset names
33 * and given an input string returns the set of names of the
34 * corresponding converters which can convert the string.
35 *
36 * A converter selector can be serialized into a buffer and reopened
37 * from the serialized form.
38 */
39
40/**
41 * @{
42 * The selector data structure (opaque: only forward-declared in this header)
43 */
44struct UConverterSelector;
45typedef struct UConverterSelector UConverterSelector;
46/** @} */
47
48/**
49 * Open a selector.
50 * If converterListSize is 0, build for all available converters.
51 * If excludedCodePoints is NULL, don't exclude any code points.
52 *
53 * @param converterList an array of the encoding names the selector should consider.
54 *                      Can be NULL if converterListSize==0.
55 *                      The list and the names will be cloned, and the caller
56 *                      retains ownership of the original.
57 * @param converterListSize number of encodings in above list.
58 *                          If 0, builds a selector for all available converters.
59 * @param excludedCodePoints a set of code points to be excluded from consideration.
60 *                           That is, excluded code points in a string do not change
61 *                           the selection result. (They might be handled by a callback.)
62 *                           Use NULL to exclude nothing.
63 * @param whichSet which converter set to use; this determines whether
64 *                 to consider only roundtrip mappings or also fallbacks.
65 * @param status an in/out ICU UErrorCode
66 * @return the new selector; close it with ucnvsel_close()
67 *
68 * @stable ICU 4.2
69 */
70U_STABLE UConverterSelector* U_EXPORT2
71ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
72             const USet* excludedCodePoints,
73             const UConverterUnicodeSet whichSet, UErrorCode* status);
74
75/**
76 * Closes a selector.
77 * If any enumerations were returned by ucnvsel_select*, they become invalid.
78 * They can be closed before or after calling ucnvsel_close,
79 * but should never be used after the selector is closed.
80 *
81 * @see ucnvsel_selectForString
82 * @see ucnvsel_selectForUTF8
83 *
84 * @param sel selector to close
85 *
86 * @stable ICU 4.2
87 */
88U_STABLE void U_EXPORT2
89ucnvsel_close(UConverterSelector *sel);
90
91#if U_SHOW_CPLUSPLUS_API
92
93U_NAMESPACE_BEGIN
94
95/**
96 * \class LocalUConverterSelectorPointer
97 * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
98 * For most methods see the LocalPointerBase base class.
99 *
100 * @see LocalPointerBase
101 * @see LocalPointer
102 * @draft ICU 4.4
103 */
104U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
105
106U_NAMESPACE_END
107
108#endif  /* U_SHOW_CPLUSPLUS_API */
109
110/**
111 * Open a selector from its serialized form.
112 * The buffer must remain valid and unchanged for the lifetime of the selector.
113 * This is much faster than creating a selector from scratch.
114 * Using a serialized form from a different machine (endianness/charset) is supported.
115 *
116 * @param buffer pointer to the serialized form of a converter selector;
117 *               must be 32-bit-aligned
118 * @param length the capacity of this buffer (can be equal to or larger than
119 *               the actual data length)
120 * @param status an in/out ICU UErrorCode
121 * @return the new selector; close it with ucnvsel_close()
122 *
123 * @stable ICU 4.2
124 */
125U_STABLE UConverterSelector* U_EXPORT2
126ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
127
128/**
129 * Serialize a selector into a linear buffer.
130 * The serialized form is portable to different machines.
131 *
132 * @param sel selector to consider
133 * @param buffer pointer to 32-bit-aligned memory to be filled with the
134 *               serialized form of this converter selector
135 * @param bufferCapacity the capacity of this buffer
136 * @param status an in/out ICU UErrorCode
137 * @return the required buffer capacity to hold the serialized data (even if the call
138 *         fails with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
139 *
140 * @stable ICU 4.2
141 */
142U_STABLE int32_t U_EXPORT2
143ucnvsel_serialize(const UConverterSelector* sel,
144                  void* buffer, int32_t bufferCapacity, UErrorCode* status);
145
146/**
147 * Select converters that can map all characters in a UTF-16 string,
148 * ignoring the excluded code points.
149 *
150 * @param sel a selector
151 * @param s UTF-16 string
152 * @param length length of the string, or -1 if NUL-terminated
153 * @param status an in/out ICU UErrorCode
154 * @return an enumeration containing encoding names; invalid once sel is closed.
155 *         The returned encoding names and their order will be the same as
156 *         supplied when building the selector.
157 *
158 * @stable ICU 4.2
159 */
160U_STABLE UEnumeration * U_EXPORT2
161ucnvsel_selectForString(const UConverterSelector* sel,
162                        const UChar *s, int32_t length, UErrorCode *status);
163
164/**
165 * Select converters that can map all characters in a UTF-8 string,
166 * ignoring the excluded code points.
167 *
168 * @param sel a selector
169 * @param s UTF-8 string
170 * @param length length of the string, or -1 if NUL-terminated
171 * @param status an in/out ICU UErrorCode
172 * @return an enumeration containing encoding names; invalid once sel is closed.
173 *         The returned encoding names and their order will be the same as
174 *         supplied when building the selector.
175 *
176 * @stable ICU 4.2
177 */
178U_STABLE UEnumeration * U_EXPORT2
179ucnvsel_selectForUTF8(const UConverterSelector* sel,
180                      const char *s, int32_t length, UErrorCode *status);
181
182#endif  /* __ICU_UCNV_SEL_H__ */
183