// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2008-2011, International Business Machines
*   Corporation, Google and others.  All Rights Reserved.
*
*******************************************************************************
*/
/*
 * Author : eldawy@google.com (Mohamed Eldawy)
 * ucnvsel.h
 *
 * Purpose: To generate a list of encodings capable of handling
 * a given Unicode text
 *
 * Started 09-April-2008
 */

21#ifndef __ICU_UCNV_SEL_H__
22#define __ICU_UCNV_SEL_H__
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_CONVERSION
27
28#include "unicode/uset.h"
29#include "unicode/utf16.h"
30#include "unicode/uenum.h"
31#include "unicode/ucnv.h"
32#include "unicode/localpointer.h"
33
/**
 * \file
 *
 * A converter selector is built with a set of encoding/charset names
 * and given an input string returns the set of names of the
 * corresponding converters which can convert the string.
 *
 * A converter selector can be serialized into a buffer and reopened
 * from the serialized form.
 */

/**
 * @{
 * The selector data structure.
 * Only a forward declaration is provided here, so the type is opaque to
 * callers: obtain an instance from ucnvsel_open() or
 * ucnvsel_openFromSerialized() and release it with ucnvsel_close().
 */
struct UConverterSelector;
typedef struct UConverterSelector UConverterSelector;
/** @} */

53/**
54 * Open a selector.
55 * If converterListSize is 0, build for all available converters.
56 * If excludedCodePoints is NULL, don't exclude any code points.
57 *
58 * @param converterList a pointer to encoding names needed to be involved.
59 *                      Can be NULL if converterListSize==0.
60 *                      The list and the names will be cloned, and the caller
61 *                      retains ownership of the original.
62 * @param converterListSize number of encodings in above list.
63 *                          If 0, builds a selector for all available converters.
64 * @param excludedCodePoints a set of code points to be excluded from consideration.
65 *                           That is, excluded code points in a string do not change
66 *                           the selection result. (They might be handled by a callback.)
67 *                           Use NULL to exclude nothing.
68 * @param whichSet what converter set to use? Use this to determine whether
69 *                 to consider only roundtrip mappings or also fallbacks.
70 * @param status an in/out ICU UErrorCode
71 * @return the new selector
72 *
73 * @stable ICU 4.2
74 */
75U_STABLE UConverterSelector* U_EXPORT2
76ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
77             const USet* excludedCodePoints,
78             const UConverterUnicodeSet whichSet, UErrorCode* status);
79
80/**
81 * Closes a selector.
82 * If any Enumerations were returned by ucnv_select*, they become invalid.
83 * They can be closed before or after calling ucnv_closeSelector,
84 * but should never be used after the selector is closed.
85 *
86 * @see ucnv_selectForString
87 * @see ucnv_selectForUTF8
88 *
89 * @param sel selector to close
90 *
91 * @stable ICU 4.2
92 */
93U_STABLE void U_EXPORT2
94ucnvsel_close(UConverterSelector *sel);
95
#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUConverterSelectorPointer
 * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);

U_NAMESPACE_END

#endif  /* U_SHOW_CPLUSPLUS_API */

115/**
116 * Open a selector from its serialized form.
117 * The buffer must remain valid and unchanged for the lifetime of the selector.
118 * This is much faster than creating a selector from scratch.
119 * Using a serialized form from a different machine (endianness/charset) is supported.
120 *
121 * @param buffer pointer to the serialized form of a converter selector;
122 *               must be 32-bit-aligned
123 * @param length the capacity of this buffer (can be equal to or larger than
124 *               the actual data length)
125 * @param status an in/out ICU UErrorCode
126 * @return the new selector
127 *
128 * @stable ICU 4.2
129 */
130U_STABLE UConverterSelector* U_EXPORT2
131ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
132
133/**
134 * Serialize a selector into a linear buffer.
135 * The serialized form is portable to different machines.
136 *
137 * @param sel selector to consider
138 * @param buffer pointer to 32-bit-aligned memory to be filled with the
139 *               serialized form of this converter selector
140 * @param bufferCapacity the capacity of this buffer
141 * @param status an in/out ICU UErrorCode
142 * @return the required buffer capacity to hold serialize data (even if the call fails
143 *         with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
144 *
145 * @stable ICU 4.2
146 */
147U_STABLE int32_t U_EXPORT2
148ucnvsel_serialize(const UConverterSelector* sel,
149                  void* buffer, int32_t bufferCapacity, UErrorCode* status);
150
151/**
152 * Select converters that can map all characters in a UTF-16 string,
153 * ignoring the excluded code points.
154 *
155 * @param sel a selector
156 * @param s UTF-16 string
157 * @param length length of the string, or -1 if NUL-terminated
158 * @param status an in/out ICU UErrorCode
159 * @return an enumeration containing encoding names.
160 *         The returned encoding names and their order will be the same as
161 *         supplied when building the selector.
162 *
163 * @stable ICU 4.2
164 */
165U_STABLE UEnumeration * U_EXPORT2
166ucnvsel_selectForString(const UConverterSelector* sel,
167                        const UChar *s, int32_t length, UErrorCode *status);
168
169/**
170 * Select converters that can map all characters in a UTF-8 string,
171 * ignoring the excluded code points.
172 *
173 * @param sel a selector
174 * @param s UTF-8 string
175 * @param length length of the string, or -1 if NUL-terminated
176 * @param status an in/out ICU UErrorCode
177 * @return an enumeration containing encoding names.
178 *         The returned encoding names and their order will be the same as
179 *         supplied when building the selector.
180 *
181 * @stable ICU 4.2
182 */
183U_STABLE UEnumeration * U_EXPORT2
184ucnvsel_selectForUTF8(const UConverterSelector* sel,
185                      const char *s, int32_t length, UErrorCode *status);
186
187#endif  /* !UCONFIG_NO_CONVERSION */
188
189#endif  /* __ICU_UCNV_SEL_H__ */
190