1b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* 2b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru******************************************************************************* 3b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* 450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Copyright (C) 2008-2010, International Business Machines 5b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* Corporation, Google and others. All Rights Reserved. 6b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* 7b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru******************************************************************************* 8b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*/ 9b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* 10b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Author : eldawy@google.com (Mohamed Eldawy) 11b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * ucnvsel.h 12b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 13b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Purpose: To generate a list of encodings capable of handling 14b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * a given Unicode text 15b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 16b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Started 09-April-2008 17b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 18b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 19b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#ifndef __ICU_UCNV_SEL_H__ 20b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#define __ICU_UCNV_SEL_H__ 21b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 22b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uset.h" 23b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/utypes.h" 24b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/utf16.h" 25b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uenum.h" 26b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/ucnv.h" 2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/localpointer.h" 28b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 29b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 30b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * \file 31b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 32b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * A converter selector is built with a set of encoding/charset names 33b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * and given an input string returns the set of names of the 34b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * corresponding converters which can convert the string. 35b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 36b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * A converter selector can be serialized into a buffer and reopened 37b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * from the serialized form. 38b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 39b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 41b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @{ 42b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The selector data structure 43b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 44b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustruct UConverterSelector; 45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querutypedef struct UConverterSelector UConverterSelector; 46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** @} */ 47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 48b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 49b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Open a selector. 50b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * If converterListSize is 0, build for all available converters. 51b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * If excludedCodePoints is NULL, don't exclude any code points. 52b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 53b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param converterList a pointer to encoding names needed to be involved. 54b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Can be NULL if converterListSize==0. 55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The list and the names will be cloned, and the caller 56b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * retains ownership of the original. 57b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param converterListSize number of encodings in above list. 58b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * If 0, builds a selector for all available converters. 59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param excludedCodePoints a set of code points to be excluded from consideration. 60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * That is, excluded code points in a string do not change 61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * the selection result. (They might be handled by a callback.) 62b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Use NULL to exclude nothing. 63b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param whichSet what converter set to use? Use this to determine whether 64b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * to consider only roundtrip mappings or also fallbacks. 65b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status an in/out ICU UErrorCode 66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return the new selector 67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2 69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE UConverterSelector* U_EXPORT2 71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_open(const char* const* converterList, int32_t converterListSize, 72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const USet* excludedCodePoints, 73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UConverterUnicodeSet whichSet, UErrorCode* status); 74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Closes a selector. 77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * If any Enumerations were returned by ucnv_select*, they become invalid. 78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * They can be closed before or after calling ucnv_closeSelector, 79b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * but should never be used after the selector is closed. 80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @see ucnv_selectForString 82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @see ucnv_selectForUTF8 83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 84b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sel selector to close 85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2 87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 8850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE void U_EXPORT2 89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_close(UConverterSelector *sel); 90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_SHOW_CPLUSPLUS_API 9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \class LocalUConverterSelectorPointer 9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close(). 9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For most methods see the LocalPointerBase base class. 9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see LocalPointerBase 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see LocalPointer 10227f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close); 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Open a selector from its serialized form. 112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The buffer must remain valid and unchanged for the lifetime of the selector. 113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * This is much faster than creating a selector from scratch. 114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Using a serialized form from a different machine (endianness/charset) is supported. 115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param buffer pointer to the serialized form of a converter selector; 117b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * must be 32-bit-aligned 118b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param length the capacity of this buffer (can be equal to or larger than 119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * the actual data length) 120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status an in/out ICU UErrorCode 121b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return the new selector 122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2 124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 12550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE UConverterSelector* U_EXPORT2 126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status); 127b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 129b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Serialize a selector into a linear buffer. 130b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The serialized form is portable to different machines. 131b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 132b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sel selector to consider 133b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param buffer pointer to 32-bit-aligned memory to be filled with the 134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * serialized form of this converter selector 135b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param bufferCapacity the capacity of this buffer 136b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status an in/out ICU UErrorCode 137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return the required buffer capacity to hold serialize data (even if the call fails 138b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity) 139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2 141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2 143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_serialize(const UConverterSelector* sel, 144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void* buffer, int32_t bufferCapacity, UErrorCode* status); 145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 147b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Select converters that can map all characters in a UTF-16 string, 148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * ignoring the excluded code points. 149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sel a selector 151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param s UTF-16 string 152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param length length of the string, or -1 if NUL-terminated 153b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status an in/out ICU UErrorCode 154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return an enumeration containing encoding names. 155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The returned encoding names and their order will be the same as 156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * supplied when building the selector. 157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2 159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE UEnumeration * U_EXPORT2 161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_selectForString(const UConverterSelector* sel, 162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *s, int32_t length, UErrorCode *status); 163b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Select converters that can map all characters in a UTF-8 string, 166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * ignoring the excluded code points. 167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 168b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sel a selector 169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param s UTF-8 string 170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param length length of the string, or -1 if NUL-terminated 171b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status an in/out ICU UErrorCode 172b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return an enumeration containing encoding names. 173b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The returned encoding names and their order will be the same as 174b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * supplied when building the selector. 175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2 177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE UEnumeration * U_EXPORT2 179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_selectForUTF8(const UConverterSelector* sel, 180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *s, int32_t length, UErrorCode *status); 181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif /* __ICU_UCNV_SEL_H__ */ 183