16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ********************************************************************** 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copyright (C) 2005-2013, International Business Machines 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Corporation and others. All Rights Reserved. 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ********************************************************************** 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * file name: ucsdet.h 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * encoding: US-ASCII 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * indentation:4 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * created on: 2005Aug04 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * created by: Andy Heninger 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ICU Character Set Detection, API for C 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Draft version 18 Oct 2005 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#ifndef __UCSDET_H 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define __UCSDET_H 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_CONVERSION 
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/localpointer.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uenum.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * \file 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * \brief C API: Charset Detection API 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This API provides a facility for detecting the 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * charset or encoding of character data in an unknown text format. 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The input data can be from an array of bytes. 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p> 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Character set detection is at best an imprecise operation. The detection 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * process will attempt to identify the charset that best matches the characteristics 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the byte data, but the process is partly statistical in nature, and 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the results can not be guaranteed to always be correct. 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p> 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For best accuracy in charset detection, the input data should be primarily 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in a single language, and a minimum of a few hundred bytes worth of plain text 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in the language are needed. 
The detection process will attempt to 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ignore html or xml style markup that could otherwise obscure the content. 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct UCharsetDetector; 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Structure representing a charset detector 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtypedef struct UCharsetDetector UCharsetDetector; 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct UCharsetMatch; 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Opaque structure representing a match that was identified 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from a charset detection operation. 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtypedef struct UCharsetMatch UCharsetMatch; 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Open a charset detector. 
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status Any error conditions occurring during the open 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * operation are reported back in this variable. 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return the newly opened charset detector. 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE UCharsetDetector * U_EXPORT2 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_open(UErrorCode *status); 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Close a charset detector. All storage and any other resources 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * owned by this charset detector will be released. Failure to 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * close a charset detector when finished with it can result in 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * memory leaks in the application. 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd The charset detector to be closed. 
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE void U_EXPORT2 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_close(UCharsetDetector *ucsd); 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if U_SHOW_CPLUSPLUS_API 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * \class LocalUCharsetDetectorPointer 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * "Smart pointer" class, closes a UCharsetDetector via ucsdet_close(). 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For most methods see the LocalPointerBase base class. 
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @see LocalPointerBase 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @see LocalPointer 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 4.4 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_DEFINE_LOCAL_OPEN_POINTER(LocalUCharsetDetectorPointer, UCharsetDetector, ucsdet_close); 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Set the input byte data whose charset is to be detected. 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Ownership of the input text byte array remains with the caller. 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The input string must not be altered or deleted until the charset 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * detector is either closed or reset to refer to different input text. 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd the charset detector to be used. 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param textIn the input text of unknown encoding. 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param len the length of the input text, or -1 if the text 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is NUL terminated. 
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status any error conditions are reported back in this variable. 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE void U_EXPORT2 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status); 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** Set the declared encoding for charset detection. 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The declared encoding of an input text is an encoding obtained 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * by the user from an http header or xml declaration or similar source that 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * can be provided as an additional hint to the charset detector. 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * How and whether the declared encoding will be used during the 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * detection process is TBD. 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd the charset detector to be used. 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param encoding an encoding for the current data obtained from 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a header or declaration or other source outside 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the byte data itself. 
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param length the length of the encoding name, or -1 if the name string 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is NUL terminated. 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status any error conditions are reported back in this variable. 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE void U_EXPORT2 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status); 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the charset that best matches the supplied input data. 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note though, that because the detection 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * only looks at the start of the input data, 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * there is a possibility that the returned charset will fail to handle 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the full set of input data. 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p> 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The returned UCharsetMatch object is owned by the UCharsetDetector. 
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It will remain valid until the detector input is reset, or until 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the detector is closed. 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p> 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The function will fail if 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <ul> 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <li>no charset appears to match the data.</li> 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <li>no input text has been provided</li> 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * </ul> 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd the charset detector to be used. 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status any error conditions are reported back in this variable. 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return a UCharsetMatch representing the best matching charset, 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or NULL if no charset matches the byte data. 
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE const UCharsetMatch * U_EXPORT2 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status); 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Find all charset matches that appear to be consistent with the input, 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * returning an array of results. The results are ordered with the 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * best quality match first. 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Because the detection only looks at a limited amount of the 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * input byte data, some of the returned charsets may fail to handle 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * all of the input data. 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p> 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The returned UCharsetMatch objects are owned by the UCharsetDetector. 
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * They will remain valid until the detector is closed or modified. 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p> 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return an error if 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <ul> 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <li>no charsets appear to match the input data.</li> 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <li>no input text has been provided</li> 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * </ul> 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd the charset detector to be used. 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param matchesFound pointer to a variable that will be set to the 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * number of charsets identified that are consistent with 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the input data. Output only. 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status any error conditions are reported back in this variable. 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return A pointer to an array of pointers to UCharsetMatch objects. 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This array, and the UCharsetMatch instances to which it refers, 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * are owned by the UCharsetDetector, and will remain valid until 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the detector is closed or modified. 
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE const UCharsetMatch ** U_EXPORT2 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status); 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get the name of the charset represented by a UCharsetMatch. 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The storage for the returned name string is owned by the 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * UCharsetMatch, and will remain valid while the UCharsetMatch 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is valid. 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The name returned is suitable for use with the ICU conversion APIs. 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsm The charset match object. 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status Any error conditions are reported back in this variable. 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return The name of the matching charset. 
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE const char * U_EXPORT2 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status); 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get a confidence number for the quality of the match of the byte 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * data with the charset. Confidence numbers range from zero to 100, 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * with 100 representing complete confidence and zero representing 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * no confidence. 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The confidence values are somewhat arbitrary. They define an 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ordering within the results for any single detection operation 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * but are not generally comparable between the results for different input. 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * A confidence value of ten does have a general meaning - it is used 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * for charsets that can represent the input data, but for which there 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is no other indication that suggests that the charset is the correct one. 
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Pure 7 bit ASCII data, for example, is compatible with a 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * great many charsets, most of which will appear as possible matches 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * with a confidence of 10. 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsm The charset match object. 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status Any error conditions are reported back in this variable. 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return A confidence number for the charset match. 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE int32_t U_EXPORT2 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status); 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get the RFC 3066 code for the language of the input data. 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The Charset Detection service is intended primarily for detecting 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * charsets, not language. For some, but not all, charsets, a language is 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * identified as a byproduct of the detection process, and that is what 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is returned by this function. 
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * CAUTION: 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1. Language information is not available for input data encoded in 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * all charsets. In particular, no language is identified 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * for UTF-8 input data. 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2. Closely related languages may sometimes be confused. 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If more accurate language detection is required, a linguistic 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * analysis package should be used. 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The storage for the returned name string is owned by the 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * UCharsetMatch, and will remain valid while the UCharsetMatch 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is valid. 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsm The charset match object. 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status Any error conditions are reported back in this variable. 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return The RFC 3066 code for the language of the input data, or 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * an empty string if the language could not be determined. 
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE const char * U_EXPORT2 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status); 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get the entire input text as a UChar string, placing it into 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a caller-supplied buffer. A terminating 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * NUL character will be appended to the buffer if space is available. 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The number of UChars in the output string, not including the terminating 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * NUL, is returned. 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If the supplied buffer is smaller than required to hold the output, 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the contents of the buffer are undefined. The full output string length 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (in UChars) is returned as always, and can be used to allocate a buffer 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the correct size. 
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsm The charset match object. 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param buf A UChar buffer to be filled with the converted text data. 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param cap The capacity of the buffer in UChars. 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status Any error conditions are reported back in this variable. 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return The number of UChars in the output string. 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE int32_t U_EXPORT2 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_getUChars(const UCharsetMatch *ucsm, 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *buf, int32_t cap, UErrorCode *status); 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get an iterator over the set of all detectable charsets - 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * over the charsets that are known to the charset detection 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * service. 
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The returned UEnumeration provides access to the names of 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the charsets. 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p> 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The state of the Charset detector that is passed in does not 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * affect the result of this function, but requiring a valid, open 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * charset detector as a parameter ensures that the charset detection 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * service has been safely initialized and that the required detection 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * data is available. 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p> 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <b>Note:</b> Multiple different charset encodings in the same family may use 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a single shared name in this implementation. For example, this method returns 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * an enumeration including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252" 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (Windows Latin 1). However, the actual detection result could be "windows-1252" 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * when the input data matches Latin 1 code points with any points only available 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in "windows-1252". 
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd a Charset detector. 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status Any error conditions are reported back in this variable. 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return an iterator providing access to the detectable charset names. 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE UEnumeration * U_EXPORT2 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status); 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Test whether input filtering is enabled for this charset detector. 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Input filtering removes text that appears to be HTML or XML 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * markup from the input before applying the code page detection 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * heuristics. 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd The charset detector to check. 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return TRUE if filtering is enabled. 
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE UBool U_EXPORT2 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd); 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Enable filtering of input text. If filtering is enabled, 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * text within angle brackets ("<" and ">") will be removed 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * before detection, which will remove most HTML or XML markup. 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd the charset detector to be modified. 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param filter <code>TRUE</code> to enable input text filtering. 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return The previous setting. 
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 3.6 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_STABLE UBool U_EXPORT2 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter); 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#ifndef U_HIDE_INTERNAL_API 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get an iterator over the set of detectable charsets - 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * over the charsets that are enabled by the specified charset detector. 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The returned UEnumeration provides access to the names of 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the charsets. 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd a Charset detector. 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status Any error conditions are reported back in this variable. 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return an iterator providing access to the names of the charsets enabled by 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the specified charset detector. 
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @internal 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_INTERNAL UEnumeration * U_EXPORT2 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status); 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Enable or disable an individual charset encoding. 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The name of the charset encoding must be included in the names returned by 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ucsdet_getAllDetectableCharsets(). 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ucsd a Charset detector. 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param encoding the name of the charset encoding. 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param enabled <code>TRUE</code> to enable, or <code>FALSE</code> to disable the 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * charset encoding. 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status receives the return status. When the name of the charset encoding 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is not supported, U_ILLEGAL_ARGUMENT_ERROR is set. 
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @internal 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_INTERNAL void U_EXPORT2 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status); 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* U_HIDE_INTERNAL_API */ 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* !UCONFIG_NO_CONVERSION */ 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* __UCSDET_H */ 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 414