1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ******************************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (C) 2005-2007, International Business Machines 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Corporation and others. All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ******************************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_CONVERSION 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucsdet.h" 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "csdetect.h" 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "csmatch.h" 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_USE 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type)) 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define DELETE_ARRAY(array) uprv_free((void *) (array)) 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UCharsetDetector * U_EXPORT2 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_open(UErrorCode *status) 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CharsetDetector* csd = new CharsetDetector(*status); 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete csd; 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) csd = NULL; 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (UCharsetDetector *) csd; 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_close(UCharsetDetector *ucsd) 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CharsetDetector *csd = (CharsetDetector *) ucsd; 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete csd; 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status) 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((CharsetDetector *) ucsd)->setText(textIn, len); 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const char * U_EXPORT2 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status) 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return ((CharsetMatch *) ucsm)->getName(); 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status) 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return ((CharsetMatch *) ucsm)->getConfidence(); 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const char * U_EXPORT2 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status) 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return ((CharsetMatch *) ucsm)->getLanguage(); 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const UCharsetMatch * U_EXPORT2 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status) 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (const UCharsetMatch *) ((CharsetDetector *) ucsd)->detect(*status); 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status) 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((CharsetDetector *) ucsd)->setDeclaredEncoding(encoding,length); 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const UCharsetMatch** 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_detectAll(UCharsetDetector *ucsd, 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *maxMatchesFound, UErrorCode *status) 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CharsetDetector *csd = (CharsetDetector *) ucsd; 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (const UCharsetMatch**)csd->detectAll(*maxMatchesFound,*status); 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// U_CAPI const char * U_EXPORT2 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// ucsdet_getDetectableCharsetName(const UCharsetDetector *csd, int32_t index, UErrorCode *status) 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// { 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// if(U_FAILURE(*status)) { 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// return 0; 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// } 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// return csd->getCharsetName(index,*status); 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// } 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// U_CAPI int32_t U_EXPORT2 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// ucsdet_getDetectableCharsetsCount(const UCharsetDetector *csd, UErrorCode *status) 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// { 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// if(U_FAILURE(*status)) { 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// return -1; 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// } 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// return UCharsetDetector::getDetectableCount(); 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// } 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd) 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // todo: could use an error return... 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ucsd == NULL) { 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return ((CharsetDetector *) ucsd)->getStripTagsFlag(); 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter) 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // todo: could use an error return... 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ucsd == NULL) { 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CharsetDetector *csd = (CharsetDetector *) ucsd; 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool prev = csd->getStripTagsFlag(); 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) csd->setStripTagsFlag(filter); 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return prev; 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucsdet_getUChars(const UCharsetMatch *ucsm, 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *buf, int32_t cap, UErrorCode *status) 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status); 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 181