1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 1999-2004, International Business Machines 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* uconv_cnv.c: 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Implements all the low level conversion functions 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* T_UnicodeConverter_{to,from}Unicode_$ConversionType 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Change history: 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 06/29/2000 helena Major rewrite of the callback APIs. 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_CONVERSION 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv_err.h" 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv.h" 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uset.h" 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_cnv.h" 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_bld.h" 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_getCompleteUnicodeSet(const UConverter *cnv, 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const USetAdder *sa, 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverterUnicodeSet which, 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sa->addRange(sa->set, 0, 0x10ffff); 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const USetAdder *sa, 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverterUnicodeSet which, 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sa->addRange(sa->set, 0, 0xd7ff); 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sa->addRange(sa->set, 0xe000, 0x10ffff); 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_fromUWriteBytes(UConverter *cnv, 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *bytes, int32_t length, 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char **target, const char *targetLimit, 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t **offsets, 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t sourceIndex, 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *t=*target; 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *o; 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write bytes */ 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(offsets==NULL || (o=*offsets)==NULL) { 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(length>0 && t<targetLimit) { 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *t++=*bytes++; 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --length; 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* output with offsets */ 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(length>0 && t<targetLimit) { 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *t++=*bytes++; 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *o++=sourceIndex; 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --length; 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *offsets=o; 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *target=t; 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write overflow */ 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length>0) { 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(cnv!=NULL) { 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t=(char *)cnv->charErrorBuffer; 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->charErrorBufferLength=(int8_t)length; 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *t++=(uint8_t)*bytes++; 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while(--length>0); 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_toUWriteUChars(UConverter *cnv, 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *uchars, int32_t length, 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar **target, const UChar *targetLimit, 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t **offsets, 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t sourceIndex, 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *t=*target; 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *o; 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write UChars */ 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(offsets==NULL || (o=*offsets)==NULL) { 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(length>0 && t<targetLimit) { 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *t++=*uchars++; 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --length; 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* output with offsets */ 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(length>0 && t<targetLimit) { 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *t++=*uchars++; 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *o++=sourceIndex; 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --length; 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *offsets=o; 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *target=t; 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write overflow */ 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length>0) { 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(cnv!=NULL) { 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t=cnv->UCharErrorBuffer; 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->UCharErrorBufferLength=(int8_t)length; 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *t++=*uchars++; 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while(--length>0); 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_toUWriteCodePoint(UConverter *cnv, 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c, 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar **target, const UChar *targetLimit, 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t **offsets, 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t sourceIndex, 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *t; 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *o; 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t=*target; 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t<targetLimit) { 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(c<=0xffff) { 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *t++=(UChar)c; 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c=U_SENTINEL; 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* c is a supplementary code point */ { 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *t++=U16_LEAD(c); 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c=U16_TRAIL(c); 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t<targetLimit) { 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *t++=(UChar)c; 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c=U_SENTINEL; 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write offsets */ 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(offsets!=NULL && (o=*offsets)!=NULL) { 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *o++=sourceIndex; 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if((*target+1)<t) { 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *o++=sourceIndex; 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *offsets=o; 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *target=t; 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write overflow from c */ 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(c>=0) { 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(cnv!=NULL) { 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int8_t i=0; 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U16_APPEND_UNSAFE(cnv->UCharErrorBuffer, i, c); 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->UCharErrorBufferLength=i; 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 175