1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 2002-2009, International Business Machines 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* file name: ucnv_u32.c 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* encoding: US-ASCII 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* tab size: 8 (not used) 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* indentation:4 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created on: 2002jul01 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created by: Markus W. Scherer 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* UTF-32 converter implementation. Used to be in ucnv_utf.c. 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_CONVERSION 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv.h" 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_bld.h" 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_cnv.h" 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define MAXIMUM_UCS2 0x0000FFFF 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define MAXIMUM_UTF 0x0010FFFF 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define HALF_SHIFT 10 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define HALF_BASE 0x0010000 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define HALF_MASK 0x3FF 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define SURROGATE_HIGH_START 0xD800 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define SURROGATE_LOW_START 0xDC00 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* -SURROGATE_LOW_START + HALF_BASE */ 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define SURROGATE_LOW_BASE 9216 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum { 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_NEED_TO_WRITE_BOM=1 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* UTF-32BE ----------------------------------------------------------------- */ 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode * err) 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *mySource = (unsigned char *) args->source; 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *myTarget = args->target; 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *targetLimit = args->targetLimit; 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char *toUBytes = args->converter->toUBytes; 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t ch, i; 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Restore state of current sequence */ 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->converter->toUnicodeStatus && myTarget < targetLimit) { 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i = args->converter->toULength; /* restore # of bytes consumed */ 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = 0; 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toUnicodeStatus = 0; 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto morebytes; 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (mySource < sourceLimit && myTarget < targetLimit) { 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i = 0; 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = 0; 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)morebytes: 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (i < sizeof(uint32_t)) { 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit) { 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = (ch << 8) | (uint8_t)(*mySource); 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) toUBytes[i++] = (char) *(mySource++); 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* stores a partially calculated target*/ 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* + 1 to make 0 a valid character */ 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toUnicodeStatus = ch + 1; 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t) i; 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto donefornow; 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ch <= MAXIMUM_UCS2) 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fits in 16 bits */ 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = (UChar) ch; 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write out the surrogates */ 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = U16_LEAD(ch); 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = U16_TRAIL(ch); 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (myTarget < targetLimit) { 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = (UChar)ch; 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Put in overflow buffer (not handled here) */ 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->UCharErrorBuffer[0] = (UChar) ch; 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->UCharErrorBufferLength = 1; 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t)i; 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)donefornow: 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* End of target buffer */ 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->target = myTarget; 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = (const char *) mySource; 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode * err) 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *mySource = (unsigned char *) args->source; 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *myTarget = args->target; 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *myOffsets = args->offsets; 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *targetLimit = args->targetLimit; 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char *toUBytes = args->converter->toUBytes; 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t ch, i; 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t offsetNum = 0; 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Restore state of current sequence */ 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->converter->toUnicodeStatus && myTarget < targetLimit) { 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i = args->converter->toULength; /* restore # of bytes consumed */ 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = 0; 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toUnicodeStatus = 0; 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto morebytes; 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (mySource < sourceLimit && myTarget < targetLimit) { 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i = 0; 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = 0; 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)morebytes: 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (i < sizeof(uint32_t)) { 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit) { 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = (ch << 8) | (uint8_t)(*mySource); 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) toUBytes[i++] = (char) *(mySource++); 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* stores a partially calculated target*/ 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* + 1 to make 0 a valid character */ 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toUnicodeStatus = ch + 1; 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t) i; 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto donefornow; 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ch <= MAXIMUM_UCS2) { 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fits in 16 bits */ 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = (UChar) ch; 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myOffsets++) = offsetNum; 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write out the surrogates */ 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = U16_LEAD(ch); 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *myOffsets++ = offsetNum; 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = U16_TRAIL(ch); 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (myTarget < targetLimit) 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = (UChar)ch; 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myOffsets++) = offsetNum; 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Put in overflow buffer (not handled here) */ 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->UCharErrorBuffer[0] = (UChar) ch; 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->UCharErrorBufferLength = 1; 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t)i; 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsetNum += i; 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)donefornow: 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* End of target buffer */ 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->target = myTarget; 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = (const char *) mySource; 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->offsets = myOffsets; 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode * err) 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *mySource = args->source; 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char *myTarget; 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *sourceLimit = args->sourceLimit; 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *targetLimit = (unsigned char *) args->targetLimit; 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 ch, ch2; 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned int indexToWrite; 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char temp[sizeof(uint32_t)]; 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(mySource >= sourceLimit) { 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no input, nothing to do */ 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write the BOM if necessary */ 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_fromUWriteBytes(args->converter, 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bom, 4, 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &args->target, args->targetLimit, 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &args->offsets, -1, 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) err); 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUnicodeStatus=0; 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myTarget = (unsigned char *) args->target; 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[0] = 0; 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->converter->fromUChar32) { 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = args->converter->fromUChar32; 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = 0; 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto lowsurogate; 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (mySource < sourceLimit && myTarget < targetLimit) { 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = *(mySource++); 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (UTF_IS_SURROGATE(ch)) { 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_IS_LEAD(ch)) { 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)lowsurogate: 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit) { 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch2 = *mySource; 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_IS_TRAIL(ch2)) { 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mySource++; 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ran out of source */ 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->flush) { 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[1] = (uint8_t) (ch >> 16 & 0x1F); 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (myTarget < targetLimit) { 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = temp[indexToWrite]; 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->target = (char *) myTarget; 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = mySource; 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode * err) 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *mySource = args->source; 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char *myTarget; 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *myOffsets; 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *sourceLimit = args->sourceLimit; 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *targetLimit = (unsigned char *) args->targetLimit; 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 ch, ch2; 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t offsetNum = 0; 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned int indexToWrite; 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char temp[sizeof(uint32_t)]; 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(mySource >= sourceLimit) { 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no input, nothing to do */ 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write the BOM if necessary */ 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_fromUWriteBytes(args->converter, 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bom, 4, 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &args->target, args->targetLimit, 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &args->offsets, -1, 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) err); 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUnicodeStatus=0; 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myTarget = (unsigned char *) args->target; 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myOffsets = args->offsets; 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[0] = 0; 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->converter->fromUChar32) { 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = args->converter->fromUChar32; 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = 0; 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto lowsurogate; 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (mySource < sourceLimit && myTarget < targetLimit) { 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = *(mySource++); 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (UTF_IS_SURROGATE(ch)) { 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_IS_LEAD(ch)) { 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)lowsurogate: 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit) { 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch2 = *mySource; 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_IS_TRAIL(ch2)) { 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mySource++; 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ran out of source */ 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->flush) { 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[1] = (uint8_t) (ch >> 16 & 0x1F); 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (myTarget < targetLimit) { 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = temp[indexToWrite]; 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myOffsets++) = offsetNum; 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsetNum = offsetNum + 1 + (temp[1] != 0); 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->target = (char *) myTarget; 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = mySource; 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->offsets = myOffsets; 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UChar32 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args, 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode* err) 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint8_t *mySource; 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 myUChar; 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t length; 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mySource = (const uint8_t *)args->source; 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource >= (const uint8_t *)args->sourceLimit) 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no input */ 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_INDEX_OUTOFBOUNDS_ERROR; 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0xffff; 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (length < 4) 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* got a partial character */ 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(args->converter->toUBytes, mySource, length); 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t)length; 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = (const char *)(mySource + length); 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_TRUNCATED_CHAR_FOUND; 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0xffff; 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Don't even try to do a direct cast because the value may be on an odd address. */ 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myUChar = ((UChar32)mySource[0] << 24) 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((UChar32)mySource[1] << 16) 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((UChar32)mySource[2] << 8) 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((UChar32)mySource[3]); 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = (const char *)(mySource + 4); 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return myUChar; 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(args->converter->toUBytes, mySource, 4); 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = 4; 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0xffff; 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const UConverterImpl _UTF32BEImpl = { 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_UTF32_BigEndian, 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_BE, 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC, 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_fromUnicode_UTF32_BE, 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_getNextUChar_UTF32_BE, 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_getNonSurrogateUnicodeSet 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const UConverterStaticData _UTF32BEStaticData = { 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sizeof(UConverterStaticData), 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "UTF-32BE", 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1232, 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0, 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0, 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UConverterSharedData _UTF32BEData = { 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sizeof(UConverterSharedData), ~((uint32_t) 0), 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, NULL, &_UTF32BEStaticData, FALSE, &_UTF32BEImpl, 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* UTF-32LE ---------------------------------------------------------- */ 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode * err) 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *mySource = (unsigned char *) args->source; 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *myTarget = args->target; 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *targetLimit = args->targetLimit; 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char *toUBytes = args->converter->toUBytes; 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t ch, i; 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Restore state of current sequence */ 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->converter->toUnicodeStatus && myTarget < targetLimit) 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i = args->converter->toULength; /* restore # of bytes consumed */ 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = 0; 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Stores the previously calculated ch from a previous call*/ 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = args->converter->toUnicodeStatus - 1; 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toUnicodeStatus = 0; 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto morebytes; 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (mySource < sourceLimit && myTarget < targetLimit) 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i = 0; 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = 0; 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)morebytes: 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (i < sizeof(uint32_t)) 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit) 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch |= ((uint8_t)(*mySource)) << (i * 8); 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) toUBytes[i++] = (char) *(mySource++); 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* stores a partially calculated target*/ 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* + 1 to make 0 a valid character */ 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toUnicodeStatus = ch + 1; 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t) i; 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto donefornow; 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ch <= MAXIMUM_UCS2) { 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fits in 16 bits */ 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = (UChar) ch; 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write out the surrogates */ 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = U16_LEAD(ch); 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = U16_TRAIL(ch); 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (myTarget < targetLimit) { 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = (UChar)ch; 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Put in overflow buffer (not handled here) */ 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->UCharErrorBuffer[0] = (UChar) ch; 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->UCharErrorBufferLength = 1; 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t)i; 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)donefornow: 580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* End of target buffer */ 583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->target = myTarget; 587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = (const char *) mySource; 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode * err) 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *mySource = (unsigned char *) args->source; 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *myTarget = args->target; 596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *myOffsets = args->offsets; 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *targetLimit = args->targetLimit; 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char *toUBytes = args->converter->toUBytes; 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t ch, i; 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t offsetNum = 0; 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Restore state of current sequence */ 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->converter->toUnicodeStatus && myTarget < targetLimit) 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i = args->converter->toULength; /* restore # of bytes consumed */ 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = 0; 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Stores the previously calculated ch from a previous call*/ 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = args->converter->toUnicodeStatus - 1; 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toUnicodeStatus = 0; 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto morebytes; 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (mySource < sourceLimit && myTarget < targetLimit) 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i = 0; 618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = 0; 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)morebytes: 620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (i < sizeof(uint32_t)) 621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit) 623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch |= ((uint8_t)(*mySource)) << (i * 8); 625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) toUBytes[i++] = (char) *(mySource++); 626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else 628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* stores a partially calculated target*/ 630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* + 1 to make 0 a valid character */ 631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toUnicodeStatus = ch + 1; 632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t) i; 633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto donefornow; 634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) 638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ch <= MAXIMUM_UCS2) 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fits in 16 bits */ 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = (UChar) ch; 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myOffsets++) = offsetNum; 645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write out the surrogates */ 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = U16_LEAD(ch); 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myOffsets++) = offsetNum; 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = U16_TRAIL(ch); 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (myTarget < targetLimit) 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = (UChar)ch; 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myOffsets++) = offsetNum; 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Put in overflow buffer (not handled here) */ 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->UCharErrorBuffer[0] = (UChar) ch; 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->UCharErrorBufferLength = 1; 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t)i; 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsetNum += i; 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)donefornow: 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* End of target buffer */ 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->target = myTarget; 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = (const char *) mySource; 684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->offsets = myOffsets; 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode * err) 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *mySource = args->source; 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char *myTarget; 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *sourceLimit = args->sourceLimit; 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *targetLimit = (unsigned char *) args->targetLimit; 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 ch, ch2; 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned int indexToWrite; 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char temp[sizeof(uint32_t)]; 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(mySource >= sourceLimit) { 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no input, nothing to do */ 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write the BOM if necessary */ 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_fromUWriteBytes(args->converter, 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bom, 4, 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &args->target, args->targetLimit, 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &args->offsets, -1, 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) err); 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUnicodeStatus=0; 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myTarget = (unsigned char *) args->target; 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[3] = 0; 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->converter->fromUChar32) 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = args->converter->fromUChar32; 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = 0; 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto lowsurogate; 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (mySource < sourceLimit && myTarget < targetLimit) 726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = *(mySource++); 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (UTF_IS_SURROGATE(ch)) { 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_IS_LEAD(ch)) 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)lowsurogate: 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit) 734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch2 = *mySource; 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_IS_TRAIL(ch2)) { 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mySource++; 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ran out of source */ 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->flush) { 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[2] = (uint8_t) (ch >> 16 & 0x1F); 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (myTarget < targetLimit) 776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = temp[indexToWrite]; 778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->target = (char *) myTarget; 793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = mySource; 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode * err) 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *mySource = args->source; 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char *myTarget; 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *myOffsets; 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *sourceLimit = args->sourceLimit; 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char *targetLimit = (unsigned char *) args->targetLimit; 805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 ch, ch2; 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned int indexToWrite; 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned char temp[sizeof(uint32_t)]; 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t offsetNum = 0; 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(mySource >= sourceLimit) { 811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no input, nothing to do */ 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write the BOM if necessary */ 816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_fromUWriteBytes(args->converter, 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bom, 4, 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &args->target, args->targetLimit, 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &args->offsets, -1, 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) err); 823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUnicodeStatus=0; 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myTarget = (unsigned char *) args->target; 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myOffsets = args->offsets; 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[3] = 0; 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->converter->fromUChar32) 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = args->converter->fromUChar32; 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = 0; 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto lowsurogate; 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (mySource < sourceLimit && myTarget < targetLimit) 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = *(mySource++); 840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (UTF_IS_SURROGATE(ch)) { 842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_IS_LEAD(ch)) 843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)lowsurogate: 845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit) 846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch2 = *mySource; 848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_IS_TRAIL(ch2)) 849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; 851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mySource++; 852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ran out of source */ 863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (args->flush) { 865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is an unmatched trail code unit (2nd surrogate) */ 874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* callback(illegal) */ 875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->fromUChar32 = ch; 876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ 882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[2] = (uint8_t) (ch >> 16 & 0x1F); 883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ 884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ 885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) 887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (myTarget < targetLimit) 889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myTarget++) = temp[indexToWrite]; 891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(myOffsets++) = offsetNum; 892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else 894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; 896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsetNum = offsetNum + 1 + (temp[2] != 0); 900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_BUFFER_OVERFLOW_ERROR; 905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->target = (char *) myTarget; 908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = mySource; 909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->offsets = myOffsets; 910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UChar32 913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args, 914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode* err) 915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint8_t *mySource; 917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 myUChar; 918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t length; 919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mySource = (const uint8_t *)args->source; 921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mySource >= (const uint8_t *)args->sourceLimit) 922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no input */ 924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_INDEX_OUTOFBOUNDS_ERROR; 925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0xffff; 926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); 929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (length < 4) 930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* got a partial character */ 932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(args->converter->toUBytes, mySource, length); 933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = (int8_t)length; 934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = (const char *)(mySource + length); 935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_TRUNCATED_CHAR_FOUND; 936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0xffff; 937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Don't even try to do a direct cast because the value may be on an odd address. */ 940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myUChar = ((UChar32)mySource[3] << 24) 941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((UChar32)mySource[2] << 16) 942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((UChar32)mySource[1] << 8) 943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((UChar32)mySource[0]); 944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->source = (const char *)(mySource + 4); 946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { 947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return myUChar; 948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(args->converter->toUBytes, mySource, 4); 951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) args->converter->toULength = 4; 952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *err = U_ILLEGAL_CHAR_FOUND; 954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0xffff; 955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const UConverterImpl _UTF32LEImpl = { 958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_UTF32_LittleEndian, 959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_LE, 968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC, 969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_fromUnicode_UTF32_LE, 970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, 971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_getNextUChar_UTF32_LE, 972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_getNonSurrogateUnicodeSet 978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ 981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const UConverterStaticData _UTF32LEStaticData = { 982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sizeof(UConverterStaticData), 983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "UTF-32LE", 984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1234, 985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, 986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, 987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0, 988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0, 989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UConverterSharedData _UTF32LEData = { 994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sizeof(UConverterSharedData), ~((uint32_t) 0), 995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, NULL, &_UTF32LEStaticData, FALSE, &_UTF32LEImpl, 996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0 997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* UTF-32 (Detect BOM) ------------------------------------------------------ */ 1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE 1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * accordingly. 1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * State values: 1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 0 initial state 1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 1 saw 00 1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 2 saw 00 00 1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 3 saw 00 00 FE 1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 4 - 1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 5 saw FF 1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 6 saw FF FE 1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 7 saw FF FE 00 1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 8 UTF-32BE mode 1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 9 UTF-32LE mode 1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * During detection: state&3==number of matching bytes so far. 1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * On output, emit U+FEFF as the first code point. 1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) { 1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(choice<=UCNV_RESET_TO_UNICODE) { 1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* reset toUnicode: state=0 */ 1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->mode=0; 1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(choice!=UCNV_RESET_TO_UNICODE) { 1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* reset fromUnicode: prepare to output the UTF-32PE BOM */ 1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_UTF32Open(UConverter *cnv, 1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverterLoadArgs *pArgs, 1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) _UTF32Reset(cnv, UCNV_RESET_BOTH); 1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff, (char)0xff, (char)0xfe, 0, 0 }; 1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverter *cnv=pArgs->converter; 1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *source=pArgs->source; 1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *sourceLimit=pArgs->sourceLimit; 1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *offsets=pArgs->offsets; 1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t state, offsetDelta; 1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char b; 1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state=cnv->mode; 1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If we detect a BOM in this buffer, then we must add the BOM size to the 1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * offsets because the actual converter function will not see and count the BOM. 1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * offsetDelta will have the number of the BOM bytes that are in the current buffer. 1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsetDelta=0; 1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { 1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch(state) { 1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 0: 1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) b=*source; 1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(b==0) { 1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state=1; /* could be 00 00 FE FF */ 1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(b==(char)0xff) { 1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state=5; /* could be FF FE 00 00 */ 1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state=8; /* default to UTF-32BE */ 1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++source; 1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 1: 1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 2: 1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 3: 1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 5: 1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 6: 1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 7: 1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(*source==utf32BOM[state]) { 1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++state; 1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++source; 1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(state==4) { 1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state=8; /* detect UTF-32BE */ 1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsetDelta=(int32_t)(source-pArgs->source); 1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(state==8) { 1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state=9; /* detect UTF-32LE */ 1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsetDelta=(int32_t)(source-pArgs->source); 1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* switch to UTF-32BE and pass the previous bytes */ 1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */ 1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* reset the source */ 1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) source=pArgs->source; 1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(count==(state&3)) { 1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* simple: all in the same buffer, just reset source */ 1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool oldFlush=pArgs->flush; 1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* some of the bytes are from a previous buffer, replay those first */ 1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ 1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */ 1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */ 1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no offsets: bytes from previous buffer, and not enough for output */ 1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); 1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* restore real pointers; pArgs->source will be set in case 8/9 */ 1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->sourceLimit=sourceLimit; 1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->flush=oldFlush; 1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state=8; 1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 8: 1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* call UTF-32BE */ 1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source=source; 1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(offsets==NULL) { 1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); 1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode); 1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) source=pArgs->source; 1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 9: 1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* call UTF-32LE */ 1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source=source; 1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(offsets==NULL) { 1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); 1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode); 1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) source=pArgs->source; 1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; /* does not occur */ 1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* add BOM size to offsets - see comment at offsetDelta declaration */ 1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(offsets!=NULL && offsetDelta!=0) { 1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *offsetsLimit=pArgs->offsets; 1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(offsets<offsetsLimit) { 1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *offsets++ += offsetDelta; 1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source=source; 1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(source==sourceLimit && pArgs->flush) { 1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* handle truncated input */ 1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch(state) { 1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 0: 1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; /* no input at all, nothing to do */ 1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 8: 1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); 1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 9: 1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); 1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* handle 0<state<8: call UTF-32BE with too-short input */ 1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ 1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */ 1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no offsets: not enough for output */ 1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); 1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source=source; 1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->sourceLimit=sourceLimit; 1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state=8; 1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->mode=state; 1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UChar32 1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs, 1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch(pArgs->converter->mode) { 1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 8: 1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode); 1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 9: 1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode); 1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return UCNV_GET_NEXT_UCHAR_USE_TO_U; 1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const UConverterImpl _UTF32Impl = { 1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_UTF32, 1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) _UTF32Open, 1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) _UTF32Reset, 1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) _UTF32ToUnicodeWithOffsets, 1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) _UTF32ToUnicodeWithOffsets, 1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if U_IS_BIG_ENDIAN 1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_fromUnicode_UTF32_BE, 1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, 1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else 1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_fromUnicode_UTF32_LE, 1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, 1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) _UTF32GetNextUChar, 1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ 1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_getNonSurrogateUnicodeSet 1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianess of UTF-32 */ 1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const UConverterStaticData _UTF32StaticData = { 1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sizeof(UConverterStaticData), 1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "UTF-32", 1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1236, 1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_IBM, UCNV_UTF32, 4, 4, 1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if U_IS_BIG_ENDIAN 1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 0, 0, 0xff, 0xfd }, 4, 1233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else 1234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 0xfd, 0xff, 0, 0 }, 4, 1235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FALSE, FALSE, 1237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0, 1238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0, 1239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 1241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UConverterSharedData _UTF32Data = { 1243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sizeof(UConverterSharedData), ~((uint32_t) 0), 1244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, NULL, &_UTF32StaticData, FALSE, &_UTF32Impl, 1245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0 1246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 1247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1249