1b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* 2b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru******************************************************************************* 3b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 2008-2011, International Business Machines 5b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* Corporation, Google and others. All Rights Reserved. 6b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* 7b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru******************************************************************************* 8b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*/ 9b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Author : eldawy@google.com (Mohamed Eldawy) 10b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// ucnvsel.cpp 11b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 12b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Purpose: To generate a list of encodings capable of handling 13b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// a given Unicode text 14b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 15b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Started 09-April-2008 16b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 17b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 18b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * \file 19b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 20b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * This is an implementation of an encoding selector. 21b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The goal is, given a unicode string, find the encodings 22b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * this string can be mapped to. To make processing faster 23b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * a trie is built when you call ucnvsel_open() that 24b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * stores all encodings a codepoint can map to 25b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 26b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 27b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/ucnvsel.h" 28b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if !UCONFIG_NO_CONVERSION 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 31b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include <string.h> 32b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 33b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uchar.h" 34b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uniset.h" 35b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/ucnv.h" 36b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/ustring.h" 37b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uchriter.h" 38b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "utrie2.h" 39b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "propsvec.h" 40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "uassert.h" 41b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "ucmndata.h" 42b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "uenumimp.h" 43b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "cmemory.h" 44b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "cstring.h" 45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_NAMESPACE_USE 47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 48b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustruct UConverterSelector { 49b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UTrie2 *trie; // 16 bit trie containing offsets into pv 50b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t* pv; // table of bits! 51b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t pvCount; 52b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char** encodings; // which encodings did user ask to use? 53b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t encodingsCount; 54b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t encodingStrLength; 55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint8_t* swapped; 56b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBool ownPv, ownEncodingStrings; 57b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}; 58b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void generateSelectorData(UConverterSelector* result, 60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UPropsVectors *upvec, 61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const USet* excludedCodePoints, 62b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UConverterUnicodeSet whichSet, 63b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode* status) { 64b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 65b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t columns = (result->encodingsCount+31)/32; 69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // set errorValue to all-ones 71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (int32_t col = 0; col < columns; col++) { 72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP, 73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru col, ~0, ~0, status); 74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (int32_t i = 0; i < result->encodingsCount; ++i) { 77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t mask; 78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t column; 79b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t item_count; 80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t j; 81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UConverter* test_converter = ucnv_open(result->encodings[i], status); 82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 84b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru USet* unicode_point_set; 86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unicode_point_set = uset_open(1, 0); // empty set 87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 88b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucnv_getUnicodeSet(test_converter, unicode_point_set, 89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru whichSet, status); 90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucnv_close(test_converter); 92b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru column = i / 32; 96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru mask = 1 << (i%32); 97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // now iterate over intervals on set i! 98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru item_count = uset_getItemCount(unicode_point_set); 99b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (j = 0; j < item_count; ++j) { 101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 start_char; 102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 end_char; 103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode smallStatus = U_ZERO_ERROR; 104b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uset_getItem(unicode_point_set, j, &start_char, &end_char, NULL, 0, 105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &smallStatus); 106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(smallStatus)) { 107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // this will be reached for the converters that fill the set with 108b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // strings. Those should be ignored by our system 109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru upvec_setValue(upvec, start_char, end_char, column, ~0, mask, 111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status); 112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucnv_close(test_converter); 115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uset_close(unicode_point_set); 116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 117b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 118b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 121b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // handle excluded encodings! Simply set their values to all 1's in the upvec 122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (excludedCodePoints) { 123b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t item_count = uset_getItemCount(excludedCodePoints); 124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (int32_t j = 0; j < item_count; ++j) { 125b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 start_char; 126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 end_char; 127b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0, 129b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status); 130b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (int32_t col = 0; col < columns; col++) { 131b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru upvec_setValue(upvec, start_char, end_char, col, ~0, ~0, 132b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status); 133b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 135b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 136b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // alright. Now, let's put things in the same exact form you'd get when you 138b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // unserialize things. 139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status); 140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->pv = upvec_cloneArray(upvec, &result->pvCount, NULL, status); 141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->pvCount *= columns; // number of uint32_t = rows * columns 142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->ownPv = TRUE; 143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* open a selector. If converterListSize is 0, build for all converters. 146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru If excludedCodePoints is NULL, don't exclude any codepoints */ 147b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UConverterSelector* U_EXPORT2 148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_open(const char* const* converterList, int32_t converterListSize, 149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const USet* excludedCodePoints, 150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UConverterUnicodeSet whichSet, UErrorCode* status) { 151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // check if already failed 152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 153b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // ensure args make sense! 156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (converterListSize < 0 || (converterList == NULL && converterListSize != 0)) { 157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // allocate a new converter 16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUConverterSelectorPointer newSelector( 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector))); 16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (newSelector.isNull()) { 165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_memset(newSelector.getAlias(), 0, sizeof(UConverterSelector)); 169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (converterListSize == 0) { 171b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru converterList = NULL; 172b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru converterListSize = ucnv_countAvailable(); 173b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 174b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru newSelector->encodings = 175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (char**)uprv_malloc(converterListSize * sizeof(char*)); 176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!newSelector->encodings) { 177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru newSelector->encodings[0] = NULL; // now we can call ucnvsel_close() 181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // make a backup copy of the list of converters 183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t totalSize = 0; 184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t i; 185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i = 0; i < converterListSize; i++) { 186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru totalSize += 18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (int32_t)uprv_strlen(converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)) + 1; 188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 4-align the totalSize to 4-align the size of the serialized form 190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t encodingStrPadding = totalSize & 3; 191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (encodingStrPadding != 0) { 192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru encodingStrPadding = 4 - encodingStrPadding; 193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru newSelector->encodingStrLength = totalSize += encodingStrPadding; 195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char* allStrings = (char*) uprv_malloc(totalSize); 196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!allStrings) { 197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 198b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 199b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i = 0; i < converterListSize; i++) { 202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru newSelector->encodings[i] = allStrings; 203b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_strcpy(newSelector->encodings[i], 204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)); 205b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru allStrings += uprv_strlen(newSelector->encodings[i]) + 1; 206b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while (encodingStrPadding > 0) { 208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *allStrings++ = 0; 209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru --encodingStrPadding; 210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru newSelector->ownEncodingStrings = TRUE; 213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru newSelector->encodingsCount = converterListSize; 214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status); 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho generateSelectorData(newSelector.getAlias(), upvec, excludedCodePoints, whichSet, status); 216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru upvec_close(upvec); 217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 219b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 221b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return newSelector.orphan(); 223b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 224b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* close opened selector */ 226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI void U_EXPORT2 227b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_close(UConverterSelector *sel) { 228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!sel) { 229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 230b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (sel->ownEncodingStrings) { 232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(sel->encodings[0]); 233b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 234b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(sel->encodings); 235b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (sel->ownPv) { 236b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(sel->pv); 237b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 238b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru utrie2_close(sel->trie); 239b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(sel->swapped); 240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(sel); 241b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 243b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic const UDataInfo dataInfo = { 244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sizeof(UDataInfo), 245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0, 246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_IS_BIG_ENDIAN, 248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_CHARSET_FAMILY, 249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_SIZEOF_UCHAR, 250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 0, 251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */ 253b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru { 1, 0, 0, 0 }, /* formatVersion */ 254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru { 0, 0, 0, 0 } /* dataVersion */ 255b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}; 256b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 257b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruenum { 258b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCNVSEL_INDEX_TRIE_SIZE, // trie size in bytes 259b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCNVSEL_INDEX_PV_COUNT, // number of uint32_t in the bit vectors 260b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCNVSEL_INDEX_NAMES_COUNT, // number of encoding names 261b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCNVSEL_INDEX_NAMES_LENGTH, // number of encoding name bytes including padding 262b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCNVSEL_INDEX_SIZE = 15, // bytes following the DataHeader 263b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCNVSEL_INDEX_COUNT = 16 264b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}; 265b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 266b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* 267b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Serialized form of a UConverterSelector, formatVersion 1: 268b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 269b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The serialized form begins with a standard ICU DataHeader with a UDataInfo 270b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * as the template above. 271b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * This is followed by: 272b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above 273b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes 274b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors 275b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * char* encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding 276b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 277b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 278b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* serialize a selector */ 279b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 280b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_serialize(const UConverterSelector* sel, 281b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void* buffer, int32_t bufferCapacity, UErrorCode* status) { 282b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // check if already failed 283b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 284b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 285b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 286b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // ensure args make sense! 287b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint8_t *p = (uint8_t *)buffer; 288b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (bufferCapacity < 0 || 289b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (bufferCapacity > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0))) 290b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ) { 291b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 292b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 293b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 294b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // add up the size of the serialized form 295b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t serializedTrieSize = utrie2_serialize(sel->trie, NULL, 0, status); 296b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) { 297b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 298b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 299b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ZERO_ERROR; 300b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 301b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru DataHeader header; 302b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memset(&header, 0, sizeof(header)); 303b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru header.dataHeader.headerSize = (uint16_t)((sizeof(header) + 15) & ~15); 304b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru header.dataHeader.magic1 = 0xda; 305b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru header.dataHeader.magic2 = 0x27; 306b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memcpy(&header.info, &dataInfo, sizeof(dataInfo)); 307b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 308b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t indexes[UCNVSEL_INDEX_COUNT] = { 309b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru serializedTrieSize, 310b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->pvCount, 311b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->encodingsCount, 312b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->encodingStrLength 313b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 314b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 315b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t totalSize = 316b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru header.dataHeader.headerSize + 317b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (int32_t)sizeof(indexes) + 318b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru serializedTrieSize + 319b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->pvCount * 4 + 320b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->encodingStrLength; 321b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru indexes[UCNVSEL_INDEX_SIZE] = totalSize - header.dataHeader.headerSize; 322b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (totalSize > bufferCapacity) { 323b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 324b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return totalSize; 325b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 326b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // ok, save! 327b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t length = header.dataHeader.headerSize; 328b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memcpy(p, &header, sizeof(header)); 329b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memset(p + sizeof(header), 0, length - sizeof(header)); 330b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += length; 331b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 332b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length = (int32_t)sizeof(indexes); 333b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memcpy(p, indexes, length); 334b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += length; 335b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 336b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru utrie2_serialize(sel->trie, p, serializedTrieSize, status); 337b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += serializedTrieSize; 338b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 339b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length = sel->pvCount * 4; 340b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memcpy(p, sel->pv, length); 341b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += length; 342b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 343b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memcpy(p, sel->encodings[0], sel->encodingStrLength); 344b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += sel->encodingStrLength; 345b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 346b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return totalSize; 347b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 348b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 349b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/** 350b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * swap a selector into the desired Endianness and Asciiness of 351b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * the system. Just as FYI, selectors are always saved in the format 352b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * of the system that created them. They are only converted if used 353b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * on another system. In other words, selectors created on different 354b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * system can be different even if the params are identical (endianness 355b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * and Asciiness differences only) 356b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * 357b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param ds pointer to data swapper containing swapping info 358b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param inData pointer to incoming data 359b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param length length of inData in bytes 360b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param outData pointer to output data. Capacity should 361b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * be at least equal to capacity of inData 362b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status an in/out ICU UErrorCode 363b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return 0 on failure, number of bytes swapped on success 364b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * number of bytes swapped can be smaller than length 365b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 366b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic int32_t 367b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_swap(const UDataSwapper *ds, 368b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const void *inData, int32_t length, 369b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void *outData, UErrorCode *status) { 370b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* udata_swapDataHeader checks the arguments */ 371b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status); 372b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U_FAILURE(*status)) { 373b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 374b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 375b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 376b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* check data format and format version */ 377b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4); 378b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(!( 379b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */ 380b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[1] == 0x53 && 381b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[2] == 0x65 && 382b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[3] == 0x6c 383b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru )) { 384b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n", 385b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[0], pInfo->dataFormat[1], 386b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[2], pInfo->dataFormat[3]); 387b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_INVALID_FORMAT_ERROR; 388b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 389b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 390b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pInfo->formatVersion[0] != 1) { 391b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n", 392b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->formatVersion[0]); 393b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_UNSUPPORTED_ERROR; 394b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 395b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 396b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 397b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(length >= 0) { 398b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length -= headerSize; 399b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(length < 16*4) { 400b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n", 401b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length); 402b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 403b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 404b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 405b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 406b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 407b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const uint8_t *inBytes = (const uint8_t *)inData + headerSize; 408b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint8_t *outBytes = (uint8_t *)outData + headerSize; 409b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 410b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* read the indexes */ 411b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const int32_t *inIndexes = (const int32_t *)inBytes; 412b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t indexes[16]; 413b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t i; 414b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(i = 0; i < 16; ++i) { 415b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru indexes[i] = udata_readInt32(ds, inIndexes[i]); 416b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 417b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 418b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* get the total length of the data */ 419b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t size = indexes[UCNVSEL_INDEX_SIZE]; 420b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(length >= 0) { 421b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(length < size) { 422b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n", 423b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length); 424b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 425b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 426b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 427b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 428b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* copy the data for inaccessible bytes */ 429b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(inBytes != outBytes) { 430b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memcpy(outBytes, inBytes, size); 431b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 433b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t offset = 0, count; 434b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 435b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* swap the int32_t indexes[] */ 436b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru count = UCNVSEL_INDEX_COUNT*4; 437b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->swapArray32(ds, inBytes, count, outBytes, status); 438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru offset += count; 439b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 440b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* swap the UTrie2 */ 441b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru count = indexes[UCNVSEL_INDEX_TRIE_SIZE]; 442b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status); 443b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru offset += count; 444b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 445b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* swap the uint32_t pv[] */ 446b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru count = indexes[UCNVSEL_INDEX_PV_COUNT]*4; 447b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status); 448b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru offset += count; 449b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 450b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* swap the encoding names */ 451b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru count = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; 452b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status); 453b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru offset += count; 454b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 455b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_ASSERT(offset == size); 456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return headerSize + size; 459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 460b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 461b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* unserialize a selector */ 462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UConverterSelector* U_EXPORT2 463b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status) { 464b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // check if already failed 465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 466b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 467b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // ensure args make sense! 469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const uint8_t *p = (const uint8_t *)buffer; 470b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length <= 0 || 471b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (length > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0))) 472b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ) { 473b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 474b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 475b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 476b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // header 477b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length < 32) { 478b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // not even enough space for a minimal header 479b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 480b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 481b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 482b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const DataHeader *pHeader = (const DataHeader *)p; 483b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!( 484b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pHeader->dataHeader.magic1==0xda && 485b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pHeader->dataHeader.magic2==0x27 && 486b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pHeader->info.dataFormat[0] == 0x43 && 487b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pHeader->info.dataFormat[1] == 0x53 && 488b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pHeader->info.dataFormat[2] == 0x65 && 489b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pHeader->info.dataFormat[3] == 0x6c 490b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru )) { 491b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* header not valid or dataFormat not recognized */ 492b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_INVALID_FORMAT_ERROR; 493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 495b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (pHeader->info.formatVersion[0] != 1) { 496b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_UNSUPPORTED_ERROR; 497b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 498b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 499b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint8_t* swapped = NULL; 500b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (pHeader->info.isBigEndian != U_IS_BIG_ENDIAN || 501b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pHeader->info.charsetFamily != U_CHARSET_FAMILY 502b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ) { 503b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // swap the data 504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UDataSwapper *ds = 505b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_openSwapperForInputData(p, length, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, status); 506b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t totalSize = ucnvsel_swap(ds, p, -1, NULL, status); 507b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 508b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_closeSwapper(ds); 509b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 510b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 511b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length < totalSize) { 512b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_closeSwapper(ds); 513b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 514b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru swapped = (uint8_t*)uprv_malloc(totalSize); 517b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (swapped == NULL) { 518b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_closeSwapper(ds); 519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 520b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 521b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 522b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucnvsel_swap(ds, p, length, swapped, status); 523b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_closeSwapper(ds); 524b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(swapped); 526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 527b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 528b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p = swapped; 529b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pHeader = (const DataHeader *)p; 530b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 531b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length < (pHeader->dataHeader.headerSize + 16 * 4)) { 532b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // not even enough space for the header and the indexes 533b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(swapped); 534b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 535b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 536b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 537b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += pHeader->dataHeader.headerSize; 538b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length -= pHeader->dataHeader.headerSize; 539b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // indexes 540b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const int32_t *indexes = (const int32_t *)p; 541b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length < indexes[UCNVSEL_INDEX_SIZE]) { 542b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(swapped); 543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_INDEX_OUTOFBOUNDS_ERROR; 544b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 545b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 546b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += UCNVSEL_INDEX_COUNT * 4; 547b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // create and populate the selector object 548b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UConverterSelector* sel = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector)); 549b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char **encodings = 550b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (char **)uprv_malloc( 551b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru indexes[UCNVSEL_INDEX_NAMES_COUNT] * sizeof(char *)); 552b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (sel == NULL || encodings == NULL) { 553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(swapped); 554b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(sel); 555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(encodings); 556b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 559b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memset(sel, 0, sizeof(UConverterSelector)); 560b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->pvCount = indexes[UCNVSEL_INDEX_PV_COUNT]; 561b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->encodings = encodings; 562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->encodingsCount = indexes[UCNVSEL_INDEX_NAMES_COUNT]; 563b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->encodingStrLength = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; 564b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->swapped = swapped; 565b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // trie 566b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->trie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 567b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p, indexes[UCNVSEL_INDEX_TRIE_SIZE], NULL, 568b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status); 569b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += indexes[UCNVSEL_INDEX_TRIE_SIZE]; 570b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 571b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucnvsel_close(sel); 572b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 573b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 574b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // bit vectors 575b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->pv = (uint32_t *)p; 576b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += sel->pvCount * 4; 577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // encoding names 578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char* s = (char*)p; 579b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (int32_t i = 0; i < sel->encodingsCount; ++i) { 580b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel->encodings[i] = s; 581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru s += uprv_strlen(s) + 1; 582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p += sel->encodingStrLength; 584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return sel; 586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// a bunch of functions for the enumeration thingie! Nothing fancy here. Just 589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// iterate over the selected encodings 590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustruct Enumerator { 591b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int16_t* index; 592b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int16_t length; 593b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int16_t cur; 594b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UConverterSelector* sel; 595b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}; 596b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 597b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CDECL_BEGIN 598b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 599b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void U_CALLCONV 600b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_close_selector_iterator(UEnumeration *enumerator) { 601b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(((Enumerator*)(enumerator->context))->index); 602b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(enumerator->context); 603b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(enumerator); 604b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 605b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 606b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 607b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic int32_t U_CALLCONV 608b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_count_encodings(UEnumeration *enumerator, UErrorCode *status) { 609b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // check if already failed 610b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 611b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 613b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return ((Enumerator*)(enumerator->context))->length; 614b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 615b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 616b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 617b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator, 618b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t* resultLength, 619b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode* status) { 620b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // check if already failed 621b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 622b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 623b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 624b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 625b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int16_t cur = ((Enumerator*)(enumerator->context))->cur; 626b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UConverterSelector* sel; 627b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char* result; 628b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (cur >= ((Enumerator*)(enumerator->context))->length) { 629b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 630b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 631b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sel = ((Enumerator*)(enumerator->context))->sel; 632b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result = sel->encodings[((Enumerator*)(enumerator->context))->index[cur] ]; 633b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ((Enumerator*)(enumerator->context))->cur++; 634b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (resultLength) { 63550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *resultLength = (int32_t)uprv_strlen(result); 636b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 637b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return result; 638b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 639b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 640b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator, 641b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode* status) { 642b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // check if already failed 643b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 644b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return ; 645b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 646b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ((Enumerator*)(enumerator->context))->cur = 0; 647b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 648b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 649b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CDECL_END 650b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 651b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 652b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic const UEnumeration defaultEncodings = { 653b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru NULL, 654b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru NULL, 655b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucnvsel_close_selector_iterator, 656b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucnvsel_count_encodings, 657b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uenum_unextDefault, 658b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucnvsel_next_encoding, 659b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucnvsel_reset_iterator 660b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}; 661b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 662b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 663b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// internal fn to intersect two sets of masks 664b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// returns whether the mask has reduced to all zeros 66550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) { 666b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t i; 667b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t oredDest = 0; 668b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i = 0 ; i < len ; ++i) { 669b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru oredDest |= (dest[i] &= source1[i]); 670b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 671b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return oredDest == 0; 672b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 673b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 674b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// internal fn to count how many 1's are there in a mask 675b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html 67650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic int16_t countOnes(uint32_t* mask, int32_t len) { 677b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t i, totalOnes = 0; 678b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i = 0 ; i < len ; ++i) { 679b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t ent = mask[i]; 680b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (; ent; totalOnes++) 681b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru { 682b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ent &= ent - 1; // clear the least significant bit set 683b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 684b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 685b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return totalOnes; 686b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 687b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 688b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 689b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* internal function! */ 690b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic UEnumeration *selectForMask(const UConverterSelector* sel, 691b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t *mask, UErrorCode *status) { 692b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // this is the context we will use. Store a table of indices to which 693b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // encodings are legit. 694b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru struct Enumerator* result = (Enumerator*)uprv_malloc(sizeof(Enumerator)); 695b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (result == NULL) { 696b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(mask); 697b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 698b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 699b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->index = NULL; // this will be allocated later! 701b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->length = result->cur = 0; 702b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->sel = sel; 703b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 704b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 705b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (en == NULL) { 706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // TODO(markus): Combine Enumerator and UEnumeration into one struct. 707b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(mask); 708b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(result); 709b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 710b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 711b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 712b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru memcpy(en, &defaultEncodings, sizeof(UEnumeration)); 713b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru en->context = result; 714b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 715b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t columns = (sel->encodingsCount+31)/32; 716b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int16_t numOnes = countOnes(mask, columns); 717b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // now, we know the exact space we need for index 718b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (numOnes > 0) { 719b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->index = (int16_t*) uprv_malloc(numOnes * sizeof(int16_t)); 720b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 721b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t i, j; 722b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int16_t k = 0; 723b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (j = 0 ; j < columns; j++) { 724b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t v = mask[j]; 725b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i = 0 ; i < 32 && k < sel->encodingsCount; i++, k++) { 726b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((v & 1) != 0) { 727b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->index[result->length++] = k; 728b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 729b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru v >>= 1; 730b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 731b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 732b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } //otherwise, index will remain NULL (and will never be touched by 733b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //the enumerator code anyway) 734b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(mask); 735b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return en; 736b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 737b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 738b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* check a string against the selector - UTF16 version */ 739b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UEnumeration * U_EXPORT2 740b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_selectForString(const UConverterSelector* sel, 741b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *s, int32_t length, UErrorCode *status) { 742b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // check if already failed 743b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 744b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 745b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 746b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // ensure args make sense! 747b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (sel == NULL || (s == NULL && length != 0)) { 748b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 749b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 750b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 751b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 752b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t columns = (sel->encodingsCount+31)/32; 753b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4); 754b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (mask == NULL) { 755b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 756b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 757b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 758b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memset(mask, ~0, columns *4); 759b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 76083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(s!=NULL) { 76183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UChar *limit; 76283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (length >= 0) { 76383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius limit = s + length; 76483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } else { 76583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius limit = NULL; 76683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 76783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 76883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius while (limit == NULL ? *s != 0 : s != limit) { 76983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 c; 77083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uint16_t pvIndex; 77183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex); 77283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (intersectMasks(mask, sel->pv+pvIndex, columns)) { 77383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius break; 77483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 775b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 776b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 777b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return selectForMask(sel, mask, status); 778b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 779b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 780b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* check a string against the selector - UTF8 version */ 781b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UEnumeration * U_EXPORT2 782b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruucnvsel_selectForUTF8(const UConverterSelector* sel, 783b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *s, int32_t length, UErrorCode *status) { 784b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // check if already failed 785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 786b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 787b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 788b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // ensure args make sense! 789b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (sel == NULL || (s == NULL && length != 0)) { 790b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 791b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 792b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 793b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 794b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t columns = (sel->encodingsCount+31)/32; 795b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4); 796b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (mask == NULL) { 797b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 798b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 799b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 800b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memset(mask, ~0, columns *4); 801b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 802b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length < 0) { 80350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length = (int32_t)uprv_strlen(s); 804b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 805b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 80683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(s!=NULL) { 80783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const char *limit = s + length; 80883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 80983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius while (s != limit) { 81083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uint16_t pvIndex; 81183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex); 81283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (intersectMasks(mask, sel->pv+pvIndex, columns)) { 81383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius break; 81483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 815b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 816b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 817b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return selectForMask(sel, mask, status); 818b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 819b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif // !UCONFIG_NO_CONVERSION 821