1/*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2003-2013, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  usprep.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2003jul2
14 *   created by: Ram Viswanadha
15 */
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_IDNA
20
21#include "unicode/usprep.h"
22
23#include "unicode/unorm.h"
24#include "unicode/ustring.h"
25#include "unicode/uchar.h"
26#include "unicode/uversion.h"
27#include "umutex.h"
28#include "cmemory.h"
29#include "sprpimpl.h"
30#include "ustr_imp.h"
31#include "uhash.h"
32#include "cstring.h"
33#include "udataswp.h"
34#include "ucln_cmn.h"
35#include "ubidi_props.h"
36
37U_NAMESPACE_USE
38
39U_CDECL_BEGIN
40
/*
 * Static cache for already opened StringPrep profiles.
 * Entries are keyed by UStringPrepKey* and hold UStringPrepProfile* values
 * (see usprep_getProfile()); NULL until createCache() has run successfully.
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
/* one-time initialization state for SHARED_DATA_HASHTABLE, used by initCache() */
static icu::UInitOnce gSharedDataInitOnce;

/* locked in this file around cache lookups/updates and profile refCount changes */
static UMutex usprepMutex = U_MUTEX_INITIALIZER;

/* format version of spp file */
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/* the Unicode version of the sprep data; written by isSPrepAcceptable(), read by loadData() */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
54
/*
 * Data item names of the built-in profiles, indexed by UStringPrepProfileType
 * (see usprep_openByType()). The order must stay aligned with that enum.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
72
73static UBool U_CALLCONV
74isSPrepAcceptable(void * /* context */,
75             const char * /* type */,
76             const char * /* name */,
77             const UDataInfo *pInfo) {
78    if(
79        pInfo->size>=20 &&
80        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
81        pInfo->charsetFamily==U_CHARSET_FAMILY &&
82        pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
83        pInfo->dataFormat[1]==0x50 &&
84        pInfo->dataFormat[2]==0x52 &&
85        pInfo->dataFormat[3]==0x50 &&
86        pInfo->formatVersion[0]==3 &&
87        pInfo->formatVersion[2]==UTRIE_SHIFT &&
88        pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
89    ) {
90        //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
91        uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
92        return TRUE;
93    } else {
94        return FALSE;
95    }
96}
97
98static int32_t U_CALLCONV
99getSPrepFoldingOffset(uint32_t data) {
100
101    return (int32_t)data;
102
103}
104
105/* hashes an entry  */
106static int32_t U_CALLCONV
107hashEntry(const UHashTok parm) {
108    UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
109    UHashTok namekey, pathkey;
110    namekey.pointer = b->name;
111    pathkey.pointer = b->path;
112    return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
113}
114
115/* compares two entries */
116static UBool U_CALLCONV
117compareEntries(const UHashTok p1, const UHashTok p2) {
118    UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
119    UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
120    UHashTok name1, name2, path1, path2;
121    name1.pointer = b1->name;
122    name2.pointer = b2->name;
123    path1.pointer = b1->path;
124    path2.pointer = b2->path;
125    return ((UBool)(uhash_compareChars(name1, name2) &
126        uhash_compareChars(path1, path2)));
127}
128
129static void
130usprep_unload(UStringPrepProfile* data){
131    udata_close(data->sprepData);
132}
133
134static int32_t
135usprep_internal_flushCache(UBool noRefCount){
136    UStringPrepProfile *profile = NULL;
137    UStringPrepKey  *key  = NULL;
138    int32_t pos = -1;
139    int32_t deletedNum = 0;
140    const UHashElement *e;
141
142    /*
143     * if shared data hasn't even been lazy evaluated yet
144     * return 0
145     */
146    umtx_lock(&usprepMutex);
147    if (SHARED_DATA_HASHTABLE == NULL) {
148        umtx_unlock(&usprepMutex);
149        return 0;
150    }
151
152    /*creates an enumeration to iterate through every element in the table */
153    while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
154    {
155        profile = (UStringPrepProfile *) e->value.pointer;
156        key  = (UStringPrepKey *) e->key.pointer;
157
158        if ((noRefCount== FALSE && profile->refCount == 0) ||
159             noRefCount== TRUE) {
160            deletedNum++;
161            uhash_removeElement(SHARED_DATA_HASHTABLE, e);
162
163            /* unload the data */
164            usprep_unload(profile);
165
166            if(key->name != NULL) {
167                uprv_free(key->name);
168                key->name=NULL;
169            }
170            if(key->path != NULL) {
171                uprv_free(key->path);
172                key->path=NULL;
173            }
174            uprv_free(profile);
175            uprv_free(key);
176        }
177
178    }
179    umtx_unlock(&usprepMutex);
180
181    return deletedNum;
182}
183
184/* Works just like ucnv_flushCache()
185static int32_t
186usprep_flushCache(){
187    return usprep_internal_flushCache(FALSE);
188}
189*/
190
191static UBool U_CALLCONV usprep_cleanup(void){
192    if (SHARED_DATA_HASHTABLE != NULL) {
193        usprep_internal_flushCache(TRUE);
194        if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
195            uhash_close(SHARED_DATA_HASHTABLE);
196            SHARED_DATA_HASHTABLE = NULL;
197        }
198    }
199    gSharedDataInitOnce.reset();
200    return (SHARED_DATA_HASHTABLE == NULL);
201}
202U_CDECL_END
203
204
205/** Initializes the cache for resources */
206static void U_CALLCONV
207createCache(UErrorCode &status) {
208    SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
209    if (U_FAILURE(status)) {
210        SHARED_DATA_HASHTABLE = NULL;
211    }
212    ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
213}
214
215static void
216initCache(UErrorCode *status) {
217    umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
218}
219
220static UBool U_CALLCONV
221loadData(UStringPrepProfile* profile,
222         const char* path,
223         const char* name,
224         const char* type,
225         UErrorCode* errorCode) {
226    /* load Unicode SPREP data from file */
227    UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
228    UDataMemory *dataMemory;
229    const int32_t *p=NULL;
230    const uint8_t *pb;
231    UVersionInfo normUnicodeVersion;
232    int32_t normUniVer, sprepUniVer, normCorrVer;
233
234    if(errorCode==NULL || U_FAILURE(*errorCode)) {
235        return 0;
236    }
237
238    /* open the data outside the mutex block */
239    //TODO: change the path
240    dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
241    if(U_FAILURE(*errorCode)) {
242        return FALSE;
243    }
244
245    p=(const int32_t *)udata_getMemory(dataMemory);
246    pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
247    utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
248    _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
249
250
251    if(U_FAILURE(*errorCode)) {
252        udata_close(dataMemory);
253        return FALSE;
254    }
255
256    /* in the mutex block, set the data for this process */
257    umtx_lock(&usprepMutex);
258    if(profile->sprepData==NULL) {
259        profile->sprepData=dataMemory;
260        dataMemory=NULL;
261        uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
262        uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
263    } else {
264        p=(const int32_t *)udata_getMemory(profile->sprepData);
265    }
266    umtx_unlock(&usprepMutex);
267    /* initialize some variables */
268    profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
269
270    u_getUnicodeVersion(normUnicodeVersion);
271    normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
272                 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
273    sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
274                  (dataVersion[2] << 8 ) + (dataVersion[3]);
275    normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
276
277    if(U_FAILURE(*errorCode)){
278        udata_close(dataMemory);
279        return FALSE;
280    }
281    if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
282        normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
283        ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
284      ){
285        *errorCode = U_INVALID_FORMAT_ERROR;
286        udata_close(dataMemory);
287        return FALSE;
288    }
289    profile->isDataLoaded = TRUE;
290
291    /* if a different thread set it first, then close the extra data */
292    if(dataMemory!=NULL) {
293        udata_close(dataMemory); /* NULL if it was set correctly */
294    }
295
296
297    return profile->isDataLoaded;
298}
299
300static UStringPrepProfile*
301usprep_getProfile(const char* path,
302                  const char* name,
303                  UErrorCode *status){
304
305    UStringPrepProfile* profile = NULL;
306
307    initCache(status);
308
309    if(U_FAILURE(*status)){
310        return NULL;
311    }
312
313    UStringPrepKey stackKey;
314    /*
315     * const is cast way to save malloc, strcpy and free calls
316     * we use the passed in pointers for fetching the data from the
317     * hash table which is safe
318     */
319    stackKey.name = (char*) name;
320    stackKey.path = (char*) path;
321
322    /* fetch the data from the cache */
323    umtx_lock(&usprepMutex);
324    profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
325    if(profile != NULL) {
326        profile->refCount++;
327    }
328    umtx_unlock(&usprepMutex);
329
330    if(profile == NULL) {
331        /* else load the data and put the data in the cache */
332        LocalMemory<UStringPrepProfile> newProfile;
333        if(newProfile.allocateInsteadAndReset() == NULL) {
334            *status = U_MEMORY_ALLOCATION_ERROR;
335            return NULL;
336        }
337
338        /* load the data */
339        if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
340            return NULL;
341        }
342
343        /* get the options */
344        newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
345        newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
346
347        if(newProfile->checkBiDi) {
348            newProfile->bdp = ubidi_getSingleton();
349        }
350
351        LocalMemory<UStringPrepKey> key;
352        LocalMemory<char> keyName;
353        LocalMemory<char> keyPath;
354        if( key.allocateInsteadAndReset() == NULL ||
355            keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
356            (path != NULL &&
357             keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
358         ) {
359            *status = U_MEMORY_ALLOCATION_ERROR;
360            usprep_unload(newProfile.getAlias());
361            return NULL;
362        }
363
364        umtx_lock(&usprepMutex);
365        // If another thread already inserted the same key/value, refcount and cleanup our thread data
366        profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
367        if(profile != NULL) {
368            profile->refCount++;
369            usprep_unload(newProfile.getAlias());
370        }
371        else {
372            /* initialize the key members */
373            key->name = keyName.orphan();
374            uprv_strcpy(key->name, name);
375            if(path != NULL){
376                key->path = keyPath.orphan();
377                uprv_strcpy(key->path, path);
378            }
379            profile = newProfile.orphan();
380
381            /* add the data object to the cache */
382            profile->refCount = 1;
383            uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
384        }
385        umtx_unlock(&usprepMutex);
386    }
387
388    return profile;
389}
390
391U_CAPI UStringPrepProfile* U_EXPORT2
392usprep_open(const char* path,
393            const char* name,
394            UErrorCode* status){
395
396    if(status == NULL || U_FAILURE(*status)){
397        return NULL;
398    }
399
400    /* initialize the profile struct members */
401    return usprep_getProfile(path,name,status);
402}
403
404U_CAPI UStringPrepProfile* U_EXPORT2
405usprep_openByType(UStringPrepProfileType type,
406				  UErrorCode* status) {
407    if(status == NULL || U_FAILURE(*status)){
408        return NULL;
409    }
410    int32_t index = (int32_t)type;
411    if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
412        *status = U_ILLEGAL_ARGUMENT_ERROR;
413        return NULL;
414    }
415    return usprep_open(NULL, PROFILE_NAMES[index], status);
416}
417
418U_CAPI void U_EXPORT2
419usprep_close(UStringPrepProfile* profile){
420    if(profile==NULL){
421        return;
422    }
423
424    umtx_lock(&usprepMutex);
425    /* decrement the ref count*/
426    if(profile->refCount > 0){
427        profile->refCount--;
428    }
429    umtx_unlock(&usprepMutex);
430
431}
432
433U_CFUNC void
434uprv_syntaxError(const UChar* rules,
435                 int32_t pos,
436                 int32_t rulesLen,
437                 UParseError* parseError){
438    if(parseError == NULL){
439        return;
440    }
441    parseError->offset = pos;
442    parseError->line = 0 ; // we are not using line numbers
443
444    // for pre-context
445    int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
446    int32_t limit = pos;
447
448    u_memcpy(parseError->preContext,rules+start,limit-start);
449    //null terminate the buffer
450    parseError->preContext[limit-start] = 0;
451
452    // for post-context; include error rules[pos]
453    start = pos;
454    limit = start + (U_PARSE_CONTEXT_LEN-1);
455    if (limit > rulesLen) {
456        limit = rulesLen;
457    }
458    if (start < rulesLen) {
459        u_memcpy(parseError->postContext,rules+start,limit-start);
460    }
461    //null terminate the buffer
462    parseError->postContext[limit-start]= 0;
463}
464
465
466static inline UStringPrepType
467getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
468
469    UStringPrepType type;
470    if(trieWord == 0){
471        /*
472         * Initial value stored in the mapping table
473         * just return USPREP_TYPE_LIMIT .. so that
474         * the source codepoint is copied to the destination
475         */
476        type = USPREP_TYPE_LIMIT;
477        isIndex =FALSE;
478        value = 0;
479    }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
480        type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
481        isIndex =FALSE;
482        value = 0;
483    }else{
484        /* get the type */
485        type = USPREP_MAP;
486        /* ascertain if the value is index or delta */
487        if(trieWord & 0x02){
488            isIndex = TRUE;
489            value = trieWord  >> 2; //mask off the lower 2 bits and shift
490        }else{
491            isIndex = FALSE;
492            value = (int16_t)trieWord;
493            value =  (value >> 2);
494        }
495
496        if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
497            type = USPREP_DELETE;
498            isIndex =FALSE;
499            value = 0;
500        }
501    }
502    return type;
503}
504
505
506
507static int32_t
508usprep_map(  const UStringPrepProfile* profile,
509             const UChar* src, int32_t srcLength,
510             UChar* dest, int32_t destCapacity,
511             int32_t options,
512             UParseError* parseError,
513             UErrorCode* status ){
514
515    uint16_t result;
516    int32_t destIndex=0;
517    int32_t srcIndex;
518    UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
519    UStringPrepType type;
520    int16_t value;
521    UBool isIndex;
522    const int32_t* indexes = profile->indexes;
523
524    // no error checking the caller check for error and arguments
525    // no string length check the caller finds out the string length
526
527    for(srcIndex=0;srcIndex<srcLength;){
528        UChar32 ch;
529
530        U16_NEXT(src,srcIndex,srcLength,ch);
531
532        result=0;
533
534        UTRIE_GET16(&profile->sprepTrie,ch,result);
535
536        type = getValues(result, value, isIndex);
537
538        // check if the source codepoint is unassigned
539        if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
540
541            uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
542            *status = U_STRINGPREP_UNASSIGNED_ERROR;
543            return 0;
544
545        }else if(type == USPREP_MAP){
546
547            int32_t index, length;
548
549            if(isIndex){
550                index = value;
551                if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
552                         index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
553                    length = 1;
554                }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
555                         index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
556                    length = 2;
557                }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
558                         index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
559                    length = 3;
560                }else{
561                    length = profile->mappingData[index++];
562
563                }
564
565                /* copy mapping to destination */
566                for(int32_t i=0; i< length; i++){
567                    if(destIndex < destCapacity  ){
568                        dest[destIndex] = profile->mappingData[index+i];
569                    }
570                    destIndex++; /* for pre-flighting */
571                }
572                continue;
573            }else{
574                // subtract the delta to arrive at the code point
575                ch -= value;
576            }
577
578        }else if(type==USPREP_DELETE){
579             // just consume the codepoint and contine
580            continue;
581        }
582        //copy the code point into destination
583        if(ch <= 0xFFFF){
584            if(destIndex < destCapacity ){
585                dest[destIndex] = (UChar)ch;
586            }
587            destIndex++;
588        }else{
589            if(destIndex+1 < destCapacity ){
590                dest[destIndex]   = U16_LEAD(ch);
591                dest[destIndex+1] = U16_TRAIL(ch);
592            }
593            destIndex +=2;
594        }
595
596    }
597
598    return u_terminateUChars(dest, destCapacity, destIndex, status);
599}
600
601
602static int32_t
603usprep_normalize(   const UChar* src, int32_t srcLength,
604                    UChar* dest, int32_t destCapacity,
605                    UErrorCode* status ){
606    return unorm_normalize(
607        src, srcLength,
608        UNORM_NFKC, UNORM_UNICODE_3_2,
609        dest, destCapacity,
610        status);
611}
612
613
614 /*
615   1) Map -- For each character in the input, check if it has a mapping
616      and, if so, replace it with its mapping.
617
618   2) Normalize -- Possibly normalize the result of step 1 using Unicode
619      normalization.
620
621   3) Prohibit -- Check for any characters that are not allowed in the
622      output.  If any are found, return an error.
623
624   4) Check bidi -- Possibly check for right-to-left characters, and if
625      any are found, make sure that the whole string satisfies the
626      requirements for bidirectional strings.  If the string does not
627      satisfy the requirements for bidirectional strings, return an
628      error.
629      [Unicode3.2] defines several bidirectional categories; each character
630       has one bidirectional category assigned to it.  For the purposes of
631       the requirements below, an "RandALCat character" is a character that
632       has Unicode bidirectional categories "R" or "AL"; an "LCat character"
       is a character that has Unicode bidirectional category "L".  Note
       that there are many characters which fall in neither of the above
637       definitions; Latin digits (<U+0030> through <U+0039>) are examples of
638       this because they have bidirectional category "EN".
639
640       In any profile that specifies bidirectional character handling, all
641       three of the following requirements MUST be met:
642
643       1) The characters in section 5.8 MUST be prohibited.
644
645       2) If a string contains any RandALCat character, the string MUST NOT
646          contain any LCat character.
647
648       3) If a string contains any RandALCat character, a RandALCat
649          character MUST be the first character of the string, and a
650          RandALCat character MUST be the last character of the string.
651*/
652
653#define MAX_STACK_BUFFER_SIZE 300
654
655
656U_CAPI int32_t U_EXPORT2
657usprep_prepare(   const UStringPrepProfile* profile,
658                  const UChar* src, int32_t srcLength,
659                  UChar* dest, int32_t destCapacity,
660                  int32_t options,
661                  UParseError* parseError,
662                  UErrorCode* status ){
663
664    // check error status
665    if(status == NULL || U_FAILURE(*status)){
666        return 0;
667    }
668
669    //check arguments
670    if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
671        *status=U_ILLEGAL_ARGUMENT_ERROR;
672        return 0;
673    }
674
675    UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
676    UChar *b1 = b1Stack, *b2 = b2Stack;
677    int32_t b1Len, b2Len=0,
678            b1Capacity = MAX_STACK_BUFFER_SIZE ,
679            b2Capacity = MAX_STACK_BUFFER_SIZE;
680    uint16_t result;
681    int32_t b2Index = 0;
682    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
683    UBool leftToRight=FALSE, rightToLeft=FALSE;
684    int32_t rtlPos =-1, ltrPos =-1;
685
686    //get the string length
687    if(srcLength == -1){
688        srcLength = u_strlen(src);
689    }
690    // map
691    b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
692
693    if(*status == U_BUFFER_OVERFLOW_ERROR){
694        // redo processing of string
695        /* we do not have enough room so grow the buffer*/
696        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
697        if(b1==NULL){
698            *status = U_MEMORY_ALLOCATION_ERROR;
699            goto CLEANUP;
700        }
701
702        *status = U_ZERO_ERROR; // reset error
703
704        b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
705
706    }
707
708    // normalize
709    if(profile->doNFKC == TRUE){
710        b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
711
712        if(*status == U_BUFFER_OVERFLOW_ERROR){
713            // redo processing of string
714            /* we do not have enough room so grow the buffer*/
715            b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
716            if(b2==NULL){
717                *status = U_MEMORY_ALLOCATION_ERROR;
718                goto CLEANUP;
719            }
720
721            *status = U_ZERO_ERROR; // reset error
722
723            b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
724
725        }
726
727    }else{
728        b2 = b1;
729        b2Len = b1Len;
730    }
731
732
733    if(U_FAILURE(*status)){
734        goto CLEANUP;
735    }
736
737    UChar32 ch;
738    UStringPrepType type;
739    int16_t value;
740    UBool isIndex;
741
742    // Prohibit and checkBiDi in one pass
743    for(b2Index=0; b2Index<b2Len;){
744
745        ch = 0;
746
747        U16_NEXT(b2, b2Index, b2Len, ch);
748
749        UTRIE_GET16(&profile->sprepTrie,ch,result);
750
751        type = getValues(result, value, isIndex);
752
753        if( type == USPREP_PROHIBITED ||
754            ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
755           ){
756            *status = U_STRINGPREP_PROHIBITED_ERROR;
757            uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
758            goto CLEANUP;
759        }
760
761        if(profile->checkBiDi) {
762            direction = ubidi_getClass(profile->bdp, ch);
763            if(firstCharDir == U_CHAR_DIRECTION_COUNT){
764                firstCharDir = direction;
765            }
766            if(direction == U_LEFT_TO_RIGHT){
767                leftToRight = TRUE;
768                ltrPos = b2Index-1;
769            }
770            if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
771                rightToLeft = TRUE;
772                rtlPos = b2Index-1;
773            }
774        }
775    }
776    if(profile->checkBiDi == TRUE){
777        // satisfy 2
778        if( leftToRight == TRUE && rightToLeft == TRUE){
779            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
780            uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
781            goto CLEANUP;
782        }
783
784        //satisfy 3
785        if( rightToLeft == TRUE &&
786            !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
787              (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
788           ){
789            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
790            uprv_syntaxError(b2, rtlPos, b2Len, parseError);
791            return FALSE;
792        }
793    }
794    if(b2Len>0 && b2Len <= destCapacity){
795        uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
796    }
797
798CLEANUP:
799    if(b1!=b1Stack){
800        uprv_free(b1);
801        b1=NULL;
802    }
803
804    if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
805        uprv_free(b2);
806        b2=NULL;
807    }
808    return u_terminateUChars(dest, destCapacity, b2Len, status);
809}
810
811
812/* data swapping ------------------------------------------------------------ */
813
814U_CAPI int32_t U_EXPORT2
815usprep_swap(const UDataSwapper *ds,
816            const void *inData, int32_t length, void *outData,
817            UErrorCode *pErrorCode) {
818    const UDataInfo *pInfo;
819    int32_t headerSize;
820
821    const uint8_t *inBytes;
822    uint8_t *outBytes;
823
824    const int32_t *inIndexes;
825    int32_t indexes[16];
826
827    int32_t i, offset, count, size;
828
829    /* udata_swapDataHeader checks the arguments */
830    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
831    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
832        return 0;
833    }
834
835    /* check data format and format version */
836    pInfo=(const UDataInfo *)((const char *)inData+4);
837    if(!(
838        pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
839        pInfo->dataFormat[1]==0x50 &&
840        pInfo->dataFormat[2]==0x52 &&
841        pInfo->dataFormat[3]==0x50 &&
842        pInfo->formatVersion[0]==3
843    )) {
844        udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
845                         pInfo->dataFormat[0], pInfo->dataFormat[1],
846                         pInfo->dataFormat[2], pInfo->dataFormat[3],
847                         pInfo->formatVersion[0]);
848        *pErrorCode=U_UNSUPPORTED_ERROR;
849        return 0;
850    }
851
852    inBytes=(const uint8_t *)inData+headerSize;
853    outBytes=(uint8_t *)outData+headerSize;
854
855    inIndexes=(const int32_t *)inBytes;
856
857    if(length>=0) {
858        length-=headerSize;
859        if(length<16*4) {
860            udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
861                             length);
862            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
863            return 0;
864        }
865    }
866
867    /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
868    for(i=0; i<16; ++i) {
869        indexes[i]=udata_readInt32(ds, inIndexes[i]);
870    }
871
872    /* calculate the total length of the data */
873    size=
874        16*4+ /* size of indexes[] */
875        indexes[_SPREP_INDEX_TRIE_SIZE]+
876        indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
877
878    if(length>=0) {
879        if(length<size) {
880            udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
881                             length);
882            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
883            return 0;
884        }
885
886        /* copy the data for inaccessible bytes */
887        if(inBytes!=outBytes) {
888            uprv_memcpy(outBytes, inBytes, size);
889        }
890
891        offset=0;
892
893        /* swap the int32_t indexes[] */
894        count=16*4;
895        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
896        offset+=count;
897
898        /* swap the UTrie */
899        count=indexes[_SPREP_INDEX_TRIE_SIZE];
900        utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
901        offset+=count;
902
903        /* swap the uint16_t mappingTable[] */
904        count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
905        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
906        offset+=count;
907    }
908
909    return headerSize+size;
910}
911
912#endif /* #if !UCONFIG_NO_IDNA */
913