1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: nptrans.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003feb1 16 * created by: Ram Viswanadha 17 */ 18 19#include "unicode/utypes.h" 20 21#if !UCONFIG_NO_TRANSLITERATION 22#if !UCONFIG_NO_IDNA 23 24#include "nptrans.h" 25#include "unicode/resbund.h" 26#include "unicode/uniset.h" 27#include "sprpimpl.h" 28#include "cmemory.h" 29#include "ustr_imp.h" 30#include "intltest.h" 31 32#ifdef NPTRANS_DEBUG 33#include <stdio.h> 34#endif 35 36const char NamePrepTransform::fgClassID=0; 37 38//Factory method 39NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){ 40 NamePrepTransform* transform = new NamePrepTransform(parseError, status); 41 if(U_FAILURE(status)){ 42 delete transform; 43 return NULL; 44 } 45 return transform; 46} 47 48//constructor 49NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status) 50: unassigned(), prohibited(), labelSeparatorSet(){ 51 52 mapping = NULL; 53 bundle = NULL; 54 55 56 const char* testDataName = IntlTest::loadTestData(status); 57 58 if(U_FAILURE(status)){ 59 return; 60 } 61 62 bundle = ures_openDirect(testDataName,"idna_rules",&status); 63 64 if(bundle != NULL && U_SUCCESS(status)){ 65 // create the mapping transliterator 66 int32_t ruleLen = 0; 67 const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status); 68 int32_t mapRuleLen = 0; 69 const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status); 70 UnicodeString rule(mapRuleUChar, mapRuleLen); 71 rule.append(ruleUChar, ruleLen); 72 73 mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule, 74 UTRANS_FORWARD, parseError,status); 75 if(U_FAILURE(status)) { 76 return; 77 } 78 79 //create the unassigned set 80 int32_t patternLen =0; 81 const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status); 82 unassigned.applyPattern(UnicodeString(pattern, patternLen), status); 83 84 //create prohibited set 85 patternLen=0; 86 pattern = ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status); 87 UnicodeString test(pattern,patternLen); 88 prohibited.applyPattern(test,status); 89#ifdef NPTRANS_DEBUG 90 if(U_FAILURE(status)){ 91 printf("Construction of Unicode set failed\n"); 92 } 93 94 if(U_SUCCESS(status)){ 95 if(prohibited.contains((UChar) 0x644)){ 96 printf("The string contains 0x644 ... !!\n"); 97 } 98 UnicodeString temp; 99 prohibited.toPattern(temp,TRUE); 100 101 for(int32_t i=0;i<temp.length();i++){ 102 printf("%c", (char)temp.charAt(i)); 103 } 104 printf("\n"); 105 } 106#endif 107 108 //create label separator set 109 patternLen=0; 110 pattern = ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status); 111 labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status); 112 } 113 114 if(U_SUCCESS(status) && 115 (mapping == NULL) 116 ){ 117 status = U_MEMORY_ALLOCATION_ERROR; 118 delete mapping; 119 ures_close(bundle); 120 mapping = NULL; 121 bundle = NULL; 122 } 123 124} 125 126 127UBool NamePrepTransform::isProhibited(UChar32 ch){ 128 return (UBool)(ch != ASCII_SPACE); 129} 130 131NamePrepTransform::~NamePrepTransform(){ 132 delete mapping; 133 mapping = NULL; 134 135 //close the bundle 136 ures_close(bundle); 137 bundle = NULL; 138} 139 140 141int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength, 142 UChar* dest, int32_t destCapacity, 143 UBool allowUnassigned, 144 UParseError* /*parseError*/, 145 UErrorCode& status ){ 146 147 if(U_FAILURE(status)){ 148 return 0; 149 } 150 //check arguments 151 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 152 status=U_ILLEGAL_ARGUMENT_ERROR; 153 return 0; 154 } 155 156 UnicodeString rsource(src,srcLength); 157 // map the code points 158 // transliteration also performs NFKC 159 mapping->transliterate(rsource); 160 161 const UChar* buffer = rsource.getBuffer(); 162 int32_t bufLen = rsource.length(); 163 // check if unassigned 164 if(allowUnassigned == FALSE){ 165 int32_t bufIndex=0; 166 UChar32 ch =0 ; 167 for(;bufIndex<bufLen;){ 168 U16_NEXT(buffer, bufIndex, bufLen, ch); 169 if(unassigned.contains(ch)){ 170 status = U_IDNA_UNASSIGNED_ERROR; 171 return 0; 172 } 173 } 174 } 175 // check if there is enough room in the output 176 if(bufLen < destCapacity){ 177 u_memcpy(dest, buffer, bufLen); 178 } 179 180 return u_terminateUChars(dest, destCapacity, bufLen, &status); 181} 182 183 184#define MAX_BUFFER_SIZE 300 185 186int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength, 187 UChar* dest, int32_t destCapacity, 188 UBool allowUnassigned, 189 UParseError* parseError, 190 UErrorCode& status ){ 191 // check error status 192 if(U_FAILURE(status)){ 193 return 0; 194 } 195 196 //check arguments 197 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 198 status=U_ILLEGAL_ARGUMENT_ERROR; 199 return 0; 200 } 201 202 UnicodeString b1String; 203 UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE); 204 int32_t b1Len; 205 206 int32_t b1Index = 0; 207 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 208 UBool leftToRight=FALSE, rightToLeft=FALSE; 209 210 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status); 211 b1String.releaseBuffer(b1Len); 212 213 if(status == U_BUFFER_OVERFLOW_ERROR){ 214 // redo processing of string 215 /* we do not have enough room so grow the buffer*/ 216 b1 = b1String.getBuffer(b1Len); 217 status = U_ZERO_ERROR; // reset error 218 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status); 219 b1String.releaseBuffer(b1Len); 220 } 221 222 if(U_FAILURE(status)){ 223 b1Len = 0; 224 goto CLEANUP; 225 } 226 227 228 for(; b1Index<b1Len; ){ 229 230 UChar32 ch = 0; 231 232 U16_NEXT(b1, b1Index, b1Len, ch); 233 234 if(prohibited.contains(ch) && ch!=0x0020){ 235 status = U_IDNA_PROHIBITED_ERROR; 236 b1Len = 0; 237 goto CLEANUP; 238 } 239 240 direction = u_charDirection(ch); 241 if(firstCharDir==U_CHAR_DIRECTION_COUNT){ 242 firstCharDir = direction; 243 } 244 if(direction == U_LEFT_TO_RIGHT){ 245 leftToRight = TRUE; 246 } 247 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 248 rightToLeft = TRUE; 249 } 250 } 251 252 // satisfy 2 253 if( leftToRight == TRUE && rightToLeft == TRUE){ 254 status = U_IDNA_CHECK_BIDI_ERROR; 255 b1Len = 0; 256 goto CLEANUP; 257 } 258 259 //satisfy 3 260 if( rightToLeft == TRUE && 261 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 262 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 263 ){ 264 status = U_IDNA_CHECK_BIDI_ERROR; 265 return FALSE; 266 } 267 268 if(b1Len <= destCapacity){ 269 u_memmove(dest, b1, b1Len); 270 } 271 272CLEANUP: 273 return u_terminateUChars(dest, destCapacity, b1Len, &status); 274} 275 276UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){ 277 // check error status 278 if(U_FAILURE(status)){ 279 return FALSE; 280 } 281 282 return labelSeparatorSet.contains(ch); 283} 284 285#endif /* #if !UCONFIG_NO_IDNA */ 286#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 287