1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* 6* Copyright (C) 1999-2010, International Business Machines 7* Corporation and others. All Rights Reserved. 8* 9******************************************************************************* 10* file name: uinvchar.c 11* encoding: UTF-8 12* tab size: 8 (not used) 13* indentation:2 14* 15* created on: 2004sep14 16* created by: Markus W. Scherer 17* 18* Functions for handling invariant characters, moved here from putil.c 19* for better modularization. 20*/ 21 22#include "unicode/utypes.h" 23#include "unicode/ustring.h" 24#include "udataswp.h" 25#include "cstring.h" 26#include "cmemory.h" 27#include "uassert.h" 28#include "uinvchar.h" 29 30/* invariant-character handling --------------------------------------------- */ 31 32/* 33 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) 34 * appropriately for most EBCDIC codepages. 35 * 36 * They currently also map most other ASCII graphic characters, 37 * appropriately for codepages 37 and 1047. 38 * Exceptions: The characters for []^ have different codes in 37 & 1047. 39 * Both versions are mapped to ASCII. 40 * 41 * ASCII 37 1047 42 * [ 5B BA AD 43 * ] 5D BB BD 44 * ^ 5E B0 5F 45 * 46 * There are no mappings for variant characters from Unicode to EBCDIC. 47 * 48 * Currently, C0 control codes are also included in these maps. 49 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other 50 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), 51 * but there is no mapping for ASCII LF back to EBCDIC. 52 * 53 * ASCII EBCDIC S/390-OE 54 * LF 0A 25 15 55 * NEL 85 15 25 56 * 57 * The maps below explicitly exclude the variant 58 * control and graphical characters that are in ASCII-based 59 * codepages at 0x80 and above. 60 * "No mapping" is expressed by mapping to a 00 byte. 61 * 62 * These tables do not establish a converter or a codepage. 63 */ 64 65static const uint8_t asciiFromEbcdic[256]={ 66 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 67 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, 68 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, 69 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, 70 71 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 72 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 73 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, 74 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 75 76 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 77 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 78 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, 79 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, 80 81 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 82 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 83 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 84 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 85}; 86 87static const uint8_t ebcdicFromAscii[256]={ 88 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 89 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, 90 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, 91 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, 92 93 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 94 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d, 95 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 96 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07, 97 98 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102 103 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 107}; 108 109/* Same as asciiFromEbcdic[] except maps all letters to lowercase. */ 110static const uint8_t lowercaseAsciiFromEbcdic[256]={ 111 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 112 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, 113 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, 114 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, 115 116 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 117 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 118 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, 119 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 120 121 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 122 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 123 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, 124 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, 125 126 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 127 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 128 0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 129 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 130}; 131 132/* 133 * Bit sets indicating which characters of the ASCII repertoire 134 * (by ASCII/Unicode code) are "invariant". 135 * See utypes.h for more details. 136 * 137 * As invariant are considered the characters of the ASCII repertoire except 138 * for the following: 139 * 21 '!' <exclamation mark> 140 * 23 '#' <number sign> 141 * 24 '$' <dollar sign> 142 * 143 * 40 '@' <commercial at> 144 * 145 * 5b '[' <left bracket> 146 * 5c '\' <backslash> 147 * 5d ']' <right bracket> 148 * 5e '^' <circumflex> 149 * 150 * 60 '`' <grave accent> 151 * 152 * 7b '{' <left brace> 153 * 7c '|' <vertical line> 154 * 7d '}' <right brace> 155 * 7e '~' <tilde> 156 */ 157static const uint32_t invariantChars[4]={ 158 0xfffffbff, /* 00..1f but not 0a */ 159 0xffffffe5, /* 20..3f but not 21 23 24 */ 160 0x87fffffe, /* 40..5f but not 40 5b..5e */ 161 0x87fffffe /* 60..7f but not 60 7b..7e */ 162}; 163 164/* 165 * test unsigned types (or values known to be non-negative) for invariant characters, 166 * tests ASCII-family character values 167 */ 168#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0) 169 170/* test signed types for invariant characters, adds test for positive values */ 171#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c)) 172 173#if U_CHARSET_FAMILY==U_ASCII_FAMILY 174#define CHAR_TO_UCHAR(c) c 175#define UCHAR_TO_CHAR(c) c 176#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 177#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u] 178#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u] 179#else 180# error U_CHARSET_FAMILY is not valid 181#endif 182 183 184U_CAPI void U_EXPORT2 185u_charsToUChars(const char *cs, UChar *us, int32_t length) { 186 UChar u; 187 uint8_t c; 188 189 /* 190 * Allow the entire ASCII repertoire to be mapped _to_ Unicode. 191 * For EBCDIC systems, this works for characters with codes from 192 * codepages 37 and 1047 or compatible. 193 */ 194 while(length>0) { 195 c=(uint8_t)(*cs++); 196 u=(UChar)CHAR_TO_UCHAR(c); 197 U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */ 198 *us++=u; 199 --length; 200 } 201} 202 203U_CAPI void U_EXPORT2 204u_UCharsToChars(const UChar *us, char *cs, int32_t length) { 205 UChar u; 206 207 while(length>0) { 208 u=*us++; 209 if(!UCHAR_IS_INVARIANT(u)) { 210 U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */ 211 u=0; 212 } 213 *cs++=(char)UCHAR_TO_CHAR(u); 214 --length; 215 } 216} 217 218U_CAPI UBool U_EXPORT2 219uprv_isInvariantString(const char *s, int32_t length) { 220 uint8_t c; 221 222 for(;;) { 223 if(length<0) { 224 /* NUL-terminated */ 225 c=(uint8_t)*s++; 226 if(c==0) { 227 break; 228 } 229 } else { 230 /* count length */ 231 if(length==0) { 232 break; 233 } 234 --length; 235 c=(uint8_t)*s++; 236 if(c==0) { 237 continue; /* NUL is invariant */ 238 } 239 } 240 /* c!=0 now, one branch below checks c==0 for variant characters */ 241 242 /* 243 * no assertions here because these functions are legitimately called 244 * for strings with variant characters 245 */ 246#if U_CHARSET_FAMILY==U_ASCII_FAMILY 247 if(!UCHAR_IS_INVARIANT(c)) { 248 return FALSE; /* found a variant char */ 249 } 250#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 251 c=CHAR_TO_UCHAR(c); 252 if(c==0 || !UCHAR_IS_INVARIANT(c)) { 253 return FALSE; /* found a variant char */ 254 } 255#else 256# error U_CHARSET_FAMILY is not valid 257#endif 258 } 259 return TRUE; 260} 261 262U_CAPI UBool U_EXPORT2 263uprv_isInvariantUString(const UChar *s, int32_t length) { 264 UChar c; 265 266 for(;;) { 267 if(length<0) { 268 /* NUL-terminated */ 269 c=*s++; 270 if(c==0) { 271 break; 272 } 273 } else { 274 /* count length */ 275 if(length==0) { 276 break; 277 } 278 --length; 279 c=*s++; 280 } 281 282 /* 283 * no assertions here because these functions are legitimately called 284 * for strings with variant characters 285 */ 286 if(!UCHAR_IS_INVARIANT(c)) { 287 return FALSE; /* found a variant char */ 288 } 289 } 290 return TRUE; 291} 292 293/* UDataSwapFn implementations used in udataswp.c ------- */ 294 295/* convert ASCII to EBCDIC and verify that all characters are invariant */ 296U_CAPI int32_t U_EXPORT2 297uprv_ebcdicFromAscii(const UDataSwapper *ds, 298 const void *inData, int32_t length, void *outData, 299 UErrorCode *pErrorCode) { 300 const uint8_t *s; 301 uint8_t *t; 302 uint8_t c; 303 304 int32_t count; 305 306 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 307 return 0; 308 } 309 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 310 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 311 return 0; 312 } 313 314 /* setup and swapping */ 315 s=(const uint8_t *)inData; 316 t=(uint8_t *)outData; 317 count=length; 318 while(count>0) { 319 c=*s++; 320 if(!UCHAR_IS_INVARIANT(c)) { 321 udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n", 322 length, length-count); 323 *pErrorCode=U_INVALID_CHAR_FOUND; 324 return 0; 325 } 326 *t++=ebcdicFromAscii[c]; 327 --count; 328 } 329 330 return length; 331} 332 333/* this function only checks and copies ASCII strings without conversion */ 334U_CFUNC int32_t 335uprv_copyAscii(const UDataSwapper *ds, 336 const void *inData, int32_t length, void *outData, 337 UErrorCode *pErrorCode) { 338 const uint8_t *s; 339 uint8_t c; 340 341 int32_t count; 342 343 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 344 return 0; 345 } 346 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 347 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 348 return 0; 349 } 350 351 /* setup and checking */ 352 s=(const uint8_t *)inData; 353 count=length; 354 while(count>0) { 355 c=*s++; 356 if(!UCHAR_IS_INVARIANT(c)) { 357 udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n", 358 length, length-count); 359 *pErrorCode=U_INVALID_CHAR_FOUND; 360 return 0; 361 } 362 --count; 363 } 364 365 if(length>0 && inData!=outData) { 366 uprv_memcpy(outData, inData, length); 367 } 368 369 return length; 370} 371 372/* convert EBCDIC to ASCII and verify that all characters are invariant */ 373U_CFUNC int32_t 374uprv_asciiFromEbcdic(const UDataSwapper *ds, 375 const void *inData, int32_t length, void *outData, 376 UErrorCode *pErrorCode) { 377 const uint8_t *s; 378 uint8_t *t; 379 uint8_t c; 380 381 int32_t count; 382 383 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 384 return 0; 385 } 386 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 387 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 388 return 0; 389 } 390 391 /* setup and swapping */ 392 s=(const uint8_t *)inData; 393 t=(uint8_t *)outData; 394 count=length; 395 while(count>0) { 396 c=*s++; 397 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { 398 udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n", 399 length, length-count); 400 *pErrorCode=U_INVALID_CHAR_FOUND; 401 return 0; 402 } 403 *t++=c; 404 --count; 405 } 406 407 return length; 408} 409 410/* this function only checks and copies EBCDIC strings without conversion */ 411U_CFUNC int32_t 412uprv_copyEbcdic(const UDataSwapper *ds, 413 const void *inData, int32_t length, void *outData, 414 UErrorCode *pErrorCode) { 415 const uint8_t *s; 416 uint8_t c; 417 418 int32_t count; 419 420 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 421 return 0; 422 } 423 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 424 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 425 return 0; 426 } 427 428 /* setup and checking */ 429 s=(const uint8_t *)inData; 430 count=length; 431 while(count>0) { 432 c=*s++; 433 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { 434 udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n", 435 length, length-count); 436 *pErrorCode=U_INVALID_CHAR_FOUND; 437 return 0; 438 } 439 --count; 440 } 441 442 if(length>0 && inData!=outData) { 443 uprv_memcpy(outData, inData, length); 444 } 445 446 return length; 447} 448 449/* compare invariant strings; variant characters compare less than others and unlike each other */ 450U_CFUNC int32_t 451uprv_compareInvAscii(const UDataSwapper *ds, 452 const char *outString, int32_t outLength, 453 const UChar *localString, int32_t localLength) { 454 (void)ds; 455 int32_t minLength; 456 UChar32 c1, c2; 457 uint8_t c; 458 459 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { 460 return 0; 461 } 462 463 if(outLength<0) { 464 outLength=(int32_t)uprv_strlen(outString); 465 } 466 if(localLength<0) { 467 localLength=u_strlen(localString); 468 } 469 470 minLength= outLength<localLength ? outLength : localLength; 471 472 while(minLength>0) { 473 c=(uint8_t)*outString++; 474 if(UCHAR_IS_INVARIANT(c)) { 475 c1=c; 476 } else { 477 c1=-1; 478 } 479 480 c2=*localString++; 481 if(!UCHAR_IS_INVARIANT(c2)) { 482 c2=-2; 483 } 484 485 if((c1-=c2)!=0) { 486 return c1; 487 } 488 489 --minLength; 490 } 491 492 /* strings start with same prefix, compare lengths */ 493 return outLength-localLength; 494} 495 496U_CFUNC int32_t 497uprv_compareInvEbcdic(const UDataSwapper *ds, 498 const char *outString, int32_t outLength, 499 const UChar *localString, int32_t localLength) { 500 (void)ds; 501 int32_t minLength; 502 UChar32 c1, c2; 503 uint8_t c; 504 505 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { 506 return 0; 507 } 508 509 if(outLength<0) { 510 outLength=(int32_t)uprv_strlen(outString); 511 } 512 if(localLength<0) { 513 localLength=u_strlen(localString); 514 } 515 516 minLength= outLength<localLength ? outLength : localLength; 517 518 while(minLength>0) { 519 c=(uint8_t)*outString++; 520 if(c==0) { 521 c1=0; 522 } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) { 523 /* c1 is set */ 524 } else { 525 c1=-1; 526 } 527 528 c2=*localString++; 529 if(!UCHAR_IS_INVARIANT(c2)) { 530 c2=-2; 531 } 532 533 if((c1-=c2)!=0) { 534 return c1; 535 } 536 537 --minLength; 538 } 539 540 /* strings start with same prefix, compare lengths */ 541 return outLength-localLength; 542} 543 544U_CAPI int32_t U_EXPORT2 545uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { 546 int32_t c1, c2; 547 548 for(;; ++s1, ++s2) { 549 c1=(uint8_t)*s1; 550 c2=(uint8_t)*s2; 551 if(c1!=c2) { 552 if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) { 553 c1=-(int32_t)(uint8_t)*s1; 554 } 555 if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) { 556 c2=-(int32_t)(uint8_t)*s2; 557 } 558 return c1-c2; 559 } else if(c1==0) { 560 return 0; 561 } 562 } 563} 564 565U_CAPI char U_EXPORT2 566uprv_ebcdicToLowercaseAscii(char c) { 567 return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; 568} 569 570U_INTERNAL uint8_t* U_EXPORT2 571uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) 572{ 573 uint8_t *orig_dst = dst; 574 575 if(n==-1) { 576 n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */ 577 } 578 /* copy non-null */ 579 while(*src && n>0) { 580 *(dst++) = asciiFromEbcdic[*(src++)]; 581 n--; 582 } 583 /* pad */ 584 while(n>0) { 585 *(dst++) = 0; 586 n--; 587 } 588 return orig_dst; 589} 590 591U_INTERNAL uint8_t* U_EXPORT2 592uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) 593{ 594 uint8_t *orig_dst = dst; 595 596 if(n==-1) { 597 n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */ 598 } 599 /* copy non-null */ 600 while(*src && n>0) { 601 char ch = ebcdicFromAscii[*(src++)]; 602 if(ch == 0) { 603 ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ 604 } 605 *(dst++) = ch; 606 n--; 607 } 608 /* pad */ 609 while(n>0) { 610 *(dst++) = 0; 611 n--; 612 } 613 return orig_dst; 614} 615 616