1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6*   Copyright (C) 1999-2010, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*   file name:  uinvchar.c
11*   encoding:   UTF-8
12*   tab size:   8 (not used)
13*   indentation:2
14*
15*   created on: 2004sep14
16*   created by: Markus W. Scherer
17*
18*   Functions for handling invariant characters, moved here from putil.c
19*   for better modularization.
20*/
21
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "udataswp.h"
25#include "cstring.h"
26#include "cmemory.h"
27#include "uassert.h"
28#include "uinvchar.h"
29
30/* invariant-character handling --------------------------------------------- */
31
32/*
33 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
34 * appropriately for most EBCDIC codepages.
35 *
36 * They currently also map most other ASCII graphic characters,
37 * appropriately for codepages 37 and 1047.
38 * Exceptions: The characters for []^ have different codes in 37 & 1047.
39 * Both versions are mapped to ASCII.
40 *
41 *    ASCII 37 1047
42 * [     5B BA   AD
43 * ]     5D BB   BD
44 * ^     5E B0   5F
45 *
46 * There are no mappings for variant characters from Unicode to EBCDIC.
47 *
48 * Currently, C0 control codes are also included in these maps.
49 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
50 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
51 * but there is no mapping for ASCII LF back to EBCDIC.
52 *
53 *    ASCII EBCDIC S/390-OE
54 * LF    0A     25       15
55 * NEL   85     15       25
56 *
57 * The maps below explicitly exclude the variant
58 * control and graphical characters that are in ASCII-based
59 * codepages at 0x80 and above.
60 * "No mapping" is expressed by mapping to a 00 byte.
61 *
62 * These tables do not establish a converter or a codepage.
63 */
64
/*
 * asciiFromEbcdic[e] is the ASCII code for the EBCDIC byte e.
 * A 00 entry at any index other than 00 means "no mapping".
 * Each row holds 16 consecutive entries; the comment before each group of
 * four rows gives the range of EBCDIC codes that the group covers.
 */
static const uint8_t asciiFromEbcdic[256]={
    /* 00..3f: control codes; EBCDIC 15 and 25 both map to ASCII LF (0a) */
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    /* 40..7f: space and punctuation; EBCDIC 5f maps to '^' (5e) */
    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    /* 80..bf: lowercase letters and '~'; '[' at ad and ba, ']' at bb and bd, '^' at b0 */
    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    /* c0..ff: '{', '}', backslash, uppercase letters, digits */
    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
86
/*
 * ebcdicFromAscii[a] is the EBCDIC byte for the ASCII code a.
 * A 00 entry at any index other than 00 means "no mapping".
 * Each row holds 16 consecutive entries; the comment before each group of
 * four rows gives the range of ASCII codes that the group covers.
 */
static const uint8_t ebcdicFromAscii[256]={
    /* 00..3f: control codes, punctuation, digits; no mapping for LF (0a) and for ! # $ (21 23 24) */
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,

    /* 40..7f: letters; no mapping for 40, 5b..5e, 60 and 7b..7e */
    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,

    /* 80..bf: no mappings */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    /* c0..ff: no mappings */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
108
/*
 * Same as asciiFromEbcdic[] except maps all letters to lowercase:
 * the uppercase-letter entries in rows c0, d0 and e0 hold 61..7a instead of 41..5a.
 * All non-letter entries are unchanged; in particular EBCDIC e0 stays
 * backslash (5c) and must not be "lowercased" to 7c ('|').
 * A 00 entry at any index other than 00 means "no mapping".
 */
static const uint8_t lowercaseAsciiFromEbcdic[256]={
    /* 00..3f */
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    /* 40..7f */
    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    /* 80..bf: letters here are already lowercase */
    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    /* c0..ff: EBCDIC uppercase letters mapped to ASCII lowercase; e0 is backslash */
    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
131
132/*
133 * Bit sets indicating which characters of the ASCII repertoire
134 * (by ASCII/Unicode code) are "invariant".
135 * See utypes.h for more details.
136 *
 * The characters of the ASCII repertoire are considered invariant except
 * for the following:
 * 0a  LF  <line feed> (its EBCDIC code differs between platforms)
 *
 * 21  '!' <exclamation mark>
140 * 23  '#' <number sign>
141 * 24  '$' <dollar sign>
142 *
143 * 40  '@' <commercial at>
144 *
145 * 5b  '[' <left bracket>
146 * 5c  '\' <backslash>
147 * 5d  ']' <right bracket>
148 * 5e  '^' <circumflex>
149 *
150 * 60  '`' <grave accent>
151 *
152 * 7b  '{' <left brace>
153 * 7c  '|' <vertical line>
154 * 7d  '}' <right brace>
155 * 7e  '~' <tilde>
156 */
/*
 * One 32-bit word per run of 32 ASCII codes; bit (c&0x1f) of word (c>>5)
 * is set when code c is invariant. Each word is written as
 * "all 32 bits except the excluded codes".
 */
static const uint32_t invariantChars[4]={
    /* 00..1f but not 0a */
    ~((uint32_t)1<<0x0a),
    /* 20..3f but not 21 23 24 */
    ~(((uint32_t)1<<0x01)|((uint32_t)1<<0x03)|((uint32_t)1<<0x04)),
    /* 40..5f but not 40 5b..5e */
    ~(((uint32_t)1<<0x00)|((uint32_t)0xf<<0x1b)),
    /* 60..7f but not 60 7b..7e */
    ~(((uint32_t)1<<0x00)|((uint32_t)0xf<<0x1b))
};
163
/*
 * test unsigned types (or values known to be non-negative) for invariant characters,
 * tests ASCII-family character values
 *
 * Values above 0x7f are never invariant. For the others, (c)>>5 selects the
 * invariantChars[] word and (c)&0x1f selects the bit within that word.
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)

/*
 * test signed types for invariant characters, adds test for positive values
 * (the 0<=(c) check runs first, so a negative value never indexes invariantChars[]);
 * the argument is evaluated more than once
 */
#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
172
/*
 * CHAR_TO_UCHAR(c): platform char code -> ASCII/Unicode code.
 * UCHAR_TO_CHAR(u): ASCII/Unicode code -> platform char code.
 * On ASCII-family platforms both are the identity; the expansion is
 * parenthesized so that an expression argument cannot rebind with
 * neighboring operators. On EBCDIC-family platforms they are table lookups
 * and the argument must be a valid index (0..255) into the table.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(u) (u)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(c) asciiFromEbcdic[c]
#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
#else
#   error U_CHARSET_FAMILY is not valid
#endif
182
183
184U_CAPI void U_EXPORT2
185u_charsToUChars(const char *cs, UChar *us, int32_t length) {
186    UChar u;
187    uint8_t c;
188
189    /*
190     * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
191     * For EBCDIC systems, this works for characters with codes from
192     * codepages 37 and 1047 or compatible.
193     */
194    while(length>0) {
195        c=(uint8_t)(*cs++);
196        u=(UChar)CHAR_TO_UCHAR(c);
197        U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
198        *us++=u;
199        --length;
200    }
201}
202
203U_CAPI void U_EXPORT2
204u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
205    UChar u;
206
207    while(length>0) {
208        u=*us++;
209        if(!UCHAR_IS_INVARIANT(u)) {
210            U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
211            u=0;
212        }
213        *cs++=(char)UCHAR_TO_CHAR(u);
214        --length;
215    }
216}
217
218U_CAPI UBool U_EXPORT2
219uprv_isInvariantString(const char *s, int32_t length) {
220    uint8_t c;
221
222    for(;;) {
223        if(length<0) {
224            /* NUL-terminated */
225            c=(uint8_t)*s++;
226            if(c==0) {
227                break;
228            }
229        } else {
230            /* count length */
231            if(length==0) {
232                break;
233            }
234            --length;
235            c=(uint8_t)*s++;
236            if(c==0) {
237                continue; /* NUL is invariant */
238            }
239        }
240        /* c!=0 now, one branch below checks c==0 for variant characters */
241
242        /*
243         * no assertions here because these functions are legitimately called
244         * for strings with variant characters
245         */
246#if U_CHARSET_FAMILY==U_ASCII_FAMILY
247        if(!UCHAR_IS_INVARIANT(c)) {
248            return FALSE; /* found a variant char */
249        }
250#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
251        c=CHAR_TO_UCHAR(c);
252        if(c==0 || !UCHAR_IS_INVARIANT(c)) {
253            return FALSE; /* found a variant char */
254        }
255#else
256#   error U_CHARSET_FAMILY is not valid
257#endif
258    }
259    return TRUE;
260}
261
262U_CAPI UBool U_EXPORT2
263uprv_isInvariantUString(const UChar *s, int32_t length) {
264    UChar c;
265
266    for(;;) {
267        if(length<0) {
268            /* NUL-terminated */
269            c=*s++;
270            if(c==0) {
271                break;
272            }
273        } else {
274            /* count length */
275            if(length==0) {
276                break;
277            }
278            --length;
279            c=*s++;
280        }
281
282        /*
283         * no assertions here because these functions are legitimately called
284         * for strings with variant characters
285         */
286        if(!UCHAR_IS_INVARIANT(c)) {
287            return FALSE; /* found a variant char */
288        }
289    }
290    return TRUE;
291}
292
293/* UDataSwapFn implementations used in udataswp.c ------- */
294
295/* convert ASCII to EBCDIC and verify that all characters are invariant */
296U_CAPI int32_t U_EXPORT2
297uprv_ebcdicFromAscii(const UDataSwapper *ds,
298                     const void *inData, int32_t length, void *outData,
299                     UErrorCode *pErrorCode) {
300    const uint8_t *s;
301    uint8_t *t;
302    uint8_t c;
303
304    int32_t count;
305
306    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
307        return 0;
308    }
309    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
310        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
311        return 0;
312    }
313
314    /* setup and swapping */
315    s=(const uint8_t *)inData;
316    t=(uint8_t *)outData;
317    count=length;
318    while(count>0) {
319        c=*s++;
320        if(!UCHAR_IS_INVARIANT(c)) {
321            udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
322                             length, length-count);
323            *pErrorCode=U_INVALID_CHAR_FOUND;
324            return 0;
325        }
326        *t++=ebcdicFromAscii[c];
327        --count;
328    }
329
330    return length;
331}
332
333/* this function only checks and copies ASCII strings without conversion */
334U_CFUNC int32_t
335uprv_copyAscii(const UDataSwapper *ds,
336               const void *inData, int32_t length, void *outData,
337               UErrorCode *pErrorCode) {
338    const uint8_t *s;
339    uint8_t c;
340
341    int32_t count;
342
343    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
344        return 0;
345    }
346    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
347        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
348        return 0;
349    }
350
351    /* setup and checking */
352    s=(const uint8_t *)inData;
353    count=length;
354    while(count>0) {
355        c=*s++;
356        if(!UCHAR_IS_INVARIANT(c)) {
357            udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
358                             length, length-count);
359            *pErrorCode=U_INVALID_CHAR_FOUND;
360            return 0;
361        }
362        --count;
363    }
364
365    if(length>0 && inData!=outData) {
366        uprv_memcpy(outData, inData, length);
367    }
368
369    return length;
370}
371
372/* convert EBCDIC to ASCII and verify that all characters are invariant */
373U_CFUNC int32_t
374uprv_asciiFromEbcdic(const UDataSwapper *ds,
375                     const void *inData, int32_t length, void *outData,
376                     UErrorCode *pErrorCode) {
377    const uint8_t *s;
378    uint8_t *t;
379    uint8_t c;
380
381    int32_t count;
382
383    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
384        return 0;
385    }
386    if(ds==NULL || inData==NULL || length<0 ||  (length>0 && outData==NULL)) {
387        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
388        return 0;
389    }
390
391    /* setup and swapping */
392    s=(const uint8_t *)inData;
393    t=(uint8_t *)outData;
394    count=length;
395    while(count>0) {
396        c=*s++;
397        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
398            udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
399                             length, length-count);
400            *pErrorCode=U_INVALID_CHAR_FOUND;
401            return 0;
402        }
403        *t++=c;
404        --count;
405    }
406
407    return length;
408}
409
410/* this function only checks and copies EBCDIC strings without conversion */
411U_CFUNC int32_t
412uprv_copyEbcdic(const UDataSwapper *ds,
413                const void *inData, int32_t length, void *outData,
414                UErrorCode *pErrorCode) {
415    const uint8_t *s;
416    uint8_t c;
417
418    int32_t count;
419
420    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
421        return 0;
422    }
423    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
424        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
425        return 0;
426    }
427
428    /* setup and checking */
429    s=(const uint8_t *)inData;
430    count=length;
431    while(count>0) {
432        c=*s++;
433        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
434            udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
435                             length, length-count);
436            *pErrorCode=U_INVALID_CHAR_FOUND;
437            return 0;
438        }
439        --count;
440    }
441
442    if(length>0 && inData!=outData) {
443        uprv_memcpy(outData, inData, length);
444    }
445
446    return length;
447}
448
449/* compare invariant strings; variant characters compare less than others and unlike each other */
450U_CFUNC int32_t
451uprv_compareInvAscii(const UDataSwapper *ds,
452                     const char *outString, int32_t outLength,
453                     const UChar *localString, int32_t localLength) {
454    (void)ds;
455    int32_t minLength;
456    UChar32 c1, c2;
457    uint8_t c;
458
459    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
460        return 0;
461    }
462
463    if(outLength<0) {
464        outLength=(int32_t)uprv_strlen(outString);
465    }
466    if(localLength<0) {
467        localLength=u_strlen(localString);
468    }
469
470    minLength= outLength<localLength ? outLength : localLength;
471
472    while(minLength>0) {
473        c=(uint8_t)*outString++;
474        if(UCHAR_IS_INVARIANT(c)) {
475            c1=c;
476        } else {
477            c1=-1;
478        }
479
480        c2=*localString++;
481        if(!UCHAR_IS_INVARIANT(c2)) {
482            c2=-2;
483        }
484
485        if((c1-=c2)!=0) {
486            return c1;
487        }
488
489        --minLength;
490    }
491
492    /* strings start with same prefix, compare lengths */
493    return outLength-localLength;
494}
495
496U_CFUNC int32_t
497uprv_compareInvEbcdic(const UDataSwapper *ds,
498                      const char *outString, int32_t outLength,
499                      const UChar *localString, int32_t localLength) {
500    (void)ds;
501    int32_t minLength;
502    UChar32 c1, c2;
503    uint8_t c;
504
505    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
506        return 0;
507    }
508
509    if(outLength<0) {
510        outLength=(int32_t)uprv_strlen(outString);
511    }
512    if(localLength<0) {
513        localLength=u_strlen(localString);
514    }
515
516    minLength= outLength<localLength ? outLength : localLength;
517
518    while(minLength>0) {
519        c=(uint8_t)*outString++;
520        if(c==0) {
521            c1=0;
522        } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
523            /* c1 is set */
524        } else {
525            c1=-1;
526        }
527
528        c2=*localString++;
529        if(!UCHAR_IS_INVARIANT(c2)) {
530            c2=-2;
531        }
532
533        if((c1-=c2)!=0) {
534            return c1;
535        }
536
537        --minLength;
538    }
539
540    /* strings start with same prefix, compare lengths */
541    return outLength-localLength;
542}
543
544U_CAPI int32_t U_EXPORT2
545uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
546    int32_t c1, c2;
547
548    for(;; ++s1, ++s2) {
549        c1=(uint8_t)*s1;
550        c2=(uint8_t)*s2;
551        if(c1!=c2) {
552            if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
553                c1=-(int32_t)(uint8_t)*s1;
554            }
555            if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
556                c2=-(int32_t)(uint8_t)*s2;
557            }
558            return c1-c2;
559        } else if(c1==0) {
560            return 0;
561        }
562    }
563}
564
565U_CAPI char U_EXPORT2
566uprv_ebcdicToLowercaseAscii(char c) {
567    return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
568}
569
570U_INTERNAL uint8_t* U_EXPORT2
571uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
572{
573  uint8_t *orig_dst = dst;
574
575  if(n==-1) {
576    n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
577  }
578  /* copy non-null */
579  while(*src && n>0) {
580    *(dst++) = asciiFromEbcdic[*(src++)];
581    n--;
582  }
583  /* pad */
584  while(n>0) {
585    *(dst++) = 0;
586    n--;
587  }
588  return orig_dst;
589}
590
591U_INTERNAL uint8_t* U_EXPORT2
592uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
593{
594  uint8_t *orig_dst = dst;
595
596  if(n==-1) {
597    n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
598  }
599  /* copy non-null */
600  while(*src && n>0) {
601    char ch = ebcdicFromAscii[*(src++)];
602    if(ch == 0) {
603      ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
604    }
605    *(dst++) = ch;
606    n--;
607  }
608  /* pad */
609  while(n>0) {
610    *(dst++) = 0;
611    n--;
612  }
613  return orig_dst;
614}
615
616