1/*
2******************************************************************************
3*
4*   Copyright (C) 2000-2009, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ucnvscsu.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2000nov18
14*   created by: Markus W. Scherer
15*
16*   This is an implementation of the Standard Compression Scheme for Unicode
17*   as defined in http://www.unicode.org/unicode/reports/tr6/ .
18*   Reserved commands and window settings are treated as illegal sequences and
19*   will result in callback calls.
20*/
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_CONVERSION
25
26#include "unicode/ucnv.h"
27#include "unicode/ucnv_cb.h"
28#include "ucnv_bld.h"
29#include "ucnv_cnv.h"
30#include "cmemory.h"
31
32/* SCSU definitions --------------------------------------------------------- */
33
/*
 * SCSU tag (command) byte values.
 * The S* tags are recognized in single-byte mode, the U* tags in Unicode mode.
 * Only the first and last value of each contiguous tag range is named;
 * the window number is obtained by subtracting the first value of the range.
 */
enum {
    /* single-byte mode */
    SQ0=0x01, /* first "quote one character from window n" tag (n=0) */
    SQ7=0x08, /* last  "quote one character from window n" tag (n=7) */
    SDX=0x0B, /* define an extended (supplementary-plane) window */
    Srs=0x0C, /* reserved, treated as illegal */
    SQU=0x0E, /* quote one UTF-16 code unit given as two bytes */
    SCU=0x0F, /* switch to Unicode mode */
    SC0=0x10, /* first "select dynamic window n" tag (n=0) */
    SC7=0x17, /* last  "select dynamic window n" tag (n=7) */
    SD0=0x18, /* first "define and select dynamic window n" tag (n=0) */
    SD7=0x1F, /* last  "define and select dynamic window n" tag (n=7) */

    /* Unicode mode */
    UC0=0xE0, /* first "select window n, back to single-byte mode" tag (n=0) */
    UC7=0xE7, /* last  "select window n, back to single-byte mode" tag (n=7) */
    UD0=0xE8, /* first "define and select window n" tag (n=0) */
    UD7=0xEF, /* last  "define and select window n" tag (n=7) */
    UQU=0xF0, /* quote one UTF-16 code unit given as two bytes */
    UDX=0xF1, /* define an extended (supplementary-plane) window */
    Urs=0xF2  /* reserved, treated as illegal */
};
55
/*
 * Classification of the one-byte window offset argument of the
 * "define window" commands.
 */
enum {
    /*
     * Code points from U+3400 up to U+E000 cannot be addressed by a
     * dynamic window because no short-run alphabets live there.
     * Offset bytes at or above gapThreshold therefore get gapOffset added
     * to the computed window start.
     */
    gapThreshold=0x68,
    gapOffset=0xAC00,

    /* offset bytes from reservedStart up to (excluding) fixedThreshold are reserved */
    reservedStart=0xA8,

    /* offset bytes from fixedThreshold on index the table of predefined offsets */
    fixedThreshold=0xF9
};
71
/*
 * Start code points of the 8 static windows.
 * These never change; all of them are in the BMP.
 */
static const uint32_t staticOffsets[8]={
    0x0000, /* window 0: ASCII (used for quoted tags) */
    0x0080, /* window 1: Latin-1 Supplement (gives access to punctuation) */
    0x0100, /* window 2: Latin Extended-A */
    0x0300, /* window 3: Combining Diacritical Marks */
    0x2000, /* window 4: General Punctuation */
    0x2080, /* window 5: Currency Symbols */
    0x2100, /* window 6: Letterlike Symbols and Number Forms */
    0x3000  /* window 7: CJK Symbols and Punctuation */
};
83
/*
 * Start code points that the 8 dynamic (sliding) windows have after a reset.
 * Copied into SCSUData.toUDynamicOffsets / fromUDynamicOffsets.
 */
static const uint32_t initialDynamicOffsets[8]={
    0x0080, /* window 0: Latin-1 */
    0x00C0, /* window 1: Latin Extended A */
    0x0400, /* window 2: Cyrillic */
    0x0600, /* window 3: Arabic */
    0x0900, /* window 4: Devanagari */
    0x3040, /* window 5: Hiragana */
    0x30A0, /* window 6: Katakana */
    0xFF00  /* window 7: Fullwidth ASCII */
};
95
/*
 * Predefined window start code points.
 * Indexed with (offset byte - fixedThreshold), i.e. for offset bytes 0xF9..0xFF.
 */
static const uint32_t fixedOffsets[]={
    0x00C0, /* offset byte 0xF9: Latin-1 letters + half of Latin Extended A */
    0x0250, /* offset byte 0xFA: IPA extensions */
    0x0370, /* offset byte 0xFB: Greek */
    0x0530, /* offset byte 0xFC: Armenian */
    0x3040, /* offset byte 0xFD: Hiragana */
    0x30A0, /* offset byte 0xFE: Katakana */
    0xFF60  /* offset byte 0xFF: Halfwidth Katakana */
};
106
/*
 * Decoder state machine states (stored in SCSUData.toUState).
 * Each state names what the next input byte is expected to be.
 */
enum {
    readCommand,    /* a command/tag byte or a plain data byte */
    quotePairOne,   /* high byte of a quoted UTF-16 code unit */
    quotePairTwo,   /* low byte of a UTF-16 code unit */
    quoteOne,       /* the single byte quoted from a window */
    definePairOne,  /* first argument byte of an extended window definition */
    definePairTwo,  /* second argument byte of an extended window definition */
    defineOne       /* the offset byte of a window definition */
};
117
118typedef struct SCSUData {
119    /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */
120    uint32_t toUDynamicOffsets[8];
121    uint32_t fromUDynamicOffsets[8];
122
123    /* state machine state - toUnicode */
124    UBool toUIsSingleByteMode;
125    uint8_t toUState;
126    int8_t toUQuoteWindow, toUDynamicWindow;
127    uint8_t toUByteOne;
128    uint8_t toUPadding[3];
129
130    /* state machine state - fromUnicode */
131    UBool fromUIsSingleByteMode;
132    int8_t fromUDynamicWindow;
133
134    /*
135     * windowUse[] keeps track of the use of the dynamic windows:
136     * At nextWindowUseIndex there is the least recently used window,
137     * and the following windows (in a wrapping manner) are more and more
138     * recently used.
139     * At nextWindowUseIndex-1 there is the most recently used window.
140     */
141    uint8_t locale;
142    int8_t nextWindowUseIndex;
143    int8_t windowUse[8];
144} SCSUData;
145
/*
 * Initial contents of SCSUData.windowUse[] (dynamic window numbers ordered
 * from least to most recently used): the generic order and the order used
 * when the converter was opened for the "ja" locale.
 */
static const int8_t initialWindowUse[8]   ={ 7, 0, 3, 2, 4, 5, 6, 1 };
static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 };
148
/* values for SCSUData.locale */
enum {
    lGeneric,   /* any locale without special handling */
    l_ja        /* locale ID "ja" or starting with "ja_" */
};
152
153/* SCSU setup functions ----------------------------------------------------- */
154
155static void
156_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
157    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
158
159    if(choice<=UCNV_RESET_TO_UNICODE) {
160        /* reset toUnicode */
161        uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
162
163        scsu->toUIsSingleByteMode=TRUE;
164        scsu->toUState=readCommand;
165        scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
166        scsu->toUByteOne=0;
167
168        cnv->toULength=0;
169    }
170    if(choice!=UCNV_RESET_TO_UNICODE) {
171        /* reset fromUnicode */
172        uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
173
174        scsu->fromUIsSingleByteMode=TRUE;
175        scsu->fromUDynamicWindow=0;
176
177        scsu->nextWindowUseIndex=0;
178        switch(scsu->locale) {
179        case l_ja:
180            uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
181            break;
182        default:
183            uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
184            break;
185        }
186
187        cnv->fromUChar32=0;
188    }
189}
190
191static void
192_SCSUOpen(UConverter *cnv,
193          UConverterLoadArgs *pArgs,
194          UErrorCode *pErrorCode) {
195    const char *locale=pArgs->locale;
196    if(pArgs->onlyTestIsLoadable) {
197        return;
198    }
199    cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
200    if(cnv->extraInfo!=NULL) {
201        if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
202            ((SCSUData *)cnv->extraInfo)->locale=l_ja;
203        } else {
204            ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
205        }
206        _SCSUReset(cnv, UCNV_RESET_BOTH);
207    } else {
208        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
209    }
210
211    /* Set the substitution character U+fffd as a Unicode string. */
212    cnv->subUChars[0]=0xfffd;
213    cnv->subCharLen=-1;
214}
215
216static void
217_SCSUClose(UConverter *cnv) {
218    if(cnv->extraInfo!=NULL) {
219        if(!cnv->isExtraLocal) {
220            uprv_free(cnv->extraInfo);
221        }
222        cnv->extraInfo=NULL;
223    }
224}
225
226/* SCSU-to-Unicode conversion functions ------------------------------------- */
227
228static void
229_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
230                          UErrorCode *pErrorCode) {
231    UConverter *cnv;
232    SCSUData *scsu;
233    const uint8_t *source, *sourceLimit;
234    UChar *target;
235    const UChar *targetLimit;
236    int32_t *offsets;
237    UBool isSingleByteMode;
238    uint8_t state, byteOne;
239    int8_t quoteWindow, dynamicWindow;
240
241    int32_t sourceIndex, nextSourceIndex;
242
243    uint8_t b;
244
245    /* set up the local pointers */
246    cnv=pArgs->converter;
247    scsu=(SCSUData *)cnv->extraInfo;
248
249    source=(const uint8_t *)pArgs->source;
250    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
251    target=pArgs->target;
252    targetLimit=pArgs->targetLimit;
253    offsets=pArgs->offsets;
254
255    /* get the state machine state */
256    isSingleByteMode=scsu->toUIsSingleByteMode;
257    state=scsu->toUState;
258    quoteWindow=scsu->toUQuoteWindow;
259    dynamicWindow=scsu->toUDynamicWindow;
260    byteOne=scsu->toUByteOne;
261
262    /* sourceIndex=-1 if the current character began in the previous buffer */
263    sourceIndex=state==readCommand ? 0 : -1;
264    nextSourceIndex=0;
265
266    /*
267     * conversion "loop"
268     *
269     * For performance, this is not a normal C loop.
270     * Instead, there are two code blocks for the two SCSU modes.
271     * The function branches to either one, and a change of the mode is done with a goto to
272     * the other branch.
273     *
274     * Each branch has two conventional loops:
275     * - a fast-path loop for the most common codes in the mode
276     * - a loop for all other codes in the mode
277     * When the fast-path runs into a code that it cannot handle, its loop ends and it
278     * runs into the following loop to handle the other codes.
279     * The end of the input or output buffer is also handled by the slower loop.
280     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
281     *
282     * The callback handling is done by returning with an error code.
283     * The conversion framework actually calls the callback function.
284     */
285    if(isSingleByteMode) {
286        /* fast path for single-byte mode */
287        if(state==readCommand) {
288fastSingle:
289            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
290                ++source;
291                ++nextSourceIndex;
292                if(b<=0x7f) {
293                    /* write US-ASCII graphic character or DEL */
294                    *target++=(UChar)b;
295                    if(offsets!=NULL) {
296                        *offsets++=sourceIndex;
297                    }
298                } else {
299                    /* write from dynamic window */
300                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
301                    if(c<=0xffff) {
302                        *target++=(UChar)c;
303                        if(offsets!=NULL) {
304                            *offsets++=sourceIndex;
305                        }
306                    } else {
307                        /* output surrogate pair */
308                        *target++=(UChar)(0xd7c0+(c>>10));
309                        if(target<targetLimit) {
310                            *target++=(UChar)(0xdc00|(c&0x3ff));
311                            if(offsets!=NULL) {
312                                *offsets++=sourceIndex;
313                                *offsets++=sourceIndex;
314                            }
315                        } else {
316                            /* target overflow */
317                            if(offsets!=NULL) {
318                                *offsets++=sourceIndex;
319                            }
320                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
321                            cnv->UCharErrorBufferLength=1;
322                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
323                            goto endloop;
324                        }
325                    }
326                }
327                sourceIndex=nextSourceIndex;
328            }
329        }
330
331        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
332singleByteMode:
333        while(source<sourceLimit) {
334            if(target>=targetLimit) {
335                /* target is full */
336                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
337                break;
338            }
339            b=*source++;
340            ++nextSourceIndex;
341            switch(state) {
342            case readCommand:
343                /* redundant conditions are commented out */
344                /* here: b<0x20 because otherwise we would be in fastSingle */
345                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
346                    /* CR/LF/TAB/NUL */
347                    *target++=(UChar)b;
348                    if(offsets!=NULL) {
349                        *offsets++=sourceIndex;
350                    }
351                    sourceIndex=nextSourceIndex;
352                    goto fastSingle;
353                } else if(SC0<=b) {
354                    if(b<=SC7) {
355                        dynamicWindow=(int8_t)(b-SC0);
356                        sourceIndex=nextSourceIndex;
357                        goto fastSingle;
358                    } else /* if(SD0<=b && b<=SD7) */ {
359                        dynamicWindow=(int8_t)(b-SD0);
360                        state=defineOne;
361                    }
362                } else if(/* SQ0<=b && */ b<=SQ7) {
363                    quoteWindow=(int8_t)(b-SQ0);
364                    state=quoteOne;
365                } else if(b==SDX) {
366                    state=definePairOne;
367                } else if(b==SQU) {
368                    state=quotePairOne;
369                } else if(b==SCU) {
370                    sourceIndex=nextSourceIndex;
371                    isSingleByteMode=FALSE;
372                    goto fastUnicode;
373                } else /* Srs */ {
374                    /* callback(illegal) */
375                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
376                    cnv->toUBytes[0]=b;
377                    cnv->toULength=1;
378                    goto endloop;
379                }
380
381                /* store the first byte of a multibyte sequence in toUBytes[] */
382                cnv->toUBytes[0]=b;
383                cnv->toULength=1;
384                break;
385            case quotePairOne:
386                byteOne=b;
387                cnv->toUBytes[1]=b;
388                cnv->toULength=2;
389                state=quotePairTwo;
390                break;
391            case quotePairTwo:
392                *target++=(UChar)((byteOne<<8)|b);
393                if(offsets!=NULL) {
394                    *offsets++=sourceIndex;
395                }
396                sourceIndex=nextSourceIndex;
397                state=readCommand;
398                goto fastSingle;
399            case quoteOne:
400                if(b<0x80) {
401                    /* all static offsets are in the BMP */
402                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
403                    if(offsets!=NULL) {
404                        *offsets++=sourceIndex;
405                    }
406                } else {
407                    /* write from dynamic window */
408                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
409                    if(c<=0xffff) {
410                        *target++=(UChar)c;
411                        if(offsets!=NULL) {
412                            *offsets++=sourceIndex;
413                        }
414                    } else {
415                        /* output surrogate pair */
416                        *target++=(UChar)(0xd7c0+(c>>10));
417                        if(target<targetLimit) {
418                            *target++=(UChar)(0xdc00|(c&0x3ff));
419                            if(offsets!=NULL) {
420                                *offsets++=sourceIndex;
421                                *offsets++=sourceIndex;
422                            }
423                        } else {
424                            /* target overflow */
425                            if(offsets!=NULL) {
426                                *offsets++=sourceIndex;
427                            }
428                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
429                            cnv->UCharErrorBufferLength=1;
430                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
431                            goto endloop;
432                        }
433                    }
434                }
435                sourceIndex=nextSourceIndex;
436                state=readCommand;
437                goto fastSingle;
438            case definePairOne:
439                dynamicWindow=(int8_t)((b>>5)&7);
440                byteOne=(uint8_t)(b&0x1f);
441                cnv->toUBytes[1]=b;
442                cnv->toULength=2;
443                state=definePairTwo;
444                break;
445            case definePairTwo:
446                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
447                sourceIndex=nextSourceIndex;
448                state=readCommand;
449                goto fastSingle;
450            case defineOne:
451                if(b==0) {
452                    /* callback(illegal): Reserved window offset value 0 */
453                    cnv->toUBytes[1]=b;
454                    cnv->toULength=2;
455                    goto endloop;
456                } else if(b<gapThreshold) {
457                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
458                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
459                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
460                } else if(b>=fixedThreshold) {
461                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
462                } else {
463                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
464                    cnv->toUBytes[1]=b;
465                    cnv->toULength=2;
466                    goto endloop;
467                }
468                sourceIndex=nextSourceIndex;
469                state=readCommand;
470                goto fastSingle;
471            }
472        }
473    } else {
474        /* fast path for Unicode mode */
475        if(state==readCommand) {
476fastUnicode:
477            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
478                *target++=(UChar)((b<<8)|source[1]);
479                if(offsets!=NULL) {
480                    *offsets++=sourceIndex;
481                }
482                sourceIndex=nextSourceIndex;
483                nextSourceIndex+=2;
484                source+=2;
485            }
486        }
487
488        /* normal state machine for Unicode mode */
489/* unicodeByteMode: */
490        while(source<sourceLimit) {
491            if(target>=targetLimit) {
492                /* target is full */
493                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
494                break;
495            }
496            b=*source++;
497            ++nextSourceIndex;
498            switch(state) {
499            case readCommand:
500                if((uint8_t)(b-UC0)>(Urs-UC0)) {
501                    byteOne=b;
502                    cnv->toUBytes[0]=b;
503                    cnv->toULength=1;
504                    state=quotePairTwo;
505                } else if(/* UC0<=b && */ b<=UC7) {
506                    dynamicWindow=(int8_t)(b-UC0);
507                    sourceIndex=nextSourceIndex;
508                    isSingleByteMode=TRUE;
509                    goto fastSingle;
510                } else if(/* UD0<=b && */ b<=UD7) {
511                    dynamicWindow=(int8_t)(b-UD0);
512                    isSingleByteMode=TRUE;
513                    cnv->toUBytes[0]=b;
514                    cnv->toULength=1;
515                    state=defineOne;
516                    goto singleByteMode;
517                } else if(b==UDX) {
518                    isSingleByteMode=TRUE;
519                    cnv->toUBytes[0]=b;
520                    cnv->toULength=1;
521                    state=definePairOne;
522                    goto singleByteMode;
523                } else if(b==UQU) {
524                    cnv->toUBytes[0]=b;
525                    cnv->toULength=1;
526                    state=quotePairOne;
527                } else /* Urs */ {
528                    /* callback(illegal) */
529                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
530                    cnv->toUBytes[0]=b;
531                    cnv->toULength=1;
532                    goto endloop;
533                }
534                break;
535            case quotePairOne:
536                byteOne=b;
537                cnv->toUBytes[1]=b;
538                cnv->toULength=2;
539                state=quotePairTwo;
540                break;
541            case quotePairTwo:
542                *target++=(UChar)((byteOne<<8)|b);
543                if(offsets!=NULL) {
544                    *offsets++=sourceIndex;
545                }
546                sourceIndex=nextSourceIndex;
547                state=readCommand;
548                goto fastUnicode;
549            }
550        }
551    }
552endloop:
553
554    /* set the converter state back into UConverter */
555    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
556        /* reset to deal with the next character */
557        state=readCommand;
558    } else if(state==readCommand) {
559        /* not in a multi-byte sequence, reset toULength */
560        cnv->toULength=0;
561    }
562    scsu->toUIsSingleByteMode=isSingleByteMode;
563    scsu->toUState=state;
564    scsu->toUQuoteWindow=quoteWindow;
565    scsu->toUDynamicWindow=dynamicWindow;
566    scsu->toUByteOne=byteOne;
567
568    /* write back the updated pointers */
569    pArgs->source=(const char *)source;
570    pArgs->target=target;
571    pArgs->offsets=offsets;
572    return;
573}
574
575/*
576 * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
577 * If a change is made in the original function, then either
578 * change this function the same way or
579 * re-copy the original function and remove the variables
580 * offsets, sourceIndex, and nextSourceIndex.
581 */
582static void
583_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
584               UErrorCode *pErrorCode) {
585    UConverter *cnv;
586    SCSUData *scsu;
587    const uint8_t *source, *sourceLimit;
588    UChar *target;
589    const UChar *targetLimit;
590    UBool isSingleByteMode;
591    uint8_t state, byteOne;
592    int8_t quoteWindow, dynamicWindow;
593
594    uint8_t b;
595
596    /* set up the local pointers */
597    cnv=pArgs->converter;
598    scsu=(SCSUData *)cnv->extraInfo;
599
600    source=(const uint8_t *)pArgs->source;
601    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
602    target=pArgs->target;
603    targetLimit=pArgs->targetLimit;
604
605    /* get the state machine state */
606    isSingleByteMode=scsu->toUIsSingleByteMode;
607    state=scsu->toUState;
608    quoteWindow=scsu->toUQuoteWindow;
609    dynamicWindow=scsu->toUDynamicWindow;
610    byteOne=scsu->toUByteOne;
611
612    /*
613     * conversion "loop"
614     *
615     * For performance, this is not a normal C loop.
616     * Instead, there are two code blocks for the two SCSU modes.
617     * The function branches to either one, and a change of the mode is done with a goto to
618     * the other branch.
619     *
620     * Each branch has two conventional loops:
621     * - a fast-path loop for the most common codes in the mode
622     * - a loop for all other codes in the mode
623     * When the fast-path runs into a code that it cannot handle, its loop ends and it
624     * runs into the following loop to handle the other codes.
625     * The end of the input or output buffer is also handled by the slower loop.
626     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
627     *
628     * The callback handling is done by returning with an error code.
629     * The conversion framework actually calls the callback function.
630     */
631    if(isSingleByteMode) {
632        /* fast path for single-byte mode */
633        if(state==readCommand) {
634fastSingle:
635            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
636                ++source;
637                if(b<=0x7f) {
638                    /* write US-ASCII graphic character or DEL */
639                    *target++=(UChar)b;
640                } else {
641                    /* write from dynamic window */
642                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
643                    if(c<=0xffff) {
644                        *target++=(UChar)c;
645                    } else {
646                        /* output surrogate pair */
647                        *target++=(UChar)(0xd7c0+(c>>10));
648                        if(target<targetLimit) {
649                            *target++=(UChar)(0xdc00|(c&0x3ff));
650                        } else {
651                            /* target overflow */
652                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
653                            cnv->UCharErrorBufferLength=1;
654                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
655                            goto endloop;
656                        }
657                    }
658                }
659            }
660        }
661
662        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
663singleByteMode:
664        while(source<sourceLimit) {
665            if(target>=targetLimit) {
666                /* target is full */
667                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
668                break;
669            }
670            b=*source++;
671            switch(state) {
672            case readCommand:
673                /* redundant conditions are commented out */
674                /* here: b<0x20 because otherwise we would be in fastSingle */
675                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
676                    /* CR/LF/TAB/NUL */
677                    *target++=(UChar)b;
678                    goto fastSingle;
679                } else if(SC0<=b) {
680                    if(b<=SC7) {
681                        dynamicWindow=(int8_t)(b-SC0);
682                        goto fastSingle;
683                    } else /* if(SD0<=b && b<=SD7) */ {
684                        dynamicWindow=(int8_t)(b-SD0);
685                        state=defineOne;
686                    }
687                } else if(/* SQ0<=b && */ b<=SQ7) {
688                    quoteWindow=(int8_t)(b-SQ0);
689                    state=quoteOne;
690                } else if(b==SDX) {
691                    state=definePairOne;
692                } else if(b==SQU) {
693                    state=quotePairOne;
694                } else if(b==SCU) {
695                    isSingleByteMode=FALSE;
696                    goto fastUnicode;
697                } else /* Srs */ {
698                    /* callback(illegal) */
699                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
700                    cnv->toUBytes[0]=b;
701                    cnv->toULength=1;
702                    goto endloop;
703                }
704
705                /* store the first byte of a multibyte sequence in toUBytes[] */
706                cnv->toUBytes[0]=b;
707                cnv->toULength=1;
708                break;
709            case quotePairOne:
710                byteOne=b;
711                cnv->toUBytes[1]=b;
712                cnv->toULength=2;
713                state=quotePairTwo;
714                break;
715            case quotePairTwo:
716                *target++=(UChar)((byteOne<<8)|b);
717                state=readCommand;
718                goto fastSingle;
719            case quoteOne:
720                if(b<0x80) {
721                    /* all static offsets are in the BMP */
722                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
723                } else {
724                    /* write from dynamic window */
725                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
726                    if(c<=0xffff) {
727                        *target++=(UChar)c;
728                    } else {
729                        /* output surrogate pair */
730                        *target++=(UChar)(0xd7c0+(c>>10));
731                        if(target<targetLimit) {
732                            *target++=(UChar)(0xdc00|(c&0x3ff));
733                        } else {
734                            /* target overflow */
735                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
736                            cnv->UCharErrorBufferLength=1;
737                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
738                            goto endloop;
739                        }
740                    }
741                }
742                state=readCommand;
743                goto fastSingle;
744            case definePairOne:
745                dynamicWindow=(int8_t)((b>>5)&7);
746                byteOne=(uint8_t)(b&0x1f);
747                cnv->toUBytes[1]=b;
748                cnv->toULength=2;
749                state=definePairTwo;
750                break;
751            case definePairTwo:
752                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
753                state=readCommand;
754                goto fastSingle;
755            case defineOne:
756                if(b==0) {
757                    /* callback(illegal): Reserved window offset value 0 */
758                    cnv->toUBytes[1]=b;
759                    cnv->toULength=2;
760                    goto endloop;
761                } else if(b<gapThreshold) {
762                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
763                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
764                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
765                } else if(b>=fixedThreshold) {
766                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
767                } else {
768                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
769                    cnv->toUBytes[1]=b;
770                    cnv->toULength=2;
771                    goto endloop;
772                }
773                state=readCommand;
774                goto fastSingle;
775            }
776        }
777    } else {
778        /* fast path for Unicode mode */
779        if(state==readCommand) {
780fastUnicode:
781            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
782                *target++=(UChar)((b<<8)|source[1]);
783                source+=2;
784            }
785        }
786
787        /* normal state machine for Unicode mode */
788/* unicodeByteMode: */
789        while(source<sourceLimit) {
790            if(target>=targetLimit) {
791                /* target is full */
792                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
793                break;
794            }
795            b=*source++;
796            switch(state) {
797            case readCommand:
798                if((uint8_t)(b-UC0)>(Urs-UC0)) {
799                    byteOne=b;
800                    cnv->toUBytes[0]=b;
801                    cnv->toULength=1;
802                    state=quotePairTwo;
803                } else if(/* UC0<=b && */ b<=UC7) {
804                    dynamicWindow=(int8_t)(b-UC0);
805                    isSingleByteMode=TRUE;
806                    goto fastSingle;
807                } else if(/* UD0<=b && */ b<=UD7) {
808                    dynamicWindow=(int8_t)(b-UD0);
809                    isSingleByteMode=TRUE;
810                    cnv->toUBytes[0]=b;
811                    cnv->toULength=1;
812                    state=defineOne;
813                    goto singleByteMode;
814                } else if(b==UDX) {
815                    isSingleByteMode=TRUE;
816                    cnv->toUBytes[0]=b;
817                    cnv->toULength=1;
818                    state=definePairOne;
819                    goto singleByteMode;
820                } else if(b==UQU) {
821                    cnv->toUBytes[0]=b;
822                    cnv->toULength=1;
823                    state=quotePairOne;
824                } else /* Urs */ {
825                    /* callback(illegal) */
826                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
827                    cnv->toUBytes[0]=b;
828                    cnv->toULength=1;
829                    goto endloop;
830                }
831                break;
832            case quotePairOne:
833                byteOne=b;
834                cnv->toUBytes[1]=b;
835                cnv->toULength=2;
836                state=quotePairTwo;
837                break;
838            case quotePairTwo:
839                *target++=(UChar)((byteOne<<8)|b);
840                state=readCommand;
841                goto fastUnicode;
842            }
843        }
844    }
845endloop:
846
847    /* set the converter state back into UConverter */
848    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
849        /* reset to deal with the next character */
850        state=readCommand;
851    } else if(state==readCommand) {
852        /* not in a multi-byte sequence, reset toULength */
853        cnv->toULength=0;
854    }
855    scsu->toUIsSingleByteMode=isSingleByteMode;
856    scsu->toUState=state;
857    scsu->toUQuoteWindow=quoteWindow;
858    scsu->toUDynamicWindow=dynamicWindow;
859    scsu->toUByteOne=byteOne;
860
861    /* write back the updated pointers */
862    pArgs->source=(const char *)source;
863    pArgs->target=target;
864    return;
865}
866
867/* SCSU-from-Unicode conversion functions ----------------------------------- */
868
869/*
870 * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
871 * reasonable results. The lookahead is minimal.
872 * Many cases are simple:
873 * A character fits directly into the current mode, a dynamic or static window,
874 * or is not compressible. These cases are tested first.
875 * Real compression heuristics are applied to the rest, in code branches for
876 * single/Unicode mode and BMP/supplementary code points.
877 * The heuristics used here are extremely simple.
878 */
879
/*
 * Find the window that contains the character c.
 * A window that starts at offsets[k] covers offsets[k]..offsets[k]+0x7f.
 * Returns the index (0..7) of the first such window, or -1 if there is none.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t index=0;

    while(index<8) {
        /*
         * unsigned subtraction wraps around for c<offsets[index],
         * so a single comparison checks both window boundaries
         */
        uint32_t distance=c-offsets[index];
        if(distance<=0x7f) {
            return index;
        }
        ++index;
    }

    /* no window contains c */
    return -1;
}
891
892/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
893static UBool
894isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
895    return (UBool)(c<=offset+0x7f &&
896          (c>=offset || (c<=0x7f &&
897                        (c>=0x20 || (1UL<<c)&0x2601))));
898                                /* binary 0010 0110 0000 0001,
899                                   check for b==0xd || b==0xa || b==9 || b==0 */
900}
901
902/*
903 * getNextDynamicWindow returns the next dynamic window to be redefined
904 */
905static int8_t
906getNextDynamicWindow(SCSUData *scsu) {
907    int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
908    if(++scsu->nextWindowUseIndex==8) {
909        scsu->nextWindowUseIndex=0;
910    }
911    return window;
912}
913
914/*
915 * useDynamicWindow() adjusts
916 * windowUse[] and nextWindowUseIndex for the algorithm to choose
917 * the next dynamic window to be defined;
918 * a subclass may override it and provide its own algorithm.
919 */
920static void
921useDynamicWindow(SCSUData *scsu, int8_t window) {
922    /*
923     * move the existing window, which just became the most recently used one,
924     * up in windowUse[] to nextWindowUseIndex-1
925     */
926
927    /* first, find the index of the window - backwards to favor the more recently used windows */
928    int i, j;
929
930    i=scsu->nextWindowUseIndex;
931    do {
932        if(--i<0) {
933            i=7;
934        }
935    } while(scsu->windowUse[i]!=window);
936
937    /* now copy each windowUse[i+1] to [i] */
938    j=i+1;
939    if(j==8) {
940        j=0;
941    }
942    while(j!=scsu->nextWindowUseIndex) {
943        scsu->windowUse[i]=scsu->windowUse[j];
944        i=j;
945        if(++j==8) { j=0; }
946    }
947
948    /* finally, set the window into the most recently used index */
949    scsu->windowUse[i]=window;
950}
951
952/*
953 * calculate the offset and the code for a dynamic window that contains the character
954 * takes fixed offsets into account
955 * the offset of the window is stored in the offset variable,
956 * the code is returned
957 *
958 * return offset code: -1 none  <=0xff code for SDn/UDn  else code for SDX/UDX, subtract 0x200 to get the true code
959 */
960static int
961getDynamicOffset(uint32_t c, uint32_t *pOffset) {
962    int i;
963
964    for(i=0; i<7; ++i) {
965        if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
966            *pOffset=fixedOffsets[i];
967            return 0xf9+i;
968        }
969    }
970
971    if(c<0x80) {
972        /* No dynamic window for US-ASCII. */
973        return -1;
974    } else if(c<0x3400 ||
975              (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
976              (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
977    ) {
978        /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
979        *pOffset=c&0x7fffff80;
980        return (int)(c>>7);
981    } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
982        /* For these characters we need to take the gapOffset into account. */
983        *pOffset=c&0x7fffff80;
984        return (int)((c-gapOffset)>>7);
985    } else {
986        return -1;
987    }
988}
989
990/*
991 * Idea for compression:
992 *  - save SCSUData and other state before really starting work
993 *  - at endloop, see if compression could be better with just unicode mode
994 *  - don't do this if a callback has been called
995 *  - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
996 *  - different buffer handling!
997 *
998 * Drawback or need for corrective handling:
999 * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
1000 * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
1001 * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
1002 *
1003 * How to achieve both?
1004 *  - Only replace the result after an SDX or SCU?
1005 */
1006
1007static void
1008_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
1009                            UErrorCode *pErrorCode) {
1010    UConverter *cnv;
1011    SCSUData *scsu;
1012    const UChar *source, *sourceLimit;
1013    uint8_t *target;
1014    int32_t targetCapacity;
1015    int32_t *offsets;
1016
1017    UBool isSingleByteMode;
1018    uint8_t dynamicWindow;
1019    uint32_t currentOffset;
1020
1021    uint32_t c, delta;
1022
1023    int32_t sourceIndex, nextSourceIndex;
1024
1025    int32_t length;
1026
1027    /* variables for compression heuristics */
1028    uint32_t offset;
1029    UChar lead, trail;
1030    int code;
1031    int8_t window;
1032
1033    /* set up the local pointers */
1034    cnv=pArgs->converter;
1035    scsu=(SCSUData *)cnv->extraInfo;
1036
1037    /* set up the local pointers */
1038    source=pArgs->source;
1039    sourceLimit=pArgs->sourceLimit;
1040    target=(uint8_t *)pArgs->target;
1041    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1042    offsets=pArgs->offsets;
1043
1044    /* get the state machine state */
1045    isSingleByteMode=scsu->fromUIsSingleByteMode;
1046    dynamicWindow=scsu->fromUDynamicWindow;
1047    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1048
1049    c=cnv->fromUChar32;
1050
1051    /* sourceIndex=-1 if the current character began in the previous buffer */
1052    sourceIndex= c==0 ? 0 : -1;
1053    nextSourceIndex=0;
1054
1055    /* similar conversion "loop" as in toUnicode */
1056loop:
1057    if(isSingleByteMode) {
1058        if(c!=0 && targetCapacity>0) {
1059            goto getTrailSingle;
1060        }
1061
1062        /* state machine for single-byte mode */
1063/* singleByteMode: */
1064        while(source<sourceLimit) {
1065            if(targetCapacity<=0) {
1066                /* target is full */
1067                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1068                break;
1069            }
1070            c=*source++;
1071            ++nextSourceIndex;
1072
1073            if((c-0x20)<=0x5f) {
1074                /* pass US-ASCII graphic character through */
1075                *target++=(uint8_t)c;
1076                if(offsets!=NULL) {
1077                    *offsets++=sourceIndex;
1078                }
1079                --targetCapacity;
1080            } else if(c<0x20) {
1081                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1082                    /* CR/LF/TAB/NUL */
1083                    *target++=(uint8_t)c;
1084                    if(offsets!=NULL) {
1085                        *offsets++=sourceIndex;
1086                    }
1087                    --targetCapacity;
1088                } else {
1089                    /* quote C0 control character */
1090                    c|=SQ0<<8;
1091                    length=2;
1092                    goto outputBytes;
1093                }
1094            } else if((delta=c-currentOffset)<=0x7f) {
1095                /* use the current dynamic window */
1096                *target++=(uint8_t)(delta|0x80);
1097                if(offsets!=NULL) {
1098                    *offsets++=sourceIndex;
1099                }
1100                --targetCapacity;
1101            } else if(UTF_IS_SURROGATE(c)) {
1102                if(UTF_IS_SURROGATE_FIRST(c)) {
1103getTrailSingle:
1104                    lead=(UChar)c;
1105                    if(source<sourceLimit) {
1106                        /* test the following code unit */
1107                        trail=*source;
1108                        if(UTF_IS_SECOND_SURROGATE(trail)) {
1109                            ++source;
1110                            ++nextSourceIndex;
1111                            c=UTF16_GET_PAIR_VALUE(c, trail);
1112                            /* convert this surrogate code point */
1113                            /* exit this condition tree */
1114                        } else {
1115                            /* this is an unmatched lead code unit (1st surrogate) */
1116                            /* callback(illegal) */
1117                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1118                            goto endloop;
1119                        }
1120                    } else {
1121                        /* no more input */
1122                        break;
1123                    }
1124                } else {
1125                    /* this is an unmatched trail code unit (2nd surrogate) */
1126                    /* callback(illegal) */
1127                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1128                    goto endloop;
1129                }
1130
1131                /* compress supplementary character U+10000..U+10ffff */
1132                if((delta=c-currentOffset)<=0x7f) {
1133                    /* use the current dynamic window */
1134                    *target++=(uint8_t)(delta|0x80);
1135                    if(offsets!=NULL) {
1136                        *offsets++=sourceIndex;
1137                    }
1138                    --targetCapacity;
1139                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1140                    /* there is a dynamic window that contains this character, change to it */
1141                    dynamicWindow=window;
1142                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1143                    useDynamicWindow(scsu, dynamicWindow);
1144                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1145                    length=2;
1146                    goto outputBytes;
1147                } else if((code=getDynamicOffset(c, &offset))>=0) {
1148                    /* might check if there are more characters in this window to come */
1149                    /* define an extended window with this character */
1150                    code-=0x200;
1151                    dynamicWindow=getNextDynamicWindow(scsu);
1152                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1153                    useDynamicWindow(scsu, dynamicWindow);
1154                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1155                    length=4;
1156                    goto outputBytes;
1157                } else {
1158                    /* change to Unicode mode and output this (lead, trail) pair */
1159                    isSingleByteMode=FALSE;
1160                    *target++=(uint8_t)SCU;
1161                    if(offsets!=NULL) {
1162                        *offsets++=sourceIndex;
1163                    }
1164                    --targetCapacity;
1165                    c=((uint32_t)lead<<16)|trail;
1166                    length=4;
1167                    goto outputBytes;
1168                }
1169            } else if(c<0xa0) {
1170                /* quote C1 control character */
1171                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1172                length=2;
1173                goto outputBytes;
1174            } else if(c==0xfeff || c>=0xfff0) {
1175                /* quote signature character=byte order mark and specials */
1176                c|=SQU<<16;
1177                length=3;
1178                goto outputBytes;
1179            } else {
1180                /* compress all other BMP characters */
1181                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1182                    /* there is a window defined that contains this character - switch to it or quote from it? */
1183                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1184                        /* change to dynamic window */
1185                        dynamicWindow=window;
1186                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1187                        useDynamicWindow(scsu, dynamicWindow);
1188                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1189                        length=2;
1190                        goto outputBytes;
1191                    } else {
1192                        /* quote from dynamic window */
1193                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1194                        length=2;
1195                        goto outputBytes;
1196                    }
1197                } else if((window=getWindow(staticOffsets, c))>=0) {
1198                    /* quote from static window */
1199                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1200                    length=2;
1201                    goto outputBytes;
1202                } else if((code=getDynamicOffset(c, &offset))>=0) {
1203                    /* define a dynamic window with this character */
1204                    dynamicWindow=getNextDynamicWindow(scsu);
1205                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1206                    useDynamicWindow(scsu, dynamicWindow);
1207                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1208                    length=3;
1209                    goto outputBytes;
1210                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1211                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1212                ) {
1213                    /*
1214                     * this character is not compressible (a BMP ideograph or similar);
1215                     * switch to Unicode mode if this is the last character in the block
1216                     * or there is at least one more ideograph following immediately
1217                     */
1218                    isSingleByteMode=FALSE;
1219                    c|=SCU<<16;
1220                    length=3;
1221                    goto outputBytes;
1222                } else {
1223                    /* quote Unicode */
1224                    c|=SQU<<16;
1225                    length=3;
1226                    goto outputBytes;
1227                }
1228            }
1229
1230            /* normal end of conversion: prepare for a new character */
1231            c=0;
1232            sourceIndex=nextSourceIndex;
1233        }
1234    } else {
1235        if(c!=0 && targetCapacity>0) {
1236            goto getTrailUnicode;
1237        }
1238
1239        /* state machine for Unicode mode */
1240/* unicodeByteMode: */
1241        while(source<sourceLimit) {
1242            if(targetCapacity<=0) {
1243                /* target is full */
1244                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1245                break;
1246            }
1247            c=*source++;
1248            ++nextSourceIndex;
1249
1250            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1251                /* not compressible, write character directly */
1252                if(targetCapacity>=2) {
1253                    *target++=(uint8_t)(c>>8);
1254                    *target++=(uint8_t)c;
1255                    if(offsets!=NULL) {
1256                        *offsets++=sourceIndex;
1257                        *offsets++=sourceIndex;
1258                    }
1259                    targetCapacity-=2;
1260                } else {
1261                    length=2;
1262                    goto outputBytes;
1263                }
1264            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1265                /* compress BMP character if the following one is not an uncompressible ideograph */
1266                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1267                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1268                        /* ASCII digit or letter */
1269                        isSingleByteMode=TRUE;
1270                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1271                        length=2;
1272                        goto outputBytes;
1273                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1274                        /* there is a dynamic window that contains this character, change to it */
1275                        isSingleByteMode=TRUE;
1276                        dynamicWindow=window;
1277                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1278                        useDynamicWindow(scsu, dynamicWindow);
1279                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1280                        length=2;
1281                        goto outputBytes;
1282                    } else if((code=getDynamicOffset(c, &offset))>=0) {
1283                        /* define a dynamic window with this character */
1284                        isSingleByteMode=TRUE;
1285                        dynamicWindow=getNextDynamicWindow(scsu);
1286                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1287                        useDynamicWindow(scsu, dynamicWindow);
1288                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1289                        length=3;
1290                        goto outputBytes;
1291                    }
1292                }
1293
1294                /* don't know how to compress this character, just write it directly */
1295                length=2;
1296                goto outputBytes;
1297            } else if(c<0xe000) {
1298                /* c is a surrogate */
1299                if(UTF_IS_SURROGATE_FIRST(c)) {
1300getTrailUnicode:
1301                    lead=(UChar)c;
1302                    if(source<sourceLimit) {
1303                        /* test the following code unit */
1304                        trail=*source;
1305                        if(UTF_IS_SECOND_SURROGATE(trail)) {
1306                            ++source;
1307                            ++nextSourceIndex;
1308                            c=UTF16_GET_PAIR_VALUE(c, trail);
1309                            /* convert this surrogate code point */
1310                            /* exit this condition tree */
1311                        } else {
1312                            /* this is an unmatched lead code unit (1st surrogate) */
1313                            /* callback(illegal) */
1314                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1315                            goto endloop;
1316                        }
1317                    } else {
1318                        /* no more input */
1319                        break;
1320                    }
1321                } else {
1322                    /* this is an unmatched trail code unit (2nd surrogate) */
1323                    /* callback(illegal) */
1324                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1325                    goto endloop;
1326                }
1327
1328                /* compress supplementary character */
1329                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1330                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1331                ) {
1332                    /*
1333                     * there is a dynamic window that contains this character and
1334                     * the following character is not uncompressible,
1335                     * change to the window
1336                     */
1337                    isSingleByteMode=TRUE;
1338                    dynamicWindow=window;
1339                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1340                    useDynamicWindow(scsu, dynamicWindow);
1341                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1342                    length=2;
1343                    goto outputBytes;
1344                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1345                          (code=getDynamicOffset(c, &offset))>=0
1346                ) {
1347                    /* two supplementary characters in (probably) the same window - define an extended one */
1348                    isSingleByteMode=TRUE;
1349                    code-=0x200;
1350                    dynamicWindow=getNextDynamicWindow(scsu);
1351                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1352                    useDynamicWindow(scsu, dynamicWindow);
1353                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1354                    length=4;
1355                    goto outputBytes;
1356                } else {
1357                    /* don't know how to compress this character, just write it directly */
1358                    c=((uint32_t)lead<<16)|trail;
1359                    length=4;
1360                    goto outputBytes;
1361                }
1362            } else /* 0xe000<=c<0xf300 */ {
1363                /* quote to avoid SCSU tags */
1364                c|=UQU<<16;
1365                length=3;
1366                goto outputBytes;
1367            }
1368
1369            /* normal end of conversion: prepare for a new character */
1370            c=0;
1371            sourceIndex=nextSourceIndex;
1372        }
1373    }
1374endloop:
1375
1376    /* set the converter state back into UConverter */
1377    scsu->fromUIsSingleByteMode=isSingleByteMode;
1378    scsu->fromUDynamicWindow=dynamicWindow;
1379
1380    cnv->fromUChar32=c;
1381
1382    /* write back the updated pointers */
1383    pArgs->source=source;
1384    pArgs->target=(char *)target;
1385    pArgs->offsets=offsets;
1386    return;
1387
1388outputBytes:
1389    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1390    /* from the first if in the loop we know that targetCapacity>0 */
1391    if(length<=targetCapacity) {
1392        if(offsets==NULL) {
1393            switch(length) {
1394                /* each branch falls through to the next one */
1395            case 4:
1396                *target++=(uint8_t)(c>>24);
1397            case 3:
1398                *target++=(uint8_t)(c>>16);
1399            case 2:
1400                *target++=(uint8_t)(c>>8);
1401            case 1:
1402                *target++=(uint8_t)c;
1403            default:
1404                /* will never occur */
1405                break;
1406            }
1407        } else {
1408            switch(length) {
1409                /* each branch falls through to the next one */
1410            case 4:
1411                *target++=(uint8_t)(c>>24);
1412                *offsets++=sourceIndex;
1413            case 3:
1414                *target++=(uint8_t)(c>>16);
1415                *offsets++=sourceIndex;
1416            case 2:
1417                *target++=(uint8_t)(c>>8);
1418                *offsets++=sourceIndex;
1419            case 1:
1420                *target++=(uint8_t)c;
1421                *offsets++=sourceIndex;
1422            default:
1423                /* will never occur */
1424                break;
1425            }
1426        }
1427        targetCapacity-=length;
1428
1429        /* normal end of conversion: prepare for a new character */
1430        c=0;
1431        sourceIndex=nextSourceIndex;
1432        goto loop;
1433    } else {
1434        uint8_t *p;
1435
1436        /*
1437         * We actually do this backwards here:
1438         * In order to save an intermediate variable, we output
1439         * first to the overflow buffer what does not fit into the
1440         * regular target.
1441         */
1442        /* we know that 0<=targetCapacity<length<=4 */
1443        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1444        length-=targetCapacity;
1445        p=(uint8_t *)cnv->charErrorBuffer;
1446        switch(length) {
1447            /* each branch falls through to the next one */
1448        case 4:
1449            *p++=(uint8_t)(c>>24);
1450        case 3:
1451            *p++=(uint8_t)(c>>16);
1452        case 2:
1453            *p++=(uint8_t)(c>>8);
1454        case 1:
1455            *p=(uint8_t)c;
1456        default:
1457            /* will never occur */
1458            break;
1459        }
1460        cnv->charErrorBufferLength=(int8_t)length;
1461
1462        /* now output what fits into the regular target */
1463        c>>=8*length; /* length was reduced by targetCapacity */
1464        switch(targetCapacity) {
1465            /* each branch falls through to the next one */
1466        case 3:
1467            *target++=(uint8_t)(c>>16);
1468            if(offsets!=NULL) {
1469                *offsets++=sourceIndex;
1470            }
1471        case 2:
1472            *target++=(uint8_t)(c>>8);
1473            if(offsets!=NULL) {
1474                *offsets++=sourceIndex;
1475            }
1476        case 1:
1477            *target++=(uint8_t)c;
1478            if(offsets!=NULL) {
1479                *offsets++=sourceIndex;
1480            }
1481        default:
1482            break;
1483        }
1484
1485        /* target overflow */
1486        targetCapacity=0;
1487        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1488        c=0;
1489        goto endloop;
1490    }
1491}
1492
1493/*
1494 * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
1495 * If a change is made in the original function, then either
1496 * change this function the same way or
1497 * re-copy the original function and remove the variables
1498 * offsets, sourceIndex, and nextSourceIndex.
1499 */
1500static void
1501_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
1502                 UErrorCode *pErrorCode) {
1503    UConverter *cnv;
1504    SCSUData *scsu;
1505    const UChar *source, *sourceLimit;
1506    uint8_t *target;
1507    int32_t targetCapacity;
1508
1509    UBool isSingleByteMode;
1510    uint8_t dynamicWindow;
1511    uint32_t currentOffset;
1512
1513    uint32_t c, delta;
1514
1515    int32_t length;
1516
1517    /* variables for compression heuristics */
1518    uint32_t offset;
1519    UChar lead, trail;
1520    int code;
1521    int8_t window;
1522
1523    /* set up the local pointers */
1524    cnv=pArgs->converter;
1525    scsu=(SCSUData *)cnv->extraInfo;
1526
1527    /* set up the local pointers */
1528    source=pArgs->source;
1529    sourceLimit=pArgs->sourceLimit;
1530    target=(uint8_t *)pArgs->target;
1531    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1532
1533    /* get the state machine state */
1534    isSingleByteMode=scsu->fromUIsSingleByteMode;
1535    dynamicWindow=scsu->fromUDynamicWindow;
1536    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1537
1538    c=cnv->fromUChar32;
1539
1540    /* similar conversion "loop" as in toUnicode */
1541loop:
1542    if(isSingleByteMode) {
1543        if(c!=0 && targetCapacity>0) {
1544            goto getTrailSingle;
1545        }
1546
1547        /* state machine for single-byte mode */
1548/* singleByteMode: */
1549        while(source<sourceLimit) {
1550            if(targetCapacity<=0) {
1551                /* target is full */
1552                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1553                break;
1554            }
1555            c=*source++;
1556
1557            if((c-0x20)<=0x5f) {
1558                /* pass US-ASCII graphic character through */
1559                *target++=(uint8_t)c;
1560                --targetCapacity;
1561            } else if(c<0x20) {
1562                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1563                    /* CR/LF/TAB/NUL */
1564                    *target++=(uint8_t)c;
1565                    --targetCapacity;
1566                } else {
1567                    /* quote C0 control character */
1568                    c|=SQ0<<8;
1569                    length=2;
1570                    goto outputBytes;
1571                }
1572            } else if((delta=c-currentOffset)<=0x7f) {
1573                /* use the current dynamic window */
1574                *target++=(uint8_t)(delta|0x80);
1575                --targetCapacity;
1576            } else if(UTF_IS_SURROGATE(c)) {
1577                if(UTF_IS_SURROGATE_FIRST(c)) {
1578getTrailSingle:
1579                    lead=(UChar)c;
1580                    if(source<sourceLimit) {
1581                        /* test the following code unit */
1582                        trail=*source;
1583                        if(UTF_IS_SECOND_SURROGATE(trail)) {
1584                            ++source;
1585                            c=UTF16_GET_PAIR_VALUE(c, trail);
1586                            /* convert this surrogate code point */
1587                            /* exit this condition tree */
1588                        } else {
1589                            /* this is an unmatched lead code unit (1st surrogate) */
1590                            /* callback(illegal) */
1591                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1592                            goto endloop;
1593                        }
1594                    } else {
1595                        /* no more input */
1596                        break;
1597                    }
1598                } else {
1599                    /* this is an unmatched trail code unit (2nd surrogate) */
1600                    /* callback(illegal) */
1601                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1602                    goto endloop;
1603                }
1604
1605                /* compress supplementary character U+10000..U+10ffff */
1606                if((delta=c-currentOffset)<=0x7f) {
1607                    /* use the current dynamic window */
1608                    *target++=(uint8_t)(delta|0x80);
1609                    --targetCapacity;
1610                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1611                    /* there is a dynamic window that contains this character, change to it */
1612                    dynamicWindow=window;
1613                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1614                    useDynamicWindow(scsu, dynamicWindow);
1615                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1616                    length=2;
1617                    goto outputBytes;
1618                } else if((code=getDynamicOffset(c, &offset))>=0) {
1619                    /* might check if there are more characters in this window to come */
1620                    /* define an extended window with this character */
1621                    code-=0x200;
1622                    dynamicWindow=getNextDynamicWindow(scsu);
1623                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1624                    useDynamicWindow(scsu, dynamicWindow);
1625                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1626                    length=4;
1627                    goto outputBytes;
1628                } else {
1629                    /* change to Unicode mode and output this (lead, trail) pair */
1630                    isSingleByteMode=FALSE;
1631                    *target++=(uint8_t)SCU;
1632                    --targetCapacity;
1633                    c=((uint32_t)lead<<16)|trail;
1634                    length=4;
1635                    goto outputBytes;
1636                }
1637            } else if(c<0xa0) {
1638                /* quote C1 control character */
1639                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1640                length=2;
1641                goto outputBytes;
1642            } else if(c==0xfeff || c>=0xfff0) {
1643                /* quote signature character=byte order mark and specials */
1644                c|=SQU<<16;
1645                length=3;
1646                goto outputBytes;
1647            } else {
1648                /* compress all other BMP characters */
1649                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1650                    /* there is a window defined that contains this character - switch to it or quote from it? */
1651                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1652                        /* change to dynamic window */
1653                        dynamicWindow=window;
1654                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1655                        useDynamicWindow(scsu, dynamicWindow);
1656                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1657                        length=2;
1658                        goto outputBytes;
1659                    } else {
1660                        /* quote from dynamic window */
1661                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1662                        length=2;
1663                        goto outputBytes;
1664                    }
1665                } else if((window=getWindow(staticOffsets, c))>=0) {
1666                    /* quote from static window */
1667                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1668                    length=2;
1669                    goto outputBytes;
1670                } else if((code=getDynamicOffset(c, &offset))>=0) {
1671                    /* define a dynamic window with this character */
1672                    dynamicWindow=getNextDynamicWindow(scsu);
1673                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1674                    useDynamicWindow(scsu, dynamicWindow);
1675                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1676                    length=3;
1677                    goto outputBytes;
1678                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1679                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1680                ) {
1681                    /*
1682                     * this character is not compressible (a BMP ideograph or similar);
1683                     * switch to Unicode mode if this is the last character in the block
1684                     * or there is at least one more ideograph following immediately
1685                     */
1686                    isSingleByteMode=FALSE;
1687                    c|=SCU<<16;
1688                    length=3;
1689                    goto outputBytes;
1690                } else {
1691                    /* quote Unicode */
1692                    c|=SQU<<16;
1693                    length=3;
1694                    goto outputBytes;
1695                }
1696            }
1697
1698            /* normal end of conversion: prepare for a new character */
1699            c=0;
1700        }
1701    } else {
1702        if(c!=0 && targetCapacity>0) {
1703            goto getTrailUnicode;
1704        }
1705
1706        /* state machine for Unicode mode */
1707/* unicodeByteMode: */
1708        while(source<sourceLimit) {
1709            if(targetCapacity<=0) {
1710                /* target is full */
1711                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1712                break;
1713            }
1714            c=*source++;
1715
1716            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1717                /* not compressible, write character directly */
1718                if(targetCapacity>=2) {
1719                    *target++=(uint8_t)(c>>8);
1720                    *target++=(uint8_t)c;
1721                    targetCapacity-=2;
1722                } else {
1723                    length=2;
1724                    goto outputBytes;
1725                }
1726            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1727                /* compress BMP character if the following one is not an uncompressible ideograph */
1728                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1729                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1730                        /* ASCII digit or letter */
1731                        isSingleByteMode=TRUE;
1732                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1733                        length=2;
1734                        goto outputBytes;
1735                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1736                        /* there is a dynamic window that contains this character, change to it */
1737                        isSingleByteMode=TRUE;
1738                        dynamicWindow=window;
1739                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1740                        useDynamicWindow(scsu, dynamicWindow);
1741                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1742                        length=2;
1743                        goto outputBytes;
1744                    } else if((code=getDynamicOffset(c, &offset))>=0) {
1745                        /* define a dynamic window with this character */
1746                        isSingleByteMode=TRUE;
1747                        dynamicWindow=getNextDynamicWindow(scsu);
1748                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1749                        useDynamicWindow(scsu, dynamicWindow);
1750                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1751                        length=3;
1752                        goto outputBytes;
1753                    }
1754                }
1755
1756                /* don't know how to compress this character, just write it directly */
1757                length=2;
1758                goto outputBytes;
1759            } else if(c<0xe000) {
1760                /* c is a surrogate */
1761                if(UTF_IS_SURROGATE_FIRST(c)) {
1762getTrailUnicode:
1763                    lead=(UChar)c;
1764                    if(source<sourceLimit) {
1765                        /* test the following code unit */
1766                        trail=*source;
1767                        if(UTF_IS_SECOND_SURROGATE(trail)) {
1768                            ++source;
1769                            c=UTF16_GET_PAIR_VALUE(c, trail);
1770                            /* convert this surrogate code point */
1771                            /* exit this condition tree */
1772                        } else {
1773                            /* this is an unmatched lead code unit (1st surrogate) */
1774                            /* callback(illegal) */
1775                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1776                            goto endloop;
1777                        }
1778                    } else {
1779                        /* no more input */
1780                        break;
1781                    }
1782                } else {
1783                    /* this is an unmatched trail code unit (2nd surrogate) */
1784                    /* callback(illegal) */
1785                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1786                    goto endloop;
1787                }
1788
1789                /* compress supplementary character */
1790                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1791                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1792                ) {
1793                    /*
1794                     * there is a dynamic window that contains this character and
1795                     * the following character is not uncompressible,
1796                     * change to the window
1797                     */
1798                    isSingleByteMode=TRUE;
1799                    dynamicWindow=window;
1800                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1801                    useDynamicWindow(scsu, dynamicWindow);
1802                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1803                    length=2;
1804                    goto outputBytes;
1805                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1806                          (code=getDynamicOffset(c, &offset))>=0
1807                ) {
1808                    /* two supplementary characters in (probably) the same window - define an extended one */
1809                    isSingleByteMode=TRUE;
1810                    code-=0x200;
1811                    dynamicWindow=getNextDynamicWindow(scsu);
1812                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1813                    useDynamicWindow(scsu, dynamicWindow);
1814                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1815                    length=4;
1816                    goto outputBytes;
1817                } else {
1818                    /* don't know how to compress this character, just write it directly */
1819                    c=((uint32_t)lead<<16)|trail;
1820                    length=4;
1821                    goto outputBytes;
1822                }
1823            } else /* 0xe000<=c<0xf300 */ {
1824                /* quote to avoid SCSU tags */
1825                c|=UQU<<16;
1826                length=3;
1827                goto outputBytes;
1828            }
1829
1830            /* normal end of conversion: prepare for a new character */
1831            c=0;
1832        }
1833    }
1834endloop:
1835
1836    /* set the converter state back into UConverter */
1837    scsu->fromUIsSingleByteMode=isSingleByteMode;
1838    scsu->fromUDynamicWindow=dynamicWindow;
1839
1840    cnv->fromUChar32=c;
1841
1842    /* write back the updated pointers */
1843    pArgs->source=source;
1844    pArgs->target=(char *)target;
1845    return;
1846
1847outputBytes:
1848    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1849    /* from the first if in the loop we know that targetCapacity>0 */
1850    if(length<=targetCapacity) {
1851        switch(length) {
1852            /* each branch falls through to the next one */
1853        case 4:
1854            *target++=(uint8_t)(c>>24);
1855        case 3:
1856            *target++=(uint8_t)(c>>16);
1857        case 2:
1858            *target++=(uint8_t)(c>>8);
1859        case 1:
1860            *target++=(uint8_t)c;
1861        default:
1862            /* will never occur */
1863            break;
1864        }
1865        targetCapacity-=length;
1866
1867        /* normal end of conversion: prepare for a new character */
1868        c=0;
1869        goto loop;
1870    } else {
1871        uint8_t *p;
1872
1873        /*
1874         * We actually do this backwards here:
1875         * In order to save an intermediate variable, we output
1876         * first to the overflow buffer what does not fit into the
1877         * regular target.
1878         */
1879        /* we know that 0<=targetCapacity<length<=4 */
1880        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1881        length-=targetCapacity;
1882        p=(uint8_t *)cnv->charErrorBuffer;
1883        switch(length) {
1884            /* each branch falls through to the next one */
1885        case 4:
1886            *p++=(uint8_t)(c>>24);
1887        case 3:
1888            *p++=(uint8_t)(c>>16);
1889        case 2:
1890            *p++=(uint8_t)(c>>8);
1891        case 1:
1892            *p=(uint8_t)c;
1893        default:
1894            /* will never occur */
1895            break;
1896        }
1897        cnv->charErrorBufferLength=(int8_t)length;
1898
1899        /* now output what fits into the regular target */
1900        c>>=8*length; /* length was reduced by targetCapacity */
1901        switch(targetCapacity) {
1902            /* each branch falls through to the next one */
1903        case 3:
1904            *target++=(uint8_t)(c>>16);
1905        case 2:
1906            *target++=(uint8_t)(c>>8);
1907        case 1:
1908            *target++=(uint8_t)c;
1909        default:
1910            break;
1911        }
1912
1913        /* target overflow */
1914        targetCapacity=0;
1915        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1916        c=0;
1917        goto endloop;
1918    }
1919}
1920
1921/* miscellaneous ------------------------------------------------------------ */
1922
1923static const char *
1924_SCSUGetName(const UConverter *cnv) {
1925    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
1926
1927    switch(scsu->locale) {
1928    case l_ja:
1929        return "SCSU,locale=ja";
1930    default:
1931        return "SCSU";
1932    }
1933}
1934
1935/* structure for SafeClone calculations */
1936struct cloneSCSUStruct
1937{
1938    UConverter cnv;
1939    SCSUData mydata;
1940};
1941
1942static UConverter *
1943_SCSUSafeClone(const UConverter *cnv,
1944               void *stackBuffer,
1945               int32_t *pBufferSize,
1946               UErrorCode *status)
1947{
1948    struct cloneSCSUStruct * localClone;
1949    int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
1950
1951    if (U_FAILURE(*status)){
1952        return 0;
1953    }
1954
1955    if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1956        *pBufferSize = bufferSizeNeeded;
1957        return 0;
1958    }
1959
1960    localClone = (struct cloneSCSUStruct *)stackBuffer;
1961    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1962
1963    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
1964    localClone->cnv.extraInfo = &localClone->mydata;
1965    localClone->cnv.isExtraLocal = TRUE;
1966
1967    return &localClone->cnv;
1968}
1969
1970
1971static const UConverterImpl _SCSUImpl={
1972    UCNV_SCSU,
1973
1974    NULL,
1975    NULL,
1976
1977    _SCSUOpen,
1978    _SCSUClose,
1979    _SCSUReset,
1980
1981    _SCSUToUnicode,
1982    _SCSUToUnicodeWithOffsets,
1983    _SCSUFromUnicode,
1984    _SCSUFromUnicodeWithOffsets,
1985    NULL,
1986
1987    NULL,
1988    _SCSUGetName,
1989    NULL,
1990    _SCSUSafeClone,
1991    ucnv_getCompleteUnicodeSet
1992};
1993
1994static const UConverterStaticData _SCSUStaticData={
1995    sizeof(UConverterStaticData),
1996    "SCSU",
1997    1212, /* CCSID for SCSU */
1998    UCNV_IBM, UCNV_SCSU,
1999    1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
2000    /*
2001     * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
2002     * substitution string.
2003     */
2004    { 0x0e, 0xff, 0xfd, 0 }, 3,
2005    FALSE, FALSE,
2006    0,
2007    0,
2008    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
2009};
2010
2011const UConverterSharedData _SCSUData={
2012    sizeof(UConverterSharedData), ~((uint32_t)0),
2013    NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl,
2014    0
2015};
2016
2017#endif
2018