1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   Copyright (C) 2000-2009, International Business Machines
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   file name:  ucnvscsu.c
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   encoding:   US-ASCII
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   tab size:   8 (not used)
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   indentation:4
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created on: 2000nov18
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created by: Markus W. Scherer
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   This is an implementation of the Standard Compression Scheme for Unicode
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   as defined in http://www.unicode.org/unicode/reports/tr6/ .
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Reserved commands and window settings are treated as illegal sequences and
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   will result in callback calls.
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_cb.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h"
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU definitions --------------------------------------------------------- */
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * SCSU command (tag) byte values.
 * For each contiguous range of tags only the first and the last value are named;
 * the values in between are handled by range comparisons.
 */
enum {
    /* Quote from window pair 0 */
    SQ0=0x01,
    /* Quote from window pair 7 */
    SQ7=0x08,
    /* Define a window as extended */
    SDX=0x0B,
    /* reserved */
    Srs=0x0C,
    /* Quote a single Unicode character */
    SQU=0x0E,
    /* Change to Unicode mode */
    SCU=0x0F,
    /* Select window 0 */
    SC0=0x10,
    /* Select window 7 */
    SC7=0x17,
    /* Define and select window 0 */
    SD0=0x18,
    /* Define and select window 7 */
    SD7=0x1F,

    /* Select window 0 */
    UC0=0xE0,
    /* Select window 7 */
    UC7=0xE7,
    /* Define and select window 0 */
    UD0=0xE8,
    /* Define and select window 7 */
    UD7=0xEF,
    /* Quote a single Unicode character */
    UQU=0xF0,
    /* Define a Window as extended */
    UDX=0xF1,
    /* reserved */
    Urs=0xF2
};
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Thresholds for interpreting a window offset byte.
 *
 * gapThreshold/gapOffset:
 *   Unicode code points from 3400 to E000 are not addressable by a
 *   dynamic window, since no short-run alphabets are found in these areas.
 *   Therefore gapOffset is added to all values from gapThreshold.
 * reservedStart:
 *   Values between reservedStart and fixedThreshold are reserved.
 * fixedThreshold:
 *   Values from fixedThreshold use the table of predefined fixed offsets.
 */
enum {
    gapThreshold=0x68,
    gapOffset=0xAC00,
    reservedStart=0xA8,
    fixedThreshold=0xF9
};
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* constant start offsets of the 8 static windows, indexed by window number */
static const uint32_t staticOffsets[8]={
    /* 0: ASCII for quoted tags */
    0x0000,
    /* 1: Latin - 1 Supplement (for access to punctuation) */
    0x0080,
    /* 2: Latin Extended-A */
    0x0100,
    /* 3: Combining Diacritical Marks */
    0x0300,
    /* 4: General Punctuation */
    0x2000,
    /* 5: Currency Symbols */
    0x2080,
    /* 6: Letterlike Symbols and Number Forms */
    0x2100,
    /* 7: CJK Symbols and punctuation */
    0x3000
};
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* start offsets that the 8 dynamic (sliding) windows have initially, indexed by window number */
static const uint32_t initialDynamicOffsets[8]={
    /* 0: Latin-1 */
    0x0080,
    /* 1: Latin Extended A */
    0x00C0,
    /* 2: Cyrillic */
    0x0400,
    /* 3: Arabic */
    0x0600,
    /* 4: Devanagari */
    0x0900,
    /* 5: Hiragana */
    0x3040,
    /* 6: Katakana */
    0x30A0,
    /* 7: Fullwidth ASCII */
    0xFF00
};
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Table of fixed predefined offsets.
 * Entry i belongs to the offset byte value 0xF9+i (0xF9..0xFF).
 */
static const uint32_t fixedOffsets[]={
    0x00C0, /* 0xF9: Latin-1 Letters + half of Latin Extended A */
    0x0250, /* 0xFA: IPA extensions */
    0x0370, /* 0xFB: Greek */
    0x0530, /* 0xFC: Armenian */
    0x3040, /* 0xFD: Hiragana */
    0x30A0, /* 0xFE: Katakana */
    0xFF60  /* 0xFF: Halfwidth Katakana */
};
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * State values of the toUnicode state machine (stored in SCSUData.toUState).
 * readCommand is the initial state set by _SCSUReset().
 */
enum {
    readCommand=0,
    quotePairOne,
    quotePairTwo,
    quoteOne,
    definePairOne,
    definePairTwo,
    defineOne
};
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct SCSUData {
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t toUDynamicOffsets[8];
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t fromUDynamicOffsets[8];
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* state machine state - toUnicode */
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool toUIsSingleByteMode;
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t toUState;
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t toUQuoteWindow, toUDynamicWindow;
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t toUByteOne;
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t toUPadding[3];
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* state machine state - fromUnicode */
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool fromUIsSingleByteMode;
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t fromUDynamicWindow;
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * windowUse[] keeps track of the use of the dynamic windows:
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * At nextWindowUseIndex there is the least recently used window,
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * and the following windows (in a wrapping manner) are more and more
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * recently used.
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * At nextWindowUseIndex-1 there is the most recently used window.
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t locale;
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t nextWindowUseIndex;
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t windowUse[8];
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} SCSUData;
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* initial contents of SCSUData.windowUse[]: default order, and the order used for the "ja" locale */
static const int8_t initialWindowUse[8]={
    7, 0, 3, 2, 4, 5, 6, 1
};
static const int8_t initialWindowUse_ja[8]={
    3, 2, 4, 1, 0, 7, 5, 6
};
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* values for SCSUData.locale */
enum {
    lGeneric=0,
    l_ja
};
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU setup functions ----------------------------------------------------- */
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(choice<=UCNV_RESET_TO_UNICODE) {
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* reset toUnicode */
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        scsu->toUIsSingleByteMode=TRUE;
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        scsu->toUState=readCommand;
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        scsu->toUByteOne=0;
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength=0;
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(choice!=UCNV_RESET_TO_UNICODE) {
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* reset fromUnicode */
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        scsu->fromUIsSingleByteMode=TRUE;
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        scsu->fromUDynamicWindow=0;
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        scsu->nextWindowUseIndex=0;
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        switch(scsu->locale) {
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case l_ja:
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default:
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->fromUChar32=0;
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUOpen(UConverter *cnv,
19385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho          UConverterLoadArgs *pArgs,
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          UErrorCode *pErrorCode) {
19585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    const char *locale=pArgs->locale;
19685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(pArgs->onlyTestIsLoadable) {
19785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
19885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cnv->extraInfo!=NULL) {
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ((SCSUData *)cnv->extraInfo)->locale=l_ja;
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        _SCSUReset(cnv, UCNV_RESET_BOTH);
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* Set the substitution character U+fffd as a Unicode string. */
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->subUChars[0]=0xfffd;
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->subCharLen=-1;
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUClose(UConverter *cnv) {
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cnv->extraInfo!=NULL) {
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(!cnv->isExtraLocal) {
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_free(cnv->extraInfo);
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->extraInfo=NULL;
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU-to-Unicode conversion functions ------------------------------------- */
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          UErrorCode *pErrorCode) {
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    SCSUData *scsu;
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source, *sourceLimit;
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *target;
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *targetLimit;
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t *offsets;
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isSingleByteMode;
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t state, byteOne;
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t quoteWindow, dynamicWindow;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t sourceIndex, nextSourceIndex;
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t b;
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pArgs->converter;
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu=(SCSUData *)cnv->extraInfo;
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(const uint8_t *)pArgs->source;
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=pArgs->target;
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetLimit=pArgs->targetLimit;
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offsets=pArgs->offsets;
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the state machine state */
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isSingleByteMode=scsu->toUIsSingleByteMode;
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    state=scsu->toUState;
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    quoteWindow=scsu->toUQuoteWindow;
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    dynamicWindow=scsu->toUDynamicWindow;
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    byteOne=scsu->toUByteOne;
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* sourceIndex=-1 if the current character began in the previous buffer */
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceIndex=state==readCommand ? 0 : -1;
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    nextSourceIndex=0;
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * conversion "loop"
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * For performance, this is not a normal C loop.
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Instead, there are two code blocks for the two SCSU modes.
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The function branches to either one, and a change of the mode is done with a goto to
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the other branch.
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Each branch has two conventional loops:
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - a fast-path loop for the most common codes in the mode
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - a loop for all other codes in the mode
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * When the fast-path runs into a code that it cannot handle, its loop ends and it
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * runs into the following loop to handle the other codes.
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The end of the input or output buffer is also handled by the slower loop.
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The callback handling is done by returning with an error code.
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The conversion framework actually calls the callback function.
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(isSingleByteMode) {
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* fast path for single-byte mode */
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(state==readCommand) {
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle:
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++source;
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++nextSourceIndex;
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(b<=0x7f) {
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* write US-ASCII graphic character or DEL */
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(UChar)b;
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(offsets!=NULL) {
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* write from dynamic window */
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(c<=0xffff) {
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(UChar)c;
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(offsets!=NULL) {
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *offsets++=sourceIndex;
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* output surrogate pair */
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(UChar)(0xd7c0+(c>>10));
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(target<targetLimit) {
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *target++=(UChar)(0xdc00|(c&0x3ff));
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            if(offsets!=NULL) {
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                *offsets++=sourceIndex;
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                *offsets++=sourceIndex;
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            }
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else {
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* target overflow */
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            if(offsets!=NULL) {
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                *offsets++=sourceIndex;
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            }
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            cnv->UCharErrorBufferLength=1;
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            goto endloop;
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusingleByteMode:
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(source<sourceLimit) {
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(target>=targetLimit) {
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target is full */
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=*source++;
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++nextSourceIndex;
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            switch(state) {
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case readCommand:
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* redundant conditions are commented out */
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* here: b<0x20 because otherwise we would be in fastSingle */
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* CR/LF/TAB/NUL */
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(UChar)b;
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(offsets!=NULL) {
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    sourceIndex=nextSourceIndex;
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto fastSingle;
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(SC0<=b) {
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(b<=SC7) {
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=(int8_t)(b-SC0);
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        sourceIndex=nextSourceIndex;
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto fastSingle;
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else /* if(SD0<=b && b<=SD7) */ {
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=(int8_t)(b-SD0);
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        state=defineOne;
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(/* SQ0<=b && */ b<=SQ7) {
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    quoteWindow=(int8_t)(b-SQ0);
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=quoteOne;
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==SDX) {
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=definePairOne;
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==SQU) {
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=quotePairOne;
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==SCU) {
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    sourceIndex=nextSourceIndex;
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=FALSE;
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto fastUnicode;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else /* Srs */ {
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal) */
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* store the first byte of a multibyte sequence in toUBytes[] */
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toUBytes[0]=b;
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength=1;
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quotePairOne:
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                byteOne=b;
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toUBytes[1]=b;
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength=2;
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=quotePairTwo;
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quotePairTwo:
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)((byteOne<<8)|b);
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(offsets!=NULL) {
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *offsets++=sourceIndex;
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quoteOne:
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(b<0x80) {
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* all static offsets are in the BMP */
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(offsets!=NULL) {
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* write from dynamic window */
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(c<=0xffff) {
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(UChar)c;
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(offsets!=NULL) {
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *offsets++=sourceIndex;
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* output surrogate pair */
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(UChar)(0xd7c0+(c>>10));
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(target<targetLimit) {
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *target++=(UChar)(0xdc00|(c&0x3ff));
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            if(offsets!=NULL) {
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                *offsets++=sourceIndex;
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                *offsets++=sourceIndex;
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            }
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else {
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* target overflow */
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            if(offsets!=NULL) {
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                *offsets++=sourceIndex;
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            }
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            cnv->UCharErrorBufferLength=1;
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            goto endloop;
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case definePairOne:
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                dynamicWindow=(int8_t)((b>>5)&7);
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                byteOne=(uint8_t)(b&0x1f);
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toUBytes[1]=b;
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength=2;
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=definePairTwo;
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case definePairTwo:
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case defineOne:
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(b==0) {
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal): Reserved window offset value 0 */
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[1]=b;
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=2;
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b<gapThreshold) {
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b>=fixedThreshold) {
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[1]=b;
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=2;
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* fast path for Unicode mode */
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(state==readCommand) {
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastUnicode:
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)((b<<8)|source[1]);
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(offsets!=NULL) {
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *offsets++=sourceIndex;
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                nextSourceIndex+=2;
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                source+=2;
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* normal state machine for Unicode mode */
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(source<sourceLimit) {
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(target>=targetLimit) {
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target is full */
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=*source++;
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++nextSourceIndex;
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            switch(state) {
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case readCommand:
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((uint8_t)(b-UC0)>(Urs-UC0)) {
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    byteOne=b;
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=quotePairTwo;
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(/* UC0<=b && */ b<=UC7) {
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=(int8_t)(b-UC0);
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    sourceIndex=nextSourceIndex;
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto fastSingle;
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(/* UD0<=b && */ b<=UD7) {
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=(int8_t)(b-UD0);
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=defineOne;
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto singleByteMode;
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==UDX) {
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=definePairOne;
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto singleByteMode;
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==UQU) {
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=quotePairOne;
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else /* Urs */ {
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal) */
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quotePairOne:
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                byteOne=b;
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toUBytes[1]=b;
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength=2;
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=quotePairTwo;
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quotePairTwo:
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)((byteOne<<8)|b);
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(offsets!=NULL) {
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *offsets++=sourceIndex;
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastUnicode;
549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop:
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set the converter state back into UConverter */
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* reset to deal with the next character */
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        state=readCommand;
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(state==readCommand) {
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* not in a multi-byte sequence, reset toULength */
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength=0;
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUIsSingleByteMode=isSingleByteMode;
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUState=state;
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUQuoteWindow=quoteWindow;
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUDynamicWindow=dynamicWindow;
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUByteOne=byteOne;
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=(const char *)source;
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=target;
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->offsets=offsets;
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return;
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex.
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               UErrorCode *pErrorCode) {
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    SCSUData *scsu;
587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source, *sourceLimit;
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *target;
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *targetLimit;
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isSingleByteMode;
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t state, byteOne;
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t quoteWindow, dynamicWindow;
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t b;
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pArgs->converter;
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu=(SCSUData *)cnv->extraInfo;
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(const uint8_t *)pArgs->source;
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=pArgs->target;
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetLimit=pArgs->targetLimit;
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the state machine state */
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isSingleByteMode=scsu->toUIsSingleByteMode;
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    state=scsu->toUState;
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    quoteWindow=scsu->toUQuoteWindow;
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    dynamicWindow=scsu->toUDynamicWindow;
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    byteOne=scsu->toUByteOne;
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * conversion "loop"
614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * For performance, this is not a normal C loop.
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Instead, there are two code blocks for the two SCSU modes.
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The function branches to either one, and a change of the mode is done with a goto to
618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the other branch.
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Each branch has two conventional loops:
621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - a fast-path loop for the most common codes in the mode
622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - a loop for all other codes in the mode
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * When the fast-path runs into a code that it cannot handle, its loop ends and it
624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * runs into the following loop to handle the other codes.
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The end of the input or output buffer is also handled by the slower loop.
626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The callback handling is done by returning with an error code.
629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The conversion framework actually calls the callback function.
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(isSingleByteMode) {
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* fast path for single-byte mode */
633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(state==readCommand) {
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle:
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++source;
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(b<=0x7f) {
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* write US-ASCII graphic character or DEL */
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(UChar)b;
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* write from dynamic window */
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(c<=0xffff) {
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(UChar)c;
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* output surrogate pair */
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(UChar)(0xd7c0+(c>>10));
648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(target<targetLimit) {
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *target++=(UChar)(0xdc00|(c&0x3ff));
650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else {
651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* target overflow */
652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            cnv->UCharErrorBufferLength=1;
654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            goto endloop;
656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusingleByteMode:
664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(source<sourceLimit) {
665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(target>=targetLimit) {
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target is full */
667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=*source++;
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            switch(state) {
672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case readCommand:
673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* redundant conditions are commented out */
674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* here: b<0x20 because otherwise we would be in fastSingle */
675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* CR/LF/TAB/NUL */
677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(UChar)b;
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto fastSingle;
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(SC0<=b) {
680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(b<=SC7) {
681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=(int8_t)(b-SC0);
682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto fastSingle;
683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else /* if(SD0<=b && b<=SD7) */ {
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=(int8_t)(b-SD0);
685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        state=defineOne;
686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(/* SQ0<=b && */ b<=SQ7) {
688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    quoteWindow=(int8_t)(b-SQ0);
689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=quoteOne;
690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==SDX) {
691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=definePairOne;
692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==SQU) {
693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=quotePairOne;
694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==SCU) {
695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=FALSE;
696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto fastUnicode;
697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else /* Srs */ {
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal) */
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* store the first byte of a multibyte sequence in toUBytes[] */
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toUBytes[0]=b;
707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength=1;
708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quotePairOne:
710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                byteOne=b;
711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toUBytes[1]=b;
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength=2;
713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=quotePairTwo;
714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quotePairTwo:
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)((byteOne<<8)|b);
717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quoteOne:
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(b<0x80) {
721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* all static offsets are in the BMP */
722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* write from dynamic window */
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(c<=0xffff) {
727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(UChar)c;
728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* output surrogate pair */
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(UChar)(0xd7c0+(c>>10));
731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(target<targetLimit) {
732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *target++=(UChar)(0xdc00|(c&0x3ff));
733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else {
734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* target overflow */
735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            cnv->UCharErrorBufferLength=1;
737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            goto endloop;
739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case definePairOne:
745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                dynamicWindow=(int8_t)((b>>5)&7);
746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                byteOne=(uint8_t)(b&0x1f);
747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toUBytes[1]=b;
748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength=2;
749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=definePairTwo;
750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case definePairTwo:
752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case defineOne:
756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(b==0) {
757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal): Reserved window offset value 0 */
758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[1]=b;
759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=2;
760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b<gapThreshold) {
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b>=fixedThreshold) {
766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[1]=b;
770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=2;
771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* fast path for Unicode mode */
779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(state==readCommand) {
780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastUnicode:
781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)((b<<8)|source[1]);
783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                source+=2;
784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* normal state machine for Unicode mode */
788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */
789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(source<sourceLimit) {
790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(target>=targetLimit) {
791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target is full */
792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=*source++;
796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            switch(state) {
797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case readCommand:
798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((uint8_t)(b-UC0)>(Urs-UC0)) {
799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    byteOne=b;
800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=quotePairTwo;
803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(/* UC0<=b && */ b<=UC7) {
804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=(int8_t)(b-UC0);
805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto fastSingle;
807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(/* UD0<=b && */ b<=UD7) {
808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=(int8_t)(b-UD0);
809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=defineOne;
813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto singleByteMode;
814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==UDX) {
815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=definePairOne;
819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto singleByteMode;
820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(b==UQU) {
821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    state=quotePairOne;
824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else /* Urs */ {
825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal) */
826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUBytes[0]=b;
828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength=1;
829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quotePairOne:
833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                byteOne=b;
834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toUBytes[1]=b;
835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength=2;
836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=quotePairTwo;
837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case quotePairTwo:
839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)((byteOne<<8)|b);
840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                state=readCommand;
841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastUnicode;
842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop:
846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set the converter state back into UConverter */
848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* reset to deal with the next character */
850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        state=readCommand;
851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(state==readCommand) {
852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* not in a multi-byte sequence, reset toULength */
853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength=0;
854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUIsSingleByteMode=isSingleByteMode;
856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUState=state;
857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUQuoteWindow=quoteWindow;
858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUDynamicWindow=dynamicWindow;
859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->toUByteOne=byteOne;
860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=(const char *)source;
863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=target;
864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return;
865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU-from-Unicode conversion functions ----------------------------------- */
868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reasonable results. The lookahead is minimal.
872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Many cases are simple:
873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A character fits directly into the current mode, a dynamic or static window,
874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or is not compressible. These cases are tested first.
875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Real compression heuristics are applied to the rest, in code branches for
876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * single/Unicode mode and BMP/supplementary code points.
877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The heuristics used here are extremely simple.
878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Find the window that contains a character.
 *
 * offsets: the start offsets of 8 windows
 * c:       the code point to look up
 *
 * Returns the index (0..7) of the first window i for which
 * c-offsets[i] is in 0..0x7f, or -1 if there is no such window.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t window=0;

    while(window<8) {
        /*
         * Unsigned subtraction wraps around when c<offsets[window],
         * so a single comparison checks both ends of the window.
         */
        uint32_t delta=(uint32_t)(c-offsets[window]);
        if(delta<0x80) {
            return window;
        }
        ++window;
    }

    /* c is not in any of the windows */
    return -1;
}
891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool
894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruisInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (UBool)(c<=offset+0x7f &&
896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          (c>=offset || (c<=0x7f &&
897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        (c>=0x20 || (1UL<<c)&0x2601))));
898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                /* binary 0010 0110 0000 0001,
899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                   check for b==0xd || b==0xa || b==9 || b==0 */
900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * getNextDynamicWindow returns the next dynamic window to be redefined
904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int8_t
906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetNextDynamicWindow(SCSUData *scsu) {
907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(++scsu->nextWindowUseIndex==8) {
909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        scsu->nextWindowUseIndex=0;
910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return window;
912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * useDynamicWindow() adjusts
916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * windowUse[] and nextWindowUseIndex for the algorithm to choose
917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the next dynamic window to be defined;
918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a subclass may override it and provide its own algorithm.
919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruuseDynamicWindow(SCSUData *scsu, int8_t window) {
922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * move the existing window, which just became the most recently used one,
924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * up in windowUse[] to nextWindowUseIndex-1
925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* first, find the index of the window - backwards to favor the more recently used windows */
928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int i, j;
929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    i=scsu->nextWindowUseIndex;
931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    do {
932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(--i<0) {
933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            i=7;
934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } while(scsu->windowUse[i]!=window);
936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* now copy each windowUse[i+1] to [i] */
938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    j=i+1;
939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(j==8) {
940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        j=0;
941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(j!=scsu->nextWindowUseIndex) {
943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        scsu->windowUse[i]=scsu->windowUse[j];
944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=j;
945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(++j==8) { j=0; }
946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* finally, set the window into the most recently used index */
949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->windowUse[i]=window;
950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * calculate the offset and the code for a dynamic window that contains the character
954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * takes fixed offsets into account
955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the offset of the window is stored in the offset variable,
956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the code is returned
957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return offset code: -1 none  <=0xff code for SDn/UDn  else code for SDX/UDX, subtract 0x200 to get the true code
959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int
961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetDynamicOffset(uint32_t c, uint32_t *pOffset) {
962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int i;
963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<7; ++i) {
965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pOffset=fixedOffsets[i];
967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0xf9+i;
968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c<0x80) {
972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* No dynamic window for US-ASCII. */
973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1;
974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(c<0x3400 ||
975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ) {
978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pOffset=c&0x7fffff80;
980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return (int)(c>>7);
981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* For these characters we need to take the gapOffset into account. */
983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pOffset=c&0x7fffff80;
984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return (int)((c-gapOffset)>>7);
985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1;
987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Idea for compression:
992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  - save SCSUData and other state before really starting work
993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  - at endloop, see if compression could be better with just unicode mode
994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  - don't do this if a callback has been called
995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  - different buffer handling!
997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Drawback or need for corrective handling:
999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * How to achieve both?
1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  - Only replace the result after an SDX or SCU?
1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode *pErrorCode) {
1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    SCSUData *scsu;
1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *source, *sourceLimit;
1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *target;
1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity;
1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t *offsets;
1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isSingleByteMode;
1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t dynamicWindow;
1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t currentOffset;
1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t c, delta;
1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t sourceIndex, nextSourceIndex;
1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length;
1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* variables for compression heuristics */
1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t offset;
1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar lead, trail;
1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int code;
1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t window;
1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pArgs->converter;
1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu=(SCSUData *)cnv->extraInfo;
1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the source, target and offsets variables from the arguments */
1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=pArgs->source;
1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=pArgs->sourceLimit;
1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=(uint8_t *)pArgs->target;
1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offsets=pArgs->offsets;
1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the state machine state */
1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isSingleByteMode=scsu->fromUIsSingleByteMode;
1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    dynamicWindow=scsu->fromUDynamicWindow;
1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=cnv->fromUChar32;
1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* sourceIndex=-1 if the current character began in the previous buffer */
1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceIndex= c==0 ? 0 : -1;
1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    nextSourceIndex=0;
1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* similar conversion "loop" as in toUnicode */
1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruloop:
1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(isSingleByteMode) {
1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c!=0 && targetCapacity>0) {
1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            goto getTrailSingle;
1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* state machine for single-byte mode */
1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* singleByteMode: */
1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(source<sourceLimit) {
1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(targetCapacity<=0) {
1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target is full */
1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=*source++;
1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++nextSourceIndex;
1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if((c-0x20)<=0x5f) {
1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* pass US-ASCII graphic character through */
1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)c;
1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(offsets!=NULL) {
1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *offsets++=sourceIndex;
1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(c<0x20) {
1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* CR/LF/TAB/NUL */
1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)c;
1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(offsets!=NULL) {
1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    --targetCapacity;
1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* quote C0 control character */
1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c|=SQ0<<8;
1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if((delta=c-currentOffset)<=0x7f) {
1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* use the current dynamic window */
1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)(delta|0x80);
1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(offsets!=NULL) {
1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *offsets++=sourceIndex;
1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(UTF_IS_SURROGATE(c)) {
1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(UTF_IS_SURROGATE_FIRST(c)) {
1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailSingle:
1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    lead=(UChar)c;
1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(source<sourceLimit) {
1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* test the following code unit */
1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        trail=*source;
1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(UTF_IS_SECOND_SURROGATE(trail)) {
1109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            ++source;
1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            ++nextSourceIndex;
1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            c=UTF16_GET_PAIR_VALUE(c, trail);
1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* convert this surrogate code point */
1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* exit this condition tree */
1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else {
1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* this is an unmatched lead code unit (1st surrogate) */
1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* callback(illegal) */
1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            goto endloop;
1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* no more input */
1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* this is an unmatched trail code unit (2nd surrogate) */
1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal) */
1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* compress supplementary character U+10000..U+10ffff */
1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((delta=c-currentOffset)<=0x7f) {
1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* use the current dynamic window */
1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)(delta|0x80);
1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(offsets!=NULL) {
1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    --targetCapacity;
1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* there is a dynamic window that contains this character, change to it */
1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=window;
1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((code=getDynamicOffset(c, &offset))>=0) {
1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* might check if there are more characters in this window to come */
1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* define an extended window with this character */
1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    code-=0x200;
1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=getNextDynamicWindow(scsu);
1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=4;
1156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* change to Unicode mode and output this (lead, trail) pair */
1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=FALSE;
1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)SCU;
1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(offsets!=NULL) {
1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    --targetCapacity;
1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)lead<<16)|trail;
1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=4;
1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(c<0xa0) {
1170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* quote C1 control character */
1171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=2;
1173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto outputBytes;
1174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(c==0xfeff || c>=0xfff0) {
1175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* quote signature character=byte order mark and specials */
1176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c|=SQU<<16;
1177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=3;
1178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto outputBytes;
1179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
1180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* compress all other BMP characters */
1181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* there is a window defined that contains this character - switch to it or quote from it? */
1183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* change to dynamic window */
1185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=window;
1186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        useDynamicWindow(scsu, dynamicWindow);
1188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=2;
1190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
1192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* quote from dynamic window */
1193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=2;
1195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((window=getWindow(staticOffsets, c))>=0) {
1198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* quote from static window */
1199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((code=getDynamicOffset(c, &offset))>=0) {
1203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* define a dynamic window with this character */
1204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=getNextDynamicWindow(scsu);
1205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=3;
1209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
1213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /*
1214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * this character is not compressible (a BMP ideograph or similar);
1215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * switch to Unicode mode if this is the last character in the block
1216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * or there is at least one more ideograph following immediately
1217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     */
1218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=FALSE;
1219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c|=SCU<<16;
1220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=3;
1221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* quote Unicode */
1224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c|=SQU<<16;
1225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=3;
1226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* normal end of conversion: prepare for a new character */
1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=0;
1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sourceIndex=nextSourceIndex;
1233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c!=0 && targetCapacity>0) {
1236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            goto getTrailUnicode;
1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* state machine for Unicode mode */
1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */
1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(source<sourceLimit) {
1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(targetCapacity<=0) {
1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target is full */
1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=*source++;
1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++nextSourceIndex;
1249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* not compressible, write character directly */
1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(targetCapacity>=2) {
1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)(c>>8);
1254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)c;
1255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(offsets!=NULL) {
1256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    targetCapacity-=2;
1260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* compress BMP character if the following one is not an uncompressible ideograph */
1266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* ASCII digit or letter */
1269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        isSingleByteMode=TRUE;
1270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=2;
1272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* there is a dynamic window that contains this character, change to it */
1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        isSingleByteMode=TRUE;
1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=window;
1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        useDynamicWindow(scsu, dynamicWindow);
1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=2;
1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else if((code=getDynamicOffset(c, &offset))>=0) {
1283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* define a dynamic window with this character */
1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        isSingleByteMode=TRUE;
1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=getNextDynamicWindow(scsu);
1286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        useDynamicWindow(scsu, dynamicWindow);
1288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=3;
1290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* don't know how to compress this character, just write it directly */
1295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=2;
1296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto outputBytes;
1297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(c<0xe000) {
1298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* c is a surrogate */
1299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(UTF_IS_SURROGATE_FIRST(c)) {
1300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailUnicode:
1301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    lead=(UChar)c;
1302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(source<sourceLimit) {
1303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* test the following code unit */
1304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        trail=*source;
1305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(UTF_IS_SECOND_SURROGATE(trail)) {
1306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            ++source;
1307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            ++nextSourceIndex;
1308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            c=UTF16_GET_PAIR_VALUE(c, trail);
1309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* convert this surrogate code point */
1310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* exit this condition tree */
1311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else {
1312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* this is an unmatched lead code unit (1st surrogate) */
1313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* callback(illegal) */
1314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            goto endloop;
1316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
1317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
1318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* no more input */
1319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
1320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* this is an unmatched trail code unit (2nd surrogate) */
1323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal) */
1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* compress supplementary character */
1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /*
1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * there is a dynamic window that contains this character and
1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * the following character is not uncompressible,
1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * change to the window
1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     */
1337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
1338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=window;
1339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          (code=getDynamicOffset(c, &offset))>=0
1346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
1347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* two supplementary characters in (probably) the same window - define an extended one */
1348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
1349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    code-=0x200;
1350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=getNextDynamicWindow(scsu);
1351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=4;
1355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* don't know how to compress this character, just write it directly */
1358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)lead<<16)|trail;
1359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=4;
1360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else /* 0xe000<=c<0xf300 */ {
1363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* quote to avoid SCSU tags */
1364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c|=UQU<<16;
1365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=3;
1366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto outputBytes;
1367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* normal end of conversion: prepare for a new character */
1370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=0;
1371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sourceIndex=nextSourceIndex;
1372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop:
1375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set the converter state back into UConverter */
1377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->fromUIsSingleByteMode=isSingleByteMode;
1378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->fromUDynamicWindow=dynamicWindow;
1379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->fromUChar32=c;
1381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
1383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=source;
1384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=(char *)target;
1385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->offsets=offsets;
1386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return;
1387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruoutputBytes:
1389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* from the first if in the loop we know that targetCapacity>0 */
1391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<=targetCapacity) {
1392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(offsets==NULL) {
1393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            switch(length) {
1394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* each branch falls through to the next one */
1395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case 4:
1396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)(c>>24);
1397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case 3:
1398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)(c>>16);
1399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case 2:
1400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)(c>>8);
1401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case 1:
1402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)c;
1403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            default:
1404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* will never occur */
1405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            switch(length) {
1409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* each branch falls through to the next one */
1410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case 4:
1411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)(c>>24);
1412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case 3:
1414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)(c>>16);
1415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case 2:
1417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)(c>>8);
1418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            case 1:
1420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)c;
1421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            default:
1423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* will never occur */
1424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity-=length;
1428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* normal end of conversion: prepare for a new character */
1430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=0;
1431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sourceIndex=nextSourceIndex;
1432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto loop;
1433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uint8_t *p;
1435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
1437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * We actually do this backwards here:
1438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * In order to save an intermediate variable, we output
1439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * first to the overflow buffer what does not fit into the
1440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * regular target.
1441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
1442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* we know that 0<=targetCapacity<length<=4 */
1443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length-=targetCapacity;
1445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        p=(uint8_t *)cnv->charErrorBuffer;
1446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        switch(length) {
1447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* each branch falls through to the next one */
1448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 4:
1449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(c>>24);
1450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 3:
1451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(c>>16);
1452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 2:
1453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(c>>8);
1454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 1:
1455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p=(uint8_t)c;
1456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default:
1457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* will never occur */
1458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->charErrorBufferLength=(int8_t)length;
1461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* now output what fits into the regular target */
1463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c>>=8*length; /* length was reduced by targetCapacity */
1464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        switch(targetCapacity) {
1465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* each branch falls through to the next one */
1466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 3:
1467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)(c>>16);
1468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(offsets!=NULL) {
1469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 2:
1472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)(c>>8);
1473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(offsets!=NULL) {
1474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 1:
1477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)c;
1478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(offsets!=NULL) {
1479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default:
1482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* target overflow */
1486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity=0;
1487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=0;
1489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto endloop;
1490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
1494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
1495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either
1496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or
1497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables
1498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex.
1499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
1500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
1501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
1502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 UErrorCode *pErrorCode) {
1503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
1504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    SCSUData *scsu;
1505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *source, *sourceLimit;
1506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *target;
1507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity;
1508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isSingleByteMode;
1510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t dynamicWindow;
1511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t currentOffset;
1512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t c, delta;
1514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length;
1516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* variables for compression heuristics */
1518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t offset;
1519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar lead, trail;
1520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int code;
1521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t window;
1522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
1524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pArgs->converter;
1525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu=(SCSUData *)cnv->extraInfo;
1526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
1528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=pArgs->source;
1529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=pArgs->sourceLimit;
1530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=(uint8_t *)pArgs->target;
1531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the state machine state */
1534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isSingleByteMode=scsu->fromUIsSingleByteMode;
1535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    dynamicWindow=scsu->fromUDynamicWindow;
1536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=cnv->fromUChar32;
1539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* similar conversion "loop" as in toUnicode */
1541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruloop:
1542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(isSingleByteMode) {
1543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c!=0 && targetCapacity>0) {
1544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            goto getTrailSingle;
1545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* state machine for single-byte mode */
1548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* singleByteMode: */
1549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(source<sourceLimit) {
1550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(targetCapacity<=0) {
1551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target is full */
1552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=*source++;
1556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if((c-0x20)<=0x5f) {
1558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* pass US-ASCII graphic character through */
1559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)c;
1560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
1561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(c<0x20) {
1562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* CR/LF/TAB/NUL */
1564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)c;
1565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    --targetCapacity;
1566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* quote C0 control character */
1568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c|=SQ0<<8;
1569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if((delta=c-currentOffset)<=0x7f) {
1573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* use the current dynamic window */
1574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)(delta|0x80);
1575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
1576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(UTF_IS_SURROGATE(c)) {
1577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(UTF_IS_SURROGATE_FIRST(c)) {
1578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailSingle:
1579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    lead=(UChar)c;
1580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(source<sourceLimit) {
1581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* test the following code unit */
1582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        trail=*source;
1583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(UTF_IS_SECOND_SURROGATE(trail)) {
1584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            ++source;
1585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            c=UTF16_GET_PAIR_VALUE(c, trail);
1586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* convert this surrogate code point */
1587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* exit this condition tree */
1588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else {
1589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* this is an unmatched lead code unit (1st surrogate) */
1590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* callback(illegal) */
1591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            goto endloop;
1593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
1594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
1595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* no more input */
1596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
1597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* this is an unmatched trail code unit (2nd surrogate) */
1600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal) */
1601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
1603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* compress supplementary character U+10000..U+10ffff */
1606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((delta=c-currentOffset)<=0x7f) {
1607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* use the current dynamic window */
1608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)(delta|0x80);
1609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    --targetCapacity;
1610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* there is a dynamic window that contains this character, change to it */
1612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=window;
1613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((code=getDynamicOffset(c, &offset))>=0) {
1619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* might check if there are more characters in this window to come */
1620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* define an extended window with this character */
1621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    code-=0x200;
1622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=getNextDynamicWindow(scsu);
1623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=4;
1627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* change to Unicode mode and output this (lead, trail) pair */
1630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=FALSE;
1631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)SCU;
1632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    --targetCapacity;
1633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)lead<<16)|trail;
1634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=4;
1635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(c<0xa0) {
1638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* quote C1 control character */
1639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=2;
1641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto outputBytes;
1642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(c==0xfeff || c>=0xfff0) {
1643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* quote signature character=byte order mark and specials */
1644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c|=SQU<<16;
1645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=3;
1646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto outputBytes;
1647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
1648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* compress all other BMP characters */
1649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* there is a window defined that contains this character - switch to it or quote from it? */
1651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* change to dynamic window */
1653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=window;
1654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        useDynamicWindow(scsu, dynamicWindow);
1656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=2;
1658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
1660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* quote from dynamic window */
1661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=2;
1663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((window=getWindow(staticOffsets, c))>=0) {
1666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* quote from static window */
1667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((code=getDynamicOffset(c, &offset))>=0) {
1671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* define a dynamic window with this character */
1672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=getNextDynamicWindow(scsu);
1673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=3;
1677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
1681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /*
1682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * this character is not compressible (a BMP ideograph or similar);
1683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * switch to Unicode mode if this is the last character in the block
1684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * or there is at least one more ideograph following immediately
1685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     */
1686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=FALSE;
1687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c|=SCU<<16;
1688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=3;
1689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* quote Unicode */
1692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c|=SQU<<16;
1693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=3;
1694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* normal end of conversion: prepare for a new character */
1699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=0;
1700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c!=0 && targetCapacity>0) {
1703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            goto getTrailUnicode;
1704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* state machine for Unicode mode */
1707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */
1708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(source<sourceLimit) {
1709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(targetCapacity<=0) {
1710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target is full */
1711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=*source++;
1715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* not compressible, write character directly */
1718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(targetCapacity>=2) {
1719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)(c>>8);
1720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(uint8_t)c;
1721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    targetCapacity-=2;
1722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* compress BMP character if the following one is not an uncompressible ideograph */
1728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* ASCII digit or letter */
1731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        isSingleByteMode=TRUE;
1732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=2;
1734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* there is a dynamic window that contains this character, change to it */
1737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        isSingleByteMode=TRUE;
1738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=window;
1739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        useDynamicWindow(scsu, dynamicWindow);
1741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=2;
1743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else if((code=getDynamicOffset(c, &offset))>=0) {
1745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* define a dynamic window with this character */
1746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        isSingleByteMode=TRUE;
1747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        dynamicWindow=getNextDynamicWindow(scsu);
1748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        useDynamicWindow(scsu, dynamicWindow);
1750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        length=3;
1752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto outputBytes;
1753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* don't know how to compress this character, just write it directly */
1757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=2;
1758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto outputBytes;
1759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(c<0xe000) {
1760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* c is a surrogate */
1761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(UTF_IS_SURROGATE_FIRST(c)) {
1762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailUnicode:
1763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    lead=(UChar)c;
1764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(source<sourceLimit) {
1765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* test the following code unit */
1766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        trail=*source;
1767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(UTF_IS_SECOND_SURROGATE(trail)) {
1768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            ++source;
1769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            c=UTF16_GET_PAIR_VALUE(c, trail);
1770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* convert this surrogate code point */
1771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* exit this condition tree */
1772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else {
1773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* this is an unmatched lead code unit (1st surrogate) */
1774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* callback(illegal) */
1775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            goto endloop;
1777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
1778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
1779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* no more input */
1780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
1781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* this is an unmatched trail code unit (2nd surrogate) */
1784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal) */
1785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
1787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* compress supplementary character */
1790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
1793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /*
1794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * there is a dynamic window that contains this character and
1795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * the following character is not uncompressible,
1796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * change to the window
1797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     */
1798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
1799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=window;
1800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=2;
1804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          (code=getDynamicOffset(c, &offset))>=0
1807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
1808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* two supplementary characters in (probably) the same window - define an extended one */
1809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    isSingleByteMode=TRUE;
1810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    code-=0x200;
1811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    dynamicWindow=getNextDynamicWindow(scsu);
1812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    useDynamicWindow(scsu, dynamicWindow);
1814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=4;
1816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* don't know how to compress this character, just write it directly */
1819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=((uint32_t)lead<<16)|trail;
1820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=4;
1821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto outputBytes;
1822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else /* 0xe000<=c<0xf300 */ {
1824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* quote to avoid SCSU tags */
1825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c|=UQU<<16;
1826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=3;
1827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto outputBytes;
1828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* normal end of conversion: prepare for a new character */
1831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=0;
1832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop:
1835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set the converter state back into UConverter */
1837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->fromUIsSingleByteMode=isSingleByteMode;
1838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scsu->fromUDynamicWindow=dynamicWindow;
1839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->fromUChar32=c;
1841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
1843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=source;
1844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=(char *)target;
1845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return;
1846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruoutputBytes:
1848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* from the first if in the loop we know that targetCapacity>0 */
1850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<=targetCapacity) {
1851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        switch(length) {
1852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* each branch falls through to the next one */
1853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 4:
1854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)(c>>24);
1855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 3:
1856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)(c>>16);
1857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 2:
1858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)(c>>8);
1859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 1:
1860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)c;
1861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default:
1862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* will never occur */
1863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity-=length;
1866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* normal end of conversion: prepare for a new character */
1868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=0;
1869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto loop;
1870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uint8_t *p;
1872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
1874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * We actually do this backwards here:
1875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * In order to save an intermediate variable, we output
1876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * first to the overflow buffer what does not fit into the
1877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * regular target.
1878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
1879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* we know that 0<=targetCapacity<length<=4 */
1880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length-=targetCapacity;
1882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        p=(uint8_t *)cnv->charErrorBuffer;
1883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        switch(length) {
1884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* each branch falls through to the next one */
1885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 4:
1886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(c>>24);
1887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 3:
1888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(c>>16);
1889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 2:
1890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(c>>8);
1891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 1:
1892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p=(uint8_t)c;
1893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default:
1894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* will never occur */
1895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->charErrorBufferLength=(int8_t)length;
1898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* now output what fits into the regular target */
1900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c>>=8*length; /* length was reduced by targetCapacity */
1901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        switch(targetCapacity) {
1902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* each branch falls through to the next one */
1903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 3:
1904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)(c>>16);
1905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 2:
1906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)(c>>8);
1907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 1:
1908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)c;
1909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default:
1910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* target overflow */
1914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity=0;
1915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=0;
1917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto endloop;
1918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* miscellaneous ------------------------------------------------------------ */
1922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char *
1924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUGetName(const UConverter *cnv) {
1925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
1926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch(scsu->locale) {
1928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case l_ja:
1929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return "SCSU,locale=ja";
1930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    default:
1931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return "SCSU";
1932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Structure for SafeClone calculations.
 *
 * _SCSUSafeClone() reports sizeof(struct cloneSCSUStruct) as the buffer size
 * it needs and overlays this layout on the caller-supplied buffer, so the
 * converter object and its SCSU state live in one contiguous block.
 */
struct cloneSCSUStruct
{
    UConverter cnv;     /* the clone itself; _SCSUSafeClone() points its extraInfo at mydata */
    SCSUData mydata;    /* receives a byte copy of the source converter's SCSUData */
};
1941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UConverter *
1943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUSafeClone(const UConverter *cnv,
1944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               void *stackBuffer,
1945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               int32_t *pBufferSize,
1946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               UErrorCode *status)
1947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
1948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    struct cloneSCSUStruct * localClone;
1949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
1950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(*status)){
1952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
1953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pBufferSize = bufferSizeNeeded;
1957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
1958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    localClone = (struct cloneSCSUStruct *)stackBuffer;
1961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
1964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    localClone->cnv.extraInfo = &localClone->mydata;
1965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    localClone->cnv.isExtraLocal = TRUE;
1966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return &localClone->cnv;
1968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Implementation table for the SCSU converter: the converter type followed by
 * the function pointers for this converter. The initializer is positional, so
 * the order of the entries must match the UConverterImpl declaration exactly.
 * NOTE(review): the slot names in the comments below are assumed from the
 * UConverterImpl declaration (not visible in this file) - confirm against it.
 */
static const UConverterImpl _SCSUImpl={
    UCNV_SCSU,

    NULL, /* presumably load: not provided */
    NULL, /* presumably unload: not provided */

    _SCSUOpen,
    _SCSUClose,
    _SCSUReset,

    _SCSUToUnicode,
    _SCSUToUnicodeWithOffsets,
    _SCSUFromUnicode,
    _SCSUFromUnicodeWithOffsets,
    NULL, /* presumably getNextUChar: not provided */

    NULL, /* presumably getStarters: not provided */
    _SCSUGetName,
    NULL, /* presumably writeSub: not provided */
    _SCSUSafeClone,
    ucnv_getCompleteUnicodeSet
};
1993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Static (read-only) description of the SCSU converter. The initializer is
 * positional, so the order of the values must match the UConverterStaticData
 * declaration exactly.
 * NOTE(review): field meanings that are not spelled out by the original
 * comments are assumed from the UConverterStaticData declaration (not visible
 * in this file) - confirm against it.
 */
static const UConverterStaticData _SCSUStaticData={
    sizeof(UConverterStaticData),
    "SCSU",
    1212, /* CCSID for SCSU */
    UCNV_IBM, UCNV_SCSU,
    1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
    /*
     * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
     * substitution string.
     */
    { 0x0e, 0xff, 0xfd, 0 }, 3, /* substitution bytes and their length */
    FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
2010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Shared data object for the SCSU converter. It has external linkage (no
 * "static"), unlike the two tables it points to, and ties together
 * _SCSUStaticData and _SCSUImpl.
 * NOTE(review): the second value, ~((uint32_t)0), is presumably the reference
 * counter set to its maximum value; the remaining NULL/FALSE/0 values fill
 * fields of UConverterSharedData that are declared elsewhere - confirm
 * against that declaration.
 */
const UConverterSharedData _SCSUData={
    sizeof(UConverterSharedData), ~((uint32_t)0),
    NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl,
    0
};
2016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
2017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
2018