1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* Copyright (C) 2000-2009, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: ucnvscsu.c 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2000nov18 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* This is an implementation of the Standard Compression Scheme for Unicode 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* as defined in http://www.unicode.org/unicode/reports/tr6/ . 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Reserved commands and window settings are treated as illegal sequences and 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* will result in callback calls. 
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_cb.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU definitions --------------------------------------------------------- */ 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU command byte values. The S* tags are the ones the single-byte-mode decoder compares input bytes below 0x20 against. The U* tags are presumably their Unicode-mode counterparts; their use is outside this excerpt, confirm against UTS #6. 
*/ 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SQ0=0x01, /* Quote from window pair 0 */ 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SQ7=0x08, /* Quote from window pair 7 */ 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SDX=0x0B, /* Define a window as extended */ 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Srs=0x0C, /* reserved */ 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SQU=0x0E, /* Quote a single Unicode character */ 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCU=0x0F, /* Change to Unicode mode */ 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SC0=0x10, /* Select window 0 */ 
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SC7=0x17, /* Select window 7 */ 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SD0=0x18, /* Define and select window 0 */ 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SD7=0x1F, /* Define and select window 7 */ 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UC0=0xE0, /* Select window 0 */ 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UC7=0xE7, /* Select window 7 */ 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UD0=0xE8, /* Define and select window 0 */ 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UD7=0xEF, /* Define and select window 7 */ 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UQU=0xF0, /* Quote a single Unicode character */ 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UDX=0xF1, /* Define a Window as extended */ 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Urs=0xF2 /* reserved */ 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Unicode code points from 3400 to E000 are not addressable by 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * dynamic window, since in these areas no short run alphabets are 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * found. Therefore add gapOffset to all values from gapThreshold. 
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gapThreshold=0x68, 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gapOffset=0xAC00, 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* values between reservedStart and fixedThreshold are reserved */ 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru reservedStart=0xA8, 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use table of predefined fixed offsets for values from fixedThreshold */ 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fixedThreshold=0xF9 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* constant offsets for the 8 static windows; the single-byte-mode decoder indexes this table with the quote window number (tag byte minus SQ0) when the quoted byte is below 0x80 */ 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const uint32_t staticOffsets[8]={ 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0000, /* ASCII for quoted tags */ 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0100, /* Latin Extended-A */ 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0300, /* Combining Diacritical Marks */ 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x2000, /* General Punctuation */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x2080, /* Currency Symbols */ 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x2100, /* Letterlike Symbols and Number Forms */ 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x3000 /* CJK Symbols and punctuation */ 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* initial offsets for the 8 dynamic (sliding) windows; _SCSUReset copies this table into the per-converter toUDynamicOffsets and fromUDynamicOffsets arrays */ 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const uint32_t initialDynamicOffsets[8]={ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0080, /* Latin-1 */ 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x00C0, /* Latin Extended A */ 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0400, /* Cyrillic */ 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0600, /* Arabic */ 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0900, /* Devanagari */ 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x3040, /* Hiragana */ 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x30A0, /* Katakana */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0xFF00 /* Fullwidth ASCII */ 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Table of fixed predefined offsets; each entry is annotated with the window-definition byte it belongs to, starting at fixedThreshold (0xF9) */ 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const uint32_t fixedOffsets[]={ 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFA */ 0x0250, /* IPA extensions */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFB */ 0x0370, /* Greek */ 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFC */ 0x0530, /* Armenian */ 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFD */ 0x3040, /* Hiragana */ 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFE */ 0x30A0, /* Katakana */ 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFF */ 0xFF60 /* Halfwidth 
Katakana */ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* state values for the toUnicode state machine (kept in SCSUData.toUState). readCommand expects a new command or character byte. quotePairOne and quotePairTwo collect the high and low byte of a code unit quoted with SQU. quoteOne expects the single byte that follows an SQ0..SQ7 tag. definePairOne and defineOne are entered after SDX and SD0..SD7; their handling is outside this excerpt (presumably they read window definition bytes, confirm). */ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru readCommand, 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quotePairOne, 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quotePairTwo, 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteOne, 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru definePairOne, 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru definePairTwo, 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defineOne 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Per-converter SCSU state, allocated in _SCSUOpen and stored in cnv->extraInfo. */ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct SCSUData { 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* dynamic window offsets, initialized to default values from initialDynamicOffsets */ 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t toUDynamicOffsets[8]; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fromUDynamicOffsets[8]; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine state - toUnicode */ 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool toUIsSingleByteMode; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t toUState; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t toUQuoteWindow, toUDynamicWindow; 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t toUByteOne; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t toUPadding[3]; 
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine state - fromUnicode */ 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fromUIsSingleByteMode; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t fromUDynamicWindow; 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * windowUse[] keeps track of the use of the dynamic windows: 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At nextWindowUseIndex there is the least recently used window, 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and the following windows (in a wrapping manner) are more and more 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * recently used. 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At nextWindowUseIndex-1 there is the most recently used window. 
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note: this text describes nextWindowUseIndex and windowUse[] below; the locale field that follows first holds lGeneric or l_ja as chosen in _SCSUOpen. */ 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t locale; 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t nextWindowUseIndex; 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t windowUse[8]; 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} SCSUData; 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Initial contents of windowUse[], copied in by _SCSUReset together with nextWindowUseIndex=0, so index 0 names the window treated as least recently used. The _ja table is used when the converter locale is Japanese. */ 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* values for SCSUData.locale */ 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lGeneric, l_ja 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU setup functions ----------------------------------------------------- */ 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Reset the SCSU state stored in cnv->extraInfo. The toUnicode half is reset when choice<=UCNV_RESET_TO_UNICODE and the fromUnicode half when choice!=UCNV_RESET_TO_UNICODE. NOTE(review): the <= test relies on the numeric order of UConverterResetChoice (presumably UCNV_RESET_BOTH sorts below UCNV_RESET_TO_UNICODE), confirm in the ICU headers. cnv->extraInfo is dereferenced without a NULL check. */ 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUReset(UConverter *cnv, UConverterResetChoice choice) { 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu=(SCSUData *)cnv->extraInfo; 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset toUnicode; the length 32 is the byte size of the uint32_t[8] offsets array */ 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUIsSingleByteMode=TRUE; 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUState=readCommand; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUQuoteWindow=scsu->toUDynamicWindow=0; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUByteOne=0; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset fromUnicode; 32 is again the byte size of the uint32_t[8] offsets array */ 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUIsSingleByteMode=TRUE; 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUDynamicWindow=0; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->nextWindowUseIndex=0; /* pick the locale-specific initial window ordering; the length 8 is the byte size of the int8_t[8] windowUse array */ 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(scsu->locale) { 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case l_ja: 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(scsu->windowUse, initialWindowUse, 8); 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=0; 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Open hook: returns immediately, allocating nothing, when pArgs->onlyTestIsLoadable is set. Otherwise allocates an SCSUData into cnv->extraInfo, sets its locale to l_ja when pArgs->locale is ja or starts with ja_ (lGeneric otherwise), and resets both directions. On allocation failure *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR. The substitution character assignment at the end runs in both the success and the allocation-failure case. */ 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUOpen(UConverter *cnv, 19385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterLoadArgs *pArgs, 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 19585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char *locale=pArgs->locale; 19685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->onlyTestIsLoadable) { 19785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 19885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->extraInfo!=NULL) { /* locale must be set before _SCSUReset, which reads it to choose the initial windowUse table */ 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((SCSUData *)cnv->extraInfo)->locale=l_ja; 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((SCSUData *)cnv->extraInfo)->locale=lGeneric; 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUReset(cnv, UCNV_RESET_BOTH); 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Set the substitution character U+fffd as a Unicode string. */ 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->subUChars[0]=0xfffd; 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->subCharLen=-1; 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Close hook: frees cnv->extraInfo with uprv_free unless cnv->isExtraLocal is set, then clears the pointer so a second call does nothing. */ 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUClose(UConverter *cnv) { 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->extraInfo!=NULL) { 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!cnv->isExtraLocal) { 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(cnv->extraInfo); 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->extraInfo=NULL; 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU-to-Unicode conversion functions ------------------------------------- */ 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu; 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source, *sourceLimit; 
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *targetLimit; 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isSingleByteMode; 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t state, byteOne; 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t quoteWindow, dynamicWindow; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, nextSourceIndex; 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t b; 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu=(SCSUData *)cnv->extraInfo; 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetLimit=pArgs->targetLimit; 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the state machine state */ 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=scsu->toUIsSingleByteMode; 
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=scsu->toUState; 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteWindow=scsu->toUQuoteWindow; 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=scsu->toUDynamicWindow; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=scsu->toUByteOne; 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* sourceIndex=-1 if the current character began in the previous buffer */ 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=state==readCommand ? 0 : -1; 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex=0; 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conversion "loop" 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For performance, this is not a normal C loop. 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Instead, there are two code blocks for the two SCSU modes. 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The function branches to either one, and a change of the mode is done with a goto to 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the other branch. 
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Each branch has two conventional loops: 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - a fast-path loop for the most common codes in the mode 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - a loop for all other codes in the mode 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the fast-path runs into a code that it cannot handle, its loop ends and it 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * runs into the following loop to handle the other codes. 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The end of the input or output buffer is also handled by the slower loop. 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The slow loop jumps (goto) to the fast-path loop again as soon as possible. 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The callback handling is done by returning with an error code. 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The conversion framework actually calls the callback function. 
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isSingleByteMode) { 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast path for single-byte mode */ 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(state==readCommand) { 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle: 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<=0x7f) { 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write US-ASCII graphic character or DEL */ 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)b; 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write from dynamic window */ 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru /* output surrogate pair */ 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xd7c0+(c>>10)); 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xdc00|(c&0x3ff)); 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal state machine for single-byte mode, minus handling 
for what fastSingle covers */ 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusingleByteMode: 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source++; 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case readCommand: 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* redundant conditions are commented out */ 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* here: b<0x20 because otherwise we would be in fastSingle */ 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CR/LF/TAB/NUL */ 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)b; 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(SC0<=b) { 
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<=SC7) { 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-SC0); 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* if(SD0<=b && b<=SD7) */ { 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-SD0); 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=defineOne; 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* SQ0<=b && */ b<=SQ7) { 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteWindow=(int8_t)(b-SQ0); 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quoteOne; 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SDX) { 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairOne; 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SQU) { 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairOne; 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SCU) { 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastUnicode; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* Srs */ { 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* store the first byte of a multibyte sequence in toUBytes[] */ 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairOne: 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairTwo: 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((byteOne<<8)|b); 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quoteOne: 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<0x80) { 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* all static 
offsets are in the BMP */ 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(staticOffsets[quoteWindow]+b); 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write from dynamic window */ 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output surrogate pair */ 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xd7c0+(c>>10)); 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xdc00|(c&0x3ff)); 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case definePairOne: 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)((b>>5)&7); 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=(uint8_t)(b&0x1f); 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairTwo; 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case definePairTwo: 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case defineOne: 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b==0) { 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal): Reserved window offset value 0 */ 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b<gapThreshold) { 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b>=fixedThreshold) { 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
state=readCommand; 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast path for Unicode mode */ 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(state==readCommand) { 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastUnicode: 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((b<<8)|source[1]); 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex+=2; 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal state machine for Unicode mode */ 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */ 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source++; 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case readCommand: 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint8_t)(b-UC0)>(Urs-UC0)) { 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* UC0<=b && */ b<=UC7) { 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-UC0); 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* UD0<=b && */ b<=UD7) { 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-UD0); 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=defineOne; 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto singleByteMode; 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru } else if(b==UDX) { 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairOne; 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto singleByteMode; 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==UQU) { 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairOne; 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* Urs */ { 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairOne: 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairTwo: 
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((byteOne<<8)|b); 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastUnicode; 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset to deal with the next character */ 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(state==readCommand) { 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not in a multi-byte sequence, reset toULength */ 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUIsSingleByteMode=isSingleByteMode; 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUState=state; 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUQuoteWindow=quoteWindow; 
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicWindow=dynamicWindow; 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUByteOne=byteOne; 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _SCSUToUnicodeWithOffsets but without offset handling. 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex. 
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUToUnicode(UConverterToUnicodeArgs *pArgs, 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu; 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source, *sourceLimit; 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *targetLimit; 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isSingleByteMode; 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t state, byteOne; 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t quoteWindow, dynamicWindow; 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t b; 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu=(SCSUData *)cnv->extraInfo; 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetLimit=pArgs->targetLimit; 
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the state machine state */ 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=scsu->toUIsSingleByteMode; 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=scsu->toUState; 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteWindow=scsu->toUQuoteWindow; 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=scsu->toUDynamicWindow; 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=scsu->toUByteOne; 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conversion "loop" 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For performance, this is not a normal C loop. 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Instead, there are two code blocks for the two SCSU modes. 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The function branches to either one, and a change of the mode is done with a goto to 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the other branch. 
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Each branch has two conventional loops: 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - a fast-path loop for the most common codes in the mode 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - a loop for all other codes in the mode 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the fast-path runs into a code that it cannot handle, its loop ends and it 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * runs into the following loop to handle the other codes. 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The end of the input or output buffer is also handled by the slower loop. 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The slow loop jumps (goto) to the fast-path loop again as soon as possible. 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The callback handling is done by returning with an error code. 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The conversion framework actually calls the callback function. 
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isSingleByteMode) { 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast path for single-byte mode */ 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(state==readCommand) { 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle: 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<=0x7f) { 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write US-ASCII graphic character or DEL */ 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)b; 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write from dynamic window */ 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output surrogate pair */ 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xd7c0+(c>>10)); 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xdc00|(c&0x3ff)); 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusingleByteMode: 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source++; 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case readCommand: 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* redundant conditions are commented out */ 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* here: b<0x20 because otherwise we would be in fastSingle */ 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((1UL<<b)&0x2601 /* binary 0010 0110 0000 
0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CR/LF/TAB/NUL */ 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)b; 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(SC0<=b) { 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<=SC7) { 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-SC0); 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* if(SD0<=b && b<=SD7) */ { 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-SD0); 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=defineOne; 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* SQ0<=b && */ b<=SQ7) { 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteWindow=(int8_t)(b-SQ0); 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quoteOne; 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SDX) { 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairOne; 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SQU) { 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairOne; 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SCU) { 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastUnicode; 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* Srs */ { 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* store the first byte of a multibyte sequence in toUBytes[] */ 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairOne: 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairTwo: 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((byteOne<<8)|b); 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quoteOne: 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<0x80) { 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* all static offsets are in the BMP */ 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(staticOffsets[quoteWindow]+b); 
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write from dynamic window */ 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output surrogate pair */ 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xd7c0+(c>>10)); 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xdc00|(c&0x3ff)); 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case definePairOne: 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)((b>>5)&7); 
746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=(uint8_t)(b&0x1f); 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairTwo; 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case definePairTwo: 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case defineOne: 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b==0) { 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal): Reserved window offset value 0 */ 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b<gapThreshold) { 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b>=fixedThreshold) { 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; 
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast path for Unicode mode */ 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(state==readCommand) { 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastUnicode: 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((b<<8)|source[1]); 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal state machine for Unicode mode */ 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */ 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 
791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source++; 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case readCommand: 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint8_t)(b-UC0)>(Urs-UC0)) { 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* UC0<=b && */ b<=UC7) { 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-UC0); 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* UD0<=b && */ b<=UD7) { 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-UD0); 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=defineOne; 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto singleByteMode; 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==UDX) { 
815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairOne; 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto singleByteMode; 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==UQU) { 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairOne; 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* Urs */ { 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairOne: 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairTwo: 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*target++=(UChar)((byteOne<<8)|b); 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastUnicode; 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset to deal with the next character */ 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(state==readCommand) { 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not in a multi-byte sequence, reset toULength */ 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUIsSingleByteMode=isSingleByteMode; 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUState=state; 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUQuoteWindow=quoteWindow; 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicWindow=dynamicWindow; 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUByteOne=byteOne; 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru pArgs->source=(const char *)source; 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU-from-Unicode conversion functions ----------------------------------- */ 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reasonable results. The lookahead is minimal. 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Many cases are simple: 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A character fits directly into the current mode, a dynamic or static window, 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or is not compressible. These cases are tested first. 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Real compression heuristics are applied to the rest, in code branches for 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * single/Unicode mode and BMP/supplementary code points. 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The heuristics used here are extremely simple. 
878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* get the number of the window that this character is in, or -1 */ 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int8_t 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetWindow(const uint32_t offsets[8], uint32_t c) { 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i; 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<8; ++i) { 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint32_t)(c-offsets[i])<=0x7f) { 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (int8_t)(i); 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? 
*/ 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruisInOffsetWindowOrDirect(uint32_t offset, uint32_t c) { 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (UBool)(c<=offset+0x7f && 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (c>=offset || (c<=0x7f && 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (c>=0x20 || (1UL<<c)&0x2601)))); 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* binary 0010 0110 0000 0001, 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru check for b==0xd || b==0xa || b==9 || b==0 */ 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * getNextDynamicWindow returns the next dynamic window to be redefined 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int8_t 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetNextDynamicWindow(SCSUData *scsu) { 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t window=scsu->windowUse[scsu->nextWindowUseIndex]; 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++scsu->nextWindowUseIndex==8) { 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->nextWindowUseIndex=0; 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return window; 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * useDynamicWindow() adjusts 
916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * windowUse[] and nextWindowUseIndex for the algorithm to choose 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the next dynamic window to be defined; 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a subclass may override it and provide its own algorithm. 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruuseDynamicWindow(SCSUData *scsu, int8_t window) { 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * move the existing window, which just became the most recently used one, 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * up in windowUse[] to nextWindowUseIndex-1 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* first, find the index of the window - backwards to favor the more recently used windows */ 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i, j; 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=scsu->nextWindowUseIndex; 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(--i<0) { 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=7; 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(scsu->windowUse[i]!=window); 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now copy each windowUse[i+1] to [i] */ 
938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=i+1; 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(j==8) { 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=0; 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(j!=scsu->nextWindowUseIndex) { 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->windowUse[i]=scsu->windowUse[j]; 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=j; 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++j==8) { j=0; } 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* finally, set the window into the most recently used index */ 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->windowUse[i]=window; 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * calculate the offset and the code for a dynamic window that contains the character 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * takes fixed offsets into account 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the offset of the window is stored in the offset variable, 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the code is returned 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int 
961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetDynamicOffset(uint32_t c, uint32_t *pOffset) { 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i; 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<7; ++i) { 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint32_t)(c-fixedOffsets[i])<=0x7f) { 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pOffset=fixedOffsets[i]; 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xf9+i; 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x80) { 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* No dynamic window for US-ASCII. */ 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0x3400 || 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t)(c-0x10000)<(0x14000-0x10000) || 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000) 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* This character is in a code range for a "small", i.e., reasonably windowable, script. */ 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pOffset=c&0x7fffff80; 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (int)(c>>7); 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(0xe000<=c && c!=0xfeff && c<0xfff0) { 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* For these characters we need to take the gapOffset into account. 
*/ 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pOffset=c&0x7fffff80; 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (int)((c-gapOffset)>>7); 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Idea for compression: 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - save SCSUData and other state before really starting work 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - at endloop, see if compression could be better with just unicode mode 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - don't do this if a callback has been called 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - different buffer handling! 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Drawback or need for corrective handling: 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not only for compression but also for HTML/XML documents with following charset/encoding announcers. 
1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * How to achieve both? 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - Only replace the result after an SDX or SCU? 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu; 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source, *sourceLimit; 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target; 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t targetCapacity; 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isSingleByteMode; 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t dynamicWindow; 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t currentOffset; 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c, delta; 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, nextSourceIndex; 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 
1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* variables for compression heuristics */ 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t offset; 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar lead, trail; 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int code; 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t window; 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu=(SCSUData *)cnv->extraInfo; 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=pArgs->sourceLimit; 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=(uint8_t *)pArgs->target; 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the state machine state */ 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=scsu->fromUIsSingleByteMode; 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=scsu->fromUDynamicWindow; 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 
1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=cnv->fromUChar32; 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* sourceIndex=-1 if the current character began in the previous buffer */ 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex= c==0 ? 0 : -1; 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex=0; 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* similar conversion "loop" as in toUnicode */ 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruloop: 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isSingleByteMode) { 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailSingle; 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine for single-byte mode */ 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* singleByteMode: */ 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity<=0) { 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 
1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((c-0x20)<=0x5f) { 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* pass US-ASCII graphic character through */ 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0x20) { 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CR/LF/TAB/NUL */ 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote C0 control character */ 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQ0<<8; 1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((delta=c-currentOffset)<=0x7f) { 
1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use the current dynamic window */ 1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(delta|0x80); 1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(UTF_IS_SURROGATE(c)) { 1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(c)) { 1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailSingle: 1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lead=(UChar)c; 1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=*source; 1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SECOND_SURROGATE(trail)) { 1109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=UTF16_GET_PAIR_VALUE(c, trail); 1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this surrogate code point */ 1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 
1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress supplementary character U+10000..U+10ffff */ 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((delta=c-currentOffset)<=0x7f) { 1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use the current dynamic window */ 1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(delta|0x80); 1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 
1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a dynamic window that contains this character, change to it */ 1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* might check if there are more characters in this window to come */ 1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define an extended window with this character */ 1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code-=0x200; 1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change to Unicode mode and output this (lead, trail) 
pair */ 1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)SCU; 1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)lead<<16)|trail; 1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0xa0) { 1170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote C1 control character */ 1171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ 1172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c==0xfeff || c>=0xfff0) { 1175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote signature character=byte order mark and specials */ 1176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQU<<16; 1177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress all other BMP characters */ 1181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 
1182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a window defined that contains this character - switch to it or quote from it? */ 1183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { 1184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change to dynamic window */ 1185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote from dynamic window */ 1193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; 1194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(staticOffsets, c))>=0) { 1198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote from static window */ 1199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); 1200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto 
outputBytes; 1202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define a dynamic window with this character */ 1204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && 1211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 1212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this character is not compressible (a BMP ideograph or similar); 1215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * switch to Unicode mode if this is the last character in the block 1216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or there is at least one more ideograph following immediately 1217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 1219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SCU<<16; 1220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru goto outputBytes; 1222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote Unicode */ 1224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQU<<16; 1225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 1236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailUnicode; 1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine for Unicode mode */ 1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */ 1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity<=0) { 1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 
1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 1249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { 1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not compressible, write character directly */ 1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>=2) { 1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 1260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { 1265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress BMP character if the following one is not an uncompressible ideograph */ 1266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { 1267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(((uint32_t)(c-0x30)<10 || 
(uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { 1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ASCII digit or letter */ 1269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; 1271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a dynamic window that contains this character, change to it */ 1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define a dynamic window with this character */ 1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 
1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* don't know how to compress this character, just write it directly */ 1295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0xe000) { 1298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* c is a surrogate */ 1299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(c)) { 1300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailUnicode: 1301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lead=(UChar)c; 1302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 1303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=*source; 1305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SECOND_SURROGATE(trail)) { 1306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 1307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 1308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=UTF16_GET_PAIR_VALUE(c, trail); 1309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert 
this surrogate code point */ 1310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 1311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress supplementary character */ 1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && 1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 1331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
/* 1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there is a dynamic window that contains this character and 1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the following character is not uncompressible, 1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change to the window 1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ 1345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (code=getDynamicOffset(c, &offset))>=0 1346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* two supplementary characters in (probably) the same window - define an extended one */ 1348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code-=0x200; 1350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 
1352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* don't know how to compress this character, just write it directly */ 1358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)lead<<16)|trail; 1359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* 0xe000<=c<0xf300 */ { 1363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote to avoid SCSU tags */ 1364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=UQU<<16; 1365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 
1375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 1377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUIsSingleByteMode=isSingleByteMode; 1378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUDynamicWindow=dynamicWindow; 1379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c; 1381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 1383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=(char *)target; 1385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 1386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruoutputBytes: 1389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ 1390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* from the first if in the loop we know that targetCapacity>0 */ 1391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=targetCapacity) { 1392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 1393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>24); 
1397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 1398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 1399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 1400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 1402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>24); 1412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 1414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 1415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 1417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 1420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*offsets++=sourceIndex; 1422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=length; 1428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto loop; 1433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p; 1435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We actually do this backwards here: 1438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In order to save an intermediate variable, we output 1439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * first to the overflow buffer what does not fit into the 1440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * regular target. 
1441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we know that 0<=targetCapacity<length<=4 */ 1443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ 1444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=targetCapacity; 1445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p=(uint8_t *)cnv->charErrorBuffer; 1446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>24); 1450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 1451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>16); 1452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 1453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>8); 1454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 1455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p=(uint8_t)c; 1456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength=(int8_t)length; 1461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now output what fits into the regular target */ 1463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c>>=8*length; /* length was reduced by 
targetCapacity */ 1464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(targetCapacity) { 1465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 1467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 1468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 1472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 1477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 1486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 1487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
c=0; 1489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _SCSUFromUnicodeWithOffsets but without offset handling. 1495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either 1496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or 1497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables 1498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex. 1499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, 1502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 1504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu; 1505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source, *sourceLimit; 1506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target; 1507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t targetCapacity; 1508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isSingleByteMode; 1510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t dynamicWindow; 1511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
uint32_t currentOffset; 1512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c, delta; 1514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 1516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* variables for compression heuristics */ 1518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t offset; 1519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar lead, trail; 1520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int code; 1521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t window; 1522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 1524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 1525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu=(SCSUData *)cnv->extraInfo; 1526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 1528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 1529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=pArgs->sourceLimit; 1530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=(uint8_t *)pArgs->target; 1531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 1532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the state machine state */ 1534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=scsu->fromUIsSingleByteMode; 
1535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=scsu->fromUDynamicWindow; 1536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=cnv->fromUChar32; 1539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* similar conversion "loop" as in toUnicode */ 1541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruloop: 1542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isSingleByteMode) { 1543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 1544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailSingle; 1545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine for single-byte mode */ 1548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* singleByteMode: */ 1549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity<=0) { 1551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 1556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((c-0x20)<=0x5f) { 1558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* pass US-ASCII graphic 
character through */ 1559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0x20) { 1562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 1563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CR/LF/TAB/NUL */ 1564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote C0 control character */ 1568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQ0<<8; 1569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((delta=c-currentOffset)<=0x7f) { 1573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use the current dynamic window */ 1574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(delta|0x80); 1575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(UTF_IS_SURROGATE(c)) { 1577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(c)) { 1578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailSingle: 1579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lead=(UChar)c; 1580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 
1581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=*source; 1583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SECOND_SURROGATE(trail)) { 1584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 1585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=UTF16_GET_PAIR_VALUE(c, trail); 1586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this surrogate code point */ 1587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 1588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
1604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress supplementary character U+10000..U+10ffff */ 1606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((delta=c-currentOffset)<=0x7f) { 1607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use the current dynamic window */ 1608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(delta|0x80); 1609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 1611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a dynamic window that contains this character, change to it */ 1612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* might check if there are more characters in this window to come */ 1620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define an extended window with this character */ 1621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code-=0x200; 1622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 
1623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change to Unicode mode and output this (lead, trail) pair */ 1630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 1631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)SCU; 1632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)lead<<16)|trail; 1634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0xa0) { 1638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote C1 control character */ 1639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ 1640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c==0xfeff || c>=0xfff0) { 1643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote signature character=byte order mark and specials */ 
1644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQU<<16; 1645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress all other BMP characters */ 1649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 1650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a window defined that contains this character - switch to it or quote from it? */ 1651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { 1652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change to dynamic window */ 1653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote from dynamic window */ 1661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; 1662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 
1664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(staticOffsets, c))>=0) { 1666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote from static window */ 1667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); 1668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define a dynamic window with this character */ 1672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && 1679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 1680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this character is not compressible (a BMP ideograph or similar); 1683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * switch to Unicode 
mode if this is the last character in the block 1684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or there is at least one more ideograph following immediately 1685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 1687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SCU<<16; 1688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote Unicode */ 1692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQU<<16; 1693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 1703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailUnicode; 1704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine for Unicode mode */ 1707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */ 
1708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity<=0) { 1710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 1715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { 1717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not compressible, write character directly */ 1718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>=2) { 1719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 1722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { 1727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress BMP character if the following one is not an uncompressible ideograph */ 1728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { 1729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { 1730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ASCII digit or letter */ 1731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; 1733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 1736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a dynamic window that contains this character, change to it */ 1737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define a dynamic window with this character */ 1746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 
1749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* don't know how to compress this character, just write it directly */ 1757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0xe000) { 1760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* c is a surrogate */ 1761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(c)) { 1762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailUnicode: 1763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lead=(UChar)c; 1764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 1765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=*source; 1767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SECOND_SURROGATE(trail)) { 1768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 1769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=UTF16_GET_PAIR_VALUE(c, trail); 1770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this surrogate code point */ 1771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru /* exit this condition tree */ 1772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress supplementary character */ 1790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && 1791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 1792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there is a 
dynamic window that contains this character and 1795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the following character is not uncompressible, 1796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change to the window 1797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ 1806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (code=getDynamicOffset(c, &offset))>=0 1807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* two supplementary characters in (probably) the same window - define an extended one */ 1809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code-=0x200; 1811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 
1814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* don't know how to compress this character, just write it directly */ 1819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)lead<<16)|trail; 1820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* 0xe000<=c<0xf300 */ { 1824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote to avoid SCSU tags */ 1825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=UQU<<16; 1826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 1835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 
1837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUIsSingleByteMode=isSingleByteMode; 1838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUDynamicWindow=dynamicWindow; 1839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c; 1841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 1843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=(char *)target; 1845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruoutputBytes: 1848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ 1849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* from the first if in the loop we know that targetCapacity>0 */ 1850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=targetCapacity) { 1851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>24); 1855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 1856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 1857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 1858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 
1859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 1860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=length; 1866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto loop; 1870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p; 1872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We actually do this backwards here: 1875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In order to save an intermediate variable, we output 1876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * first to the overflow buffer what does not fit into the 1877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * regular target. 
1878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we know that 0<=targetCapacity<length<=4 */ 1880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ 1881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=targetCapacity; 1882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p=(uint8_t *)cnv->charErrorBuffer; 1883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>24); 1887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 1888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>16); 1889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 1890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>8); 1891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 1892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p=(uint8_t)c; 1893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength=(int8_t)length; 1898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now output what fits into the regular target */ 1900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c>>=8*length; /* length was reduced by 
targetCapacity */ 1901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(targetCapacity) { 1902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 1904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 1905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 1906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 1908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 1914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 1915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}

/* miscellaneous ------------------------------------------------------------ */

/*
 * Report the converter name.
 * When the SCSUData attached to the converter records the l_ja locale,
 * the name carries the ",locale=ja" option; every other locale value
 * yields the plain "SCSU" name.
 */
static const char *
_SCSUGetName(const UConverter *cnv) {
    const SCSUData *data=(const SCSUData *)cnv->extraInfo;

    if(data->locale==l_ja) {
        return "SCSU,locale=ja";
    }
    return "SCSU";
}

/*
 * Memory layout used by _SCSUSafeClone(): the converter object followed
 * directly by its private SCSU state, so that both fit in one buffer.
 */
struct cloneSCSUStruct
{
    UConverter cnv;
    SCSUData mydata;
};

/*
 * Clone the SCSU-specific state of cnv into stackBuffer.
 *
 * - Returns NULL without touching anything if *status already indicates
 *   a failure.
 * - If *pBufferSize is 0 this is a preflighting request: the size of
 *   struct cloneSCSUStruct is stored into *pBufferSize and NULL is returned.
 * - Otherwise stackBuffer is treated as a struct cloneSCSUStruct, the
 *   SCSUData of cnv is copied into it, and a pointer to the embedded
 *   converter is returned.
 */
static UConverter *
_SCSUSafeClone(const UConverter *cnv,
               void *stackBuffer,
               int32_t *pBufferSize,
               UErrorCode *status)
{
    struct cloneSCSUStruct *clone;
    int32_t requiredSize=sizeof(struct cloneSCSUStruct);

    if(U_FAILURE(*status)) {
        return NULL;
    }

    if(*pBufferSize==0) {
        /* preflighting: only report how much space a clone needs */
        *pBufferSize=requiredSize;
        return NULL;
    }

    clone=(struct cloneSCSUStruct *)stackBuffer;
    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */

    /* copy the private state and make the clone point at its own copy */
    uprv_memcpy(&clone->mydata, cnv->extraInfo, sizeof(SCSUData));
    clone->cnv.extraInfo=&clone->mydata;
    clone->cnv.isExtraLocal=TRUE;

    return &clone->cnv;
}


/*
 * Implementation table for the SCSU converter.
 * Entries are positional; NULL marks a slot for which this converter
 * supplies no function.
 */
static const UConverterImpl _SCSUImpl={
    UCNV_SCSU,

    NULL,
    NULL,

    /* lifecycle */
    _SCSUOpen,
    _SCSUClose,
    _SCSUReset,

    /* conversion in both directions, without and with offsets */
    _SCSUToUnicode,
    _SCSUToUnicodeWithOffsets,
    _SCSUFromUnicode,
    _SCSUFromUnicodeWithOffsets,
    NULL,

    NULL,
    _SCSUGetName,
    NULL,
    _SCSUSafeClone,
    ucnv_getCompleteUnicodeSet
};

/* Static description of the SCSU converter. */
static const UConverterStaticData _SCSUStaticData={
    sizeof(UConverterStaticData),
    "SCSU",
    1212, /* CCSID for SCSU */
    UCNV_IBM, UCNV_SCSU,
    1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
    /*
     * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
     * substitution string.
     */
    { 0x0e, 0xff, 0xfd, 0 }, 3,
    FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};

/* Shared data object that ties the static data to the implementation table. */
const UConverterSharedData _SCSUData={
    sizeof(UConverterSharedData), ~((uint32_t)0),
    NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl,
    0
};

#endif