1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 2000-2011, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: ucnvscsu.c 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2000nov18 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* This is an implementation of the Standard Compression Scheme for Unicode 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* as defined in http://www.unicode.org/unicode/reports/tr6/ . 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Reserved commands and window settings are treated as illegal sequences and 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* will result in callback calls. 
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_cb.h" 2883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU definitions --------------------------------------------------------- */ 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU command byte values */ 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SQ0=0x01, /* Quote from window pair 0 */ 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SQ7=0x08, /* Quote from window pair 7 */ 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SDX=0x0B, /* Define a window as extended */ 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Srs=0x0C, /* reserved */ 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SQU=0x0E, /* Quote a single Unicode character */ 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCU=0x0F, /* Change to Unicode mode */ 
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SC0=0x10, /* Select window 0 */ 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SC7=0x17, /* Select window 7 */ 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SD0=0x18, /* Define and select window 0 */ 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SD7=0x1F, /* Define and select window 7 */ 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UC0=0xE0, /* Select window 0 */ 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UC7=0xE7, /* Select window 7 */ 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UD0=0xE8, /* Define and select window 0 */ 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UD7=0xEF, /* Define and select window 7 */ 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UQU=0xF0, /* Quote a single Unicode character */ 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UDX=0xF1, /* Define a Window as extended */ 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Urs=0xF2 /* reserved */ 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Unicode code points from 3400 to E000 are not adressible by 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * dynamic window, since in these areas no short run alphabets are 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * found. Therefore add gapOffset to all values from gapThreshold. 
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gapThreshold=0x68, 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gapOffset=0xAC00, 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* values between reservedStart and fixedThreshold are reserved */ 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru reservedStart=0xA8, 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use table of predefined fixed offsets for values from fixedThreshold */ 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fixedThreshold=0xF9 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* constant offsets for the 8 static windows */ 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const uint32_t staticOffsets[8]={ 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0000, /* ASCII for quoted tags */ 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0100, /* Latin Extended-A */ 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0300, /* Combining Diacritical Marks */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x2000, /* General Punctuation */ 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x2080, /* Currency Symbols */ 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x2100, /* Letterlike Symbols and Number Forms */ 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x3000 /* CJK Symbols and punctuation */ 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* initial offsets for the 8 dynamic (sliding) windows */ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const uint32_t initialDynamicOffsets[8]={ 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0080, /* Latin-1 */ 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x00C0, /* Latin Extended A */ 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0400, /* Cyrillic */ 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0600, /* Arabic */ 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0900, /* Devanagari */ 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x3040, /* Hiragana */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x30A0, /* Katakana */ 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0xFF00 /* Fullwidth ASCII */ 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Table of fixed predefined Offsets */ 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const uint32_t fixedOffsets[]={ 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFA */ 0x0250, /* IPA extensions */ 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFB */ 0x0370, /* Greek */ 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFC */ 0x0530, /* Armenian */ 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFD */ 0x3040, /* Hiragana */ 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFE */ 0x30A0, /* Katakana */ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0xFF */ 0xFF60 /* 
Halfwidth Katakana */ 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* state values */ 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru readCommand, 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quotePairOne, 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quotePairTwo, 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteOne, 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru definePairOne, 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru definePairTwo, 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defineOne 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct SCSUData { 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */ 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t toUDynamicOffsets[8]; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fromUDynamicOffsets[8]; 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine state - toUnicode */ 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool toUIsSingleByteMode; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t toUState; 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t toUQuoteWindow, toUDynamicWindow; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t toUByteOne; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t toUPadding[3]; 
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine state - fromUnicode */ 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fromUIsSingleByteMode; 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t fromUDynamicWindow; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * windowUse[] keeps track of the use of the dynamic windows: 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At nextWindowUseIndex there is the least recently used window, 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and the following windows (in a wrapping manner) are more and more 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * recently used. 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At nextWindowUseIndex-1 there is the most recently used window. 
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t locale; 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t nextWindowUseIndex; 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t windowUse[8]; 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} SCSUData; 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lGeneric, l_ja 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU setup functions ----------------------------------------------------- */ 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUReset(UConverter *cnv, UConverterResetChoice choice) { 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu=(SCSUData *)cnv->extraInfo; 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset toUnicode */ 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUIsSingleByteMode=TRUE; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUState=readCommand; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUQuoteWindow=scsu->toUDynamicWindow=0; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUByteOne=0; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset fromUnicode */ 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUIsSingleByteMode=TRUE; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUDynamicWindow=0; 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->nextWindowUseIndex=0; 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(scsu->locale) { 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case l_ja: 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(scsu->windowUse, initialWindowUse, 8); 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=0; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUOpen(UConverter *cnv, 19485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterLoadArgs *pArgs, 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 19685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char *locale=pArgs->locale; 19785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->onlyTestIsLoadable) { 19885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 19985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->extraInfo!=NULL) { 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((SCSUData *)cnv->extraInfo)->locale=l_ja; 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((SCSUData *)cnv->extraInfo)->locale=lGeneric; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUReset(cnv, UCNV_RESET_BOTH); 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Set the substitution character U+fffd as a Unicode string. */ 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->subUChars[0]=0xfffd; 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->subCharLen=-1; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUClose(UConverter *cnv) { 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->extraInfo!=NULL) { 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!cnv->isExtraLocal) { 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(cnv->extraInfo); 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->extraInfo=NULL; 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU-to-Unicode conversion functions ------------------------------------- */ 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source, *sourceLimit; 
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *targetLimit; 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isSingleByteMode; 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t state, byteOne; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t quoteWindow, dynamicWindow; 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, nextSourceIndex; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t b; 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu=(SCSUData *)cnv->extraInfo; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetLimit=pArgs->targetLimit; 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the state machine state */ 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=scsu->toUIsSingleByteMode; 
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=scsu->toUState; 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteWindow=scsu->toUQuoteWindow; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=scsu->toUDynamicWindow; 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=scsu->toUByteOne; 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* sourceIndex=-1 if the current character began in the previous buffer */ 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=state==readCommand ? 0 : -1; 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex=0; 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conversion "loop" 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For performance, this is not a normal C loop. 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Instead, there are two code blocks for the two SCSU modes. 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The function branches to either one, and a change of the mode is done with a goto to 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the other branch. 
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Each branch has two conventional loops: 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - a fast-path loop for the most common codes in the mode 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - a loop for all other codes in the mode 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the fast-path runs into a code that it cannot handle, its loop ends and it 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * runs into the following loop to handle the other codes. 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The end of the input or output buffer is also handled by the slower loop. 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The slow loop jumps (goto) to the fast-path loop again as soon as possible. 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The callback handling is done by returning with an error code. 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The conversion framework actually calls the callback function. 
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isSingleByteMode) { 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast path for single-byte mode */ 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(state==readCommand) { 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle: 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<=0x7f) { 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write US-ASCII graphic character or DEL */ 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)b; 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write from dynamic window */ 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru /* output surrogate pair */ 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xd7c0+(c>>10)); 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xdc00|(c&0x3ff)); 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal state machine for single-byte mode, minus handling 
for what fastSingle covers */ 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusingleByteMode: 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source++; 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case readCommand: 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* redundant conditions are commented out */ 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* here: b<0x20 because otherwise we would be in fastSingle */ 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CR/LF/TAB/NUL */ 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)b; 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(SC0<=b) { 
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<=SC7) { 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-SC0); 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* if(SD0<=b && b<=SD7) */ { 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-SD0); 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=defineOne; 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* SQ0<=b && */ b<=SQ7) { 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteWindow=(int8_t)(b-SQ0); 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quoteOne; 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SDX) { 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairOne; 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SQU) { 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairOne; 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SCU) { 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastUnicode; 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* Srs */ { 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* store the first byte of a multibyte sequence in toUBytes[] */ 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairOne: 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairTwo: 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((byteOne<<8)|b); 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quoteOne: 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<0x80) { 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* all static 
offsets are in the BMP */ 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(staticOffsets[quoteWindow]+b); 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write from dynamic window */ 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output surrogate pair */ 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xd7c0+(c>>10)); 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xdc00|(c&0x3ff)); 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case definePairOne: 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)((b>>5)&7); 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=(uint8_t)(b&0x1f); 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairTwo; 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case definePairTwo: 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case defineOne: 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b==0) { 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal): Reserved window offset value 0 */ 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b<gapThreshold) { 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b>=fixedThreshold) { 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
state=readCommand; 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast path for Unicode mode */ 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(state==readCommand) { 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastUnicode: 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((b<<8)|source[1]); 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex+=2; 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal state machine for Unicode mode */ 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */ 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source++; 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case readCommand: 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint8_t)(b-UC0)>(Urs-UC0)) { 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* UC0<=b && */ b<=UC7) { 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-UC0); 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* UD0<=b && */ b<=UD7) { 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-UD0); 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=defineOne; 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto singleByteMode; 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru } else if(b==UDX) { 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairOne; 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto singleByteMode; 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==UQU) { 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairOne; 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* Urs */ { 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairOne: 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairTwo: 
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((byteOne<<8)|b); 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastUnicode; 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset to deal with the next character */ 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(state==readCommand) { 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not in a multi-byte sequence, reset toULength */ 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUIsSingleByteMode=isSingleByteMode; 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUState=state; 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUQuoteWindow=quoteWindow; 
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicWindow=dynamicWindow; 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUByteOne=byteOne; 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _SCSUToUnicodeWithOffsets but without offset handling. 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex. 
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUToUnicode(UConverterToUnicodeArgs *pArgs, 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu; 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source, *sourceLimit; 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *targetLimit; 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isSingleByteMode; 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t state, byteOne; 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t quoteWindow, dynamicWindow; 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t b; 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu=(SCSUData *)cnv->extraInfo; 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetLimit=pArgs->targetLimit; 
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the state machine state */ 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=scsu->toUIsSingleByteMode; 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=scsu->toUState; 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteWindow=scsu->toUQuoteWindow; 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=scsu->toUDynamicWindow; 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=scsu->toUByteOne; 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conversion "loop" 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For performance, this is not a normal C loop. 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Instead, there are two code blocks for the two SCSU modes. 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The function branches to either one, and a change of the mode is done with a goto to 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the other branch. 
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Each branch has two conventional loops: 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - a fast-path loop for the most common codes in the mode 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - a loop for all other codes in the mode 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the fast-path runs into a code that it cannot handle, its loop ends and it 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * runs into the following loop to handle the other codes. 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The end of the input or output buffer is also handled by the slower loop. 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The slow loop jumps (goto) to the fast-path loop again as soon as possible. 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The callback handling is done by returning with an error code. 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The conversion framework actually calls the callback function. 
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isSingleByteMode) { 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast path for single-byte mode */ 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(state==readCommand) { 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle: 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<=0x7f) { 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write US-ASCII graphic character or DEL */ 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)b; 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write from dynamic window */ 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output surrogate pair */ 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xd7c0+(c>>10)); 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xdc00|(c&0x3ff)); 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusingleByteMode: 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source++; 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case readCommand: 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* redundant conditions are commented out */ 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* here: b<0x20 because otherwise we would be in fastSingle */ 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((1UL<<b)&0x2601 /* binary 0010 0110 0000 
0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CR/LF/TAB/NUL */ 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)b; 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(SC0<=b) { 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<=SC7) { 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-SC0); 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* if(SD0<=b && b<=SD7) */ { 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-SD0); 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=defineOne; 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* SQ0<=b && */ b<=SQ7) { 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoteWindow=(int8_t)(b-SQ0); 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quoteOne; 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SDX) { 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairOne; 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SQU) { 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairOne; 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==SCU) { 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastUnicode; 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* Srs */ { 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* store the first byte of a multibyte sequence in toUBytes[] */ 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairOne: 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairTwo: 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((byteOne<<8)|b); 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quoteOne: 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<0x80) { 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* all static offsets are in the BMP */ 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(staticOffsets[quoteWindow]+b); 
724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write from dynamic window */ 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output surrogate pair */ 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xd7c0+(c>>10)); 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)(0xdc00|(c&0x3ff)); 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case definePairOne: 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)((b>>5)&7); 
747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=(uint8_t)(b&0x1f); 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairTwo; 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case definePairTwo: 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case defineOne: 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b==0) { 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal): Reserved window offset value 0 */ 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b<gapThreshold) { 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b>=fixedThreshold) { 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; 
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast path for Unicode mode */ 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(state==readCommand) { 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastUnicode: 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)((b<<8)|source[1]); 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal state machine for Unicode mode */ 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */ 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 
792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source++; 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case readCommand: 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint8_t)(b-UC0)>(Urs-UC0)) { 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* UC0<=b && */ b<=UC7) { 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-UC0); 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(/* UD0<=b && */ b<=UD7) { 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=(int8_t)(b-UD0); 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=defineOne; 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto singleByteMode; 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==UDX) { 
816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=definePairOne; 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto singleByteMode; 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==UQU) { 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairOne; 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* Urs */ { 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=b; 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairOne: 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteOne=b; 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=b; 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=quotePairTwo; 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case quotePairTwo: 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*target++=(UChar)((byteOne<<8)|b); 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastUnicode; 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset to deal with the next character */ 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=readCommand; 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(state==readCommand) { 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not in a multi-byte sequence, reset toULength */ 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUIsSingleByteMode=isSingleByteMode; 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUState=state; 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUQuoteWindow=quoteWindow; 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUDynamicWindow=dynamicWindow; 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->toUByteOne=byteOne; 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru pArgs->source=(const char *)source; 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* SCSU-from-Unicode conversion functions ----------------------------------- */ 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reasonable results. The lookahead is minimal. 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Many cases are simple: 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A character fits directly into the current mode, a dynamic or static window, 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or is not compressible. These cases are tested first. 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Real compression heuristics are applied to the rest, in code branches for 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * single/Unicode mode and BMP/supplementary code points. 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The heuristics used here are extremely simple. 
/* Helper functions for the SCSU encoder follow. */

/*
 * Get the number of the window that this character is in.
 *
 * offsets[] holds the start offsets of eight windows; a window covers the
 * 0x80 code points beginning at its offset.
 * Returns the index (0..7) of the first window that contains c, or -1 if none does.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t window;

    for(window=0; window<8; ++window) {
        /*
         * Unsigned subtraction wraps to a large value when c is below the offset,
         * so a single comparison checks both ends of the window.
         */
        uint32_t delta=c-offsets[window];
        if(delta<=0x7f) {
            return window;
        }
    }

    /* no window contains c */
    return -1;
}

/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
*/ 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruisInOffsetWindowOrDirect(uint32_t offset, uint32_t c) { 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (UBool)(c<=offset+0x7f && 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (c>=offset || (c<=0x7f && 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (c>=0x20 || (1UL<<c)&0x2601)))); 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* binary 0010 0110 0000 0001, 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru check for b==0xd || b==0xa || b==9 || b==0 */ 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * getNextDynamicWindow returns the next dynamic window to be redefined 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int8_t 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetNextDynamicWindow(SCSUData *scsu) { 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t window=scsu->windowUse[scsu->nextWindowUseIndex]; 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++scsu->nextWindowUseIndex==8) { 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->nextWindowUseIndex=0; 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return window; 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * useDynamicWindow() adjusts 
917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * windowUse[] and nextWindowUseIndex for the algorithm to choose 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the next dynamic window to be defined; 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a subclass may override it and provide its own algorithm. 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruuseDynamicWindow(SCSUData *scsu, int8_t window) { 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * move the existing window, which just became the most recently used one, 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * up in windowUse[] to nextWindowUseIndex-1 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* first, find the index of the window - backwards to favor the more recently used windows */ 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i, j; 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=scsu->nextWindowUseIndex; 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(--i<0) { 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=7; 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(scsu->windowUse[i]!=window); 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now copy each windowUse[i+1] to [i] */ 
939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=i+1; 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(j==8) { 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=0; 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(j!=scsu->nextWindowUseIndex) { 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->windowUse[i]=scsu->windowUse[j]; 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=j; 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++j==8) { j=0; } 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* finally, set the window into the most recently used index */ 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->windowUse[i]=window; 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * calculate the offset and the code for a dynamic window that contains the character 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * takes fixed offsets into account 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the offset of the window is stored in the offset variable, 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the code is returned 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int 
962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetDynamicOffset(uint32_t c, uint32_t *pOffset) { 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i; 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<7; ++i) { 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint32_t)(c-fixedOffsets[i])<=0x7f) { 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pOffset=fixedOffsets[i]; 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xf9+i; 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x80) { 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* No dynamic window for US-ASCII. */ 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0x3400 || 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t)(c-0x10000)<(0x14000-0x10000) || 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000) 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* This character is in a code range for a "small", i.e., reasonably windowable, script. */ 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pOffset=c&0x7fffff80; 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (int)(c>>7); 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(0xe000<=c && c!=0xfeff && c<0xfff0) { 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* For these characters we need to take the gapOffset into account. 
*/ 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pOffset=c&0x7fffff80; 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (int)((c-gapOffset)>>7); 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Idea for compression: 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - save SCSUData and other state before really starting work 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - at endloop, see if compression could be better with just unicode mode 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - don't do this if a callback has been called 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - different buffer handling! 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Drawback or need for corrective handling: 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not only for compression but also for HTML/XML documents with following charset/encoding announcers. 
1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * How to achieve both? 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - Only replace the result after an SDX or SCU? 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu; 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source, *sourceLimit; 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target; 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t targetCapacity; 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isSingleByteMode; 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t dynamicWindow; 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t currentOffset; 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c, delta; 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, nextSourceIndex; 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 
1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* variables for compression heuristics */ 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t offset; 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar lead, trail; 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int code; 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t window; 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu=(SCSUData *)cnv->extraInfo; 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=pArgs->sourceLimit; 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=(uint8_t *)pArgs->target; 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the state machine state */ 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=scsu->fromUIsSingleByteMode; 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=scsu->fromUDynamicWindow; 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 
1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=cnv->fromUChar32; 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* sourceIndex=-1 if the current character began in the previous buffer */ 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex= c==0 ? 0 : -1; 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex=0; 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* similar conversion "loop" as in toUnicode */ 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruloop: 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isSingleByteMode) { 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailSingle; 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine for single-byte mode */ 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* singleByteMode: */ 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity<=0) { 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 
1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((c-0x20)<=0x5f) { 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* pass US-ASCII graphic character through */ 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0x20) { 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CR/LF/TAB/NUL */ 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote C0 control character */ 1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQ0<<8; 1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((delta=c-currentOffset)<=0x7f) { 
1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use the current dynamic window */ 1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(delta|0x80); 1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 110283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } else if(U16_IS_SURROGATE(c)) { 110383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_SURROGATE_LEAD(c)) { 1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailSingle: 1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lead=(UChar)c; 1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=*source; 110983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_TRAIL(trail)) { 1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 111283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius c=U16_GET_SUPPLEMENTARY(c, trail); 1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this surrogate code point */ 1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*pErrorCode=U_ILLEGAL_CHAR_FOUND; 1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress supplementary character U+10000..U+10ffff */ 1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((delta=c-currentOffset)<=0x7f) { 1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use the current dynamic window */ 1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(delta|0x80); 1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a dynamic window 
that contains this character, change to it */ 1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* might check if there are more characters in this window to come */ 1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define an extended window with this character */ 1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code-=0x200; 1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change to Unicode mode and output this (lead, trail) pair */ 1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
isSingleByteMode=FALSE; 1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)SCU; 1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)lead<<16)|trail; 1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0xa0) { 1171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote C1 control character */ 1172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ 1173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c==0xfeff || c>=0xfff0) { 1176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote signature character=byte order mark and specials */ 1177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQU<<16; 1178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress all other BMP characters */ 1182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 1183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a 
window defined that contains this character - switch to it or quote from it? */ 1184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { 1185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change to dynamic window */ 1186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote from dynamic window */ 1194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; 1195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(staticOffsets, c))>=0) { 1199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote from static window */ 1200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); 1201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else 
if((code=getDynamicOffset(c, &offset))>=0) { 1204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define a dynamic window with this character */ 1205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && 1212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 1213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this character is not compressible (a BMP ideograph or similar); 1216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * switch to Unicode mode if this is the last character in the block 1217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or there is at least one more ideograph following immediately 1218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 1220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SCU<<16; 1221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
} else { 1224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote Unicode */ 1225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQU<<16; 1226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailUnicode; 1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine for Unicode mode */ 1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */ 1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity<=0) { 1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 
1249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { 1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not compressible, write character directly */ 1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>=2) { 1254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 1261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { 1266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress BMP character if the following one is not an uncompressible ideograph */ 1267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { 1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { 1269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ASCII digit or letter */ 
1270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; 1272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a dynamic window that contains this character, change to it */ 1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define a dynamic window with this character */ 1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* don't know how to compress this character, just write it directly */ 1296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0xe000) { 1299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* c is a surrogate */ 130083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_SURROGATE_LEAD(c)) { 1301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailUnicode: 1302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lead=(UChar)c; 1303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 1304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=*source; 130683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_TRAIL(trail)) { 1307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 1308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 130983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius c=U16_GET_SUPPLEMENTARY(c, trail); 1310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this surrogate code point */ 1311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 1312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
else { 1313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress supplementary character */ 1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && 1331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there is a dynamic window that contains this character and 1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru * the following character is not uncompressible, 1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change to the window 1337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ 1346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (code=getDynamicOffset(c, &offset))>=0 1347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* two supplementary characters in (probably) the same window - define an extended one */ 1349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code-=0x200; 1351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* don't know how to compress this character, just write it directly */ 1359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)lead<<16)|trail; 1360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* 0xe000<=c<0xf300 */ { 1364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote to avoid SCSU tags */ 1365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=UQU<<16; 1366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 1376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 
1378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUIsSingleByteMode=isSingleByteMode; 1379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUDynamicWindow=dynamicWindow; 1380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c; 1382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 1384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=(char *)target; 1386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 1387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruoutputBytes: 1390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ 1391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* from the first if in the loop we know that targetCapacity>0 */ 1392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=targetCapacity) { 1393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 1394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>24); 139883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 3: /*fall through*/ 1399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 
140083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 1401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 140283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 1403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>24); 1413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 141483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 3: /*fall through*/ 1415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 1416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 141783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 1418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 142083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 1421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
default: 1424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=length; 1429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto loop; 1434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p; 1436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We actually do this backwards here: 1439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In order to save an intermediate variable, we output 1440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * first to the overflow buffer what does not fit into the 1441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * regular target. 
1442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we know that 0<=targetCapacity<length<=4 */ 1444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ 1445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=targetCapacity; 1446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p=(uint8_t *)cnv->charErrorBuffer; 1447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>24); 145183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 3: /*fall through*/ 1452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>16); 145383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 1454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>8); 145583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 1456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p=(uint8_t)c; 1457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength=(int8_t)length; 1462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now output what fits into the regular target */ 1464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
c>>=8*length; /* length was reduced by targetCapacity */ 1465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(targetCapacity) { 1466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 1468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 1469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 147283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 1473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 147783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 1478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 1487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 1488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _SCSUFromUnicodeWithOffsets but without offset handling. 1496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either 1497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or 1498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables 1499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex. 
1500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, 1503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 1505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu; 1506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source, *sourceLimit; 1507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target; 1508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t targetCapacity; 1509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isSingleByteMode; 1511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t dynamicWindow; 1512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t currentOffset; 1513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t c, delta; 1515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 1517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* variables for compression heuristics */ 1519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t offset; 1520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar lead, trail; 1521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int code; 1522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t window; 1523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the 
local pointers */ 1525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 1526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu=(SCSUData *)cnv->extraInfo; 1527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 1529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 1530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=pArgs->sourceLimit; 1531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=(uint8_t *)pArgs->target; 1532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 1533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the state machine state */ 1535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=scsu->fromUIsSingleByteMode; 1536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=scsu->fromUDynamicWindow; 1537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=cnv->fromUChar32; 1540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* similar conversion "loop" as in toUnicode */ 1542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruloop: 1543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isSingleByteMode) { 1544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 1545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailSingle; 1546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
1547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine for single-byte mode */ 1549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* singleByteMode: */ 1550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity<=0) { 1552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 1557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((c-0x20)<=0x5f) { 1559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* pass US-ASCII graphic character through */ 1560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0x20) { 1563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 1564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CR/LF/TAB/NUL */ 1565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote C0 control character */ 1569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQ0<<8; 
1570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((delta=c-currentOffset)<=0x7f) { 1574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use the current dynamic window */ 1575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(delta|0x80); 1576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 157783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } else if(U16_IS_SURROGATE(c)) { 157883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_SURROGATE_LEAD(c)) { 1579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailSingle: 1580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lead=(UChar)c; 1581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 1582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=*source; 158483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_TRAIL(trail)) { 1585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 158683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius c=U16_GET_SUPPLEMENTARY(c, trail); 1587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this surrogate code point */ 1588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 1589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*pErrorCode=U_ILLEGAL_CHAR_FOUND; 1593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress supplementary character U+10000..U+10ffff */ 1607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((delta=c-currentOffset)<=0x7f) { 1608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use the current dynamic window */ 1609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(delta|0x80); 1610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 1611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 1612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a dynamic window that contains this character, change to it */ 1613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* might check if there are more characters in this window to come */ 1621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define an extended window with this character */ 1622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code-=0x200; 1623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change to Unicode mode and output this (lead, trail) pair */ 1631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 1632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)SCU; 1633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 
1634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)lead<<16)|trail; 1635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0xa0) { 1639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote C1 control character */ 1640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ 1641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c==0xfeff || c>=0xfff0) { 1644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote signature character=byte order mark and specials */ 1645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQU<<16; 1646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress all other BMP characters */ 1650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 1651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a window defined that contains this character - switch to it or quote from it? 
*/ 1652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { 1653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change to dynamic window */ 1654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote from dynamic window */ 1662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; 1663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(staticOffsets, c))>=0) { 1667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote from static window */ 1668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); 1669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 
1672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define a dynamic window with this character */ 1673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && 1680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 1681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this character is not compressible (a BMP ideograph or similar); 1684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * switch to Unicode mode if this is the last character in the block 1685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or there is at least one more ideograph following immediately 1686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=FALSE; 1688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SCU<<16; 1689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 
1692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote Unicode */ 1693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=SQU<<16; 1694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 1704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailUnicode; 1705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* state machine for Unicode mode */ 1708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* unicodeByteMode: */ 1709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity<=0) { 1711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 1716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { 1718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not compressible, write character directly */ 1719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>=2) { 1720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 1721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 1723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { 1728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress BMP character if the following one is not an uncompressible ideograph */ 1729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { 1730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { 1731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ASCII digit or letter */ 1732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; 1734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 
1737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is a dynamic window that contains this character, change to it */ 1738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 1741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((code=getDynamicOffset(c, &offset))>=0) { 1746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* define a dynamic window with this character */ 1747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* don't know how to compress this character, just write it directly */ 1758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<0xe000) { 1761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* c is a surrogate */ 176283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_SURROGATE_LEAD(c)) { 1763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailUnicode: 1764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lead=(UChar)c; 1765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 1766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=*source; 176883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_TRAIL(trail)) { 1769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 177083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius c=U16_GET_SUPPLEMENTARY(c, trail); 1771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this surrogate code point */ 1772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 1773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru } else { 1780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compress supplementary character */ 1791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && 1792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 1793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there is a dynamic window that contains this character and 1796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the following character is not uncompressible, 1797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change to the window 1798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=window; 1801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 
1802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 1804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ 1807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (code=getDynamicOffset(c, &offset))>=0 1808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* two supplementary characters in (probably) the same window - define an extended one */ 1810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isSingleByteMode=TRUE; 1811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code-=0x200; 1812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dynamicWindow=getNextDynamicWindow(scsu); 1813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 1814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useDynamicWindow(scsu, dynamicWindow); 1815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 1816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* don't know how to compress this character, just write it directly */ 1820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((uint32_t)lead<<16)|trail; 
1821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; 1822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* 0xe000<=c<0xf300 */ { 1825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* quote to avoid SCSU tags */ 1826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c|=UQU<<16; 1827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto outputBytes; 1829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* normal end of conversion: prepare for a new character */ 1832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 1836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 1838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUIsSingleByteMode=isSingleByteMode; 1839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scsu->fromUDynamicWindow=dynamicWindow; 1840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c; 1842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 1844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru pArgs->target=(char *)target; 1846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruoutputBytes: 1849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ 1850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* from the first if in the loop we know that targetCapacity>0 */ 1851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=targetCapacity) { 1852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>24); 185683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 3: /*fall through*/ 1857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 185883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 1859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 186083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 1861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=length; 1867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 
normal end of conversion: prepare for a new character */ 1869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto loop; 1871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p; 1873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We actually do this backwards here: 1876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In order to save an intermediate variable, we output 1877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * first to the overflow buffer what does not fit into the 1878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * regular target. 1879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we know that 0<=targetCapacity<length<=4 */ 1881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ 1882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=targetCapacity; 1883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p=(uint8_t *)cnv->charErrorBuffer; 1884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 1885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 1887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>24); 188883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 3: /*fall through*/ 1889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>16); 189083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall 
through*/ 1891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(c>>8); 189283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 1893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p=(uint8_t)c; 1894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 1896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength=(int8_t)length; 1899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now output what fits into the regular target */ 1901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c>>=8*length; /* length was reduced by targetCapacity */ 1902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(targetCapacity) { 1903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 1904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 1905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>16); 190683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 1907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(c>>8); 190883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 1909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 1910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 
target overflow */ 1915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 1916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* miscellaneous ------------------------------------------------------------ */ 1923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char * 1925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUGetName(const UConverter *cnv) { 1926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData *scsu=(SCSUData *)cnv->extraInfo; 1927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(scsu->locale) { 1929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case l_ja: 1930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return "SCSU,locale=ja"; 1931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return "SCSU"; 1933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* structure for SafeClone calculations */ 1937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct cloneSCSUStruct 1938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 1939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru UConverter cnv; 1940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SCSUData mydata; 1941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UConverter * 1944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_SCSUSafeClone(const UConverter *cnv, 1945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void *stackBuffer, 1946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *pBufferSize, 1947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *status) 1948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 1949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct cloneSCSUStruct * localClone; 1950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct); 1951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*status)){ 1953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 1954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ 1957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pBufferSize = bufferSizeNeeded; 1958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 1959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone = (struct cloneSCSUStruct *)stackBuffer; 1962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ 
1963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); 1965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone->cnv.extraInfo = &localClone->mydata; 1966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone->cnv.isExtraLocal = TRUE; 1967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return &localClone->cnv; 1969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _SCSUImpl={ 1973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_SCSU, 1974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUOpen, 1979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUClose, 1980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUReset, 1981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUToUnicode, 1983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUToUnicodeWithOffsets, 1984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUFromUnicode, 1985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUFromUnicodeWithOffsets, 1986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUGetName, 1990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _SCSUSafeClone, 1992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getCompleteUnicodeSet 1993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _SCSUStaticData={ 1996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 1997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "SCSU", 1998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1212, /* CCSID for SCSU */ 1999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, UCNV_SCSU, 2000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */ 2001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode 2003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * substitution string. 
2004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x0e, 0xff, 0xfd, 0 }, 3, 2006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, FALSE, 2007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 2008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 2009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 2010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 2011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _SCSUData={ 2013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t)0), 2014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl, 2015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 2016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 2017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 2019