1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 2002-2011, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* file name: ucnvbocu.cpp 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2002mar27 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* This is an implementation of the Binary Ordered Compression for Unicode, 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/ 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_cb.h" 2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h" 2783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "putilimp.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 3083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "uassert.h" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* BOCU-1 constants and macros ---------------------------------------------- */ 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * BOCU-1 encodes the code points of a Unicode string as 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a sequence of byte-encoded differences (slope detection), 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * preserving lexical order. 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Optimize the difference-taking for runs of Unicode text within 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * small scripts: 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Most small scripts are allocated within aligned 128-blocks of Unicode 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * code points. Lexical order is preserved if the "previous code point" state 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is always moved into the middle of such a block. 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Additionally, "prev" is moved from anywhere in the Unihan and Hangul 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * areas into the middle of those areas. 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * C0 control codes and space are encoded with their US-ASCII bytes. 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "prev" is reset for C0 controls but not for space. 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* initial value for "prev": middle of the ASCII range */ 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_ASCII_PREV 0x40 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* bounding byte values for differences */ 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_MIN 0x21 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_MIDDLE 0x90 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_MAX_LEAD 0xfe 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_MAX_TRAIL 0xff 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_RESET 0xff 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* number of lead bytes */ 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1) 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* adjust trail byte counts for the use of some C0 control byte values */ 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_TRAIL_CONTROLS_COUNT 20 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT) 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* number of trail bytes */ 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT) 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * number of positive and negative single-byte codes 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (counting 0==BOCU1_MIDDLE among the positive ones) 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_SINGLE 64 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* number of lead bytes for positive and negative 2/3/4-byte sequences */ 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_LEAD_2 43 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_LEAD_3 3 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_LEAD_4 1 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The difference value range for single-byters. */ 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1) 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE) 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The difference value range for double-byters. */ 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The difference value range for 3-byters. */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_REACH_POS_3 \ 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The lead byte start values. */ 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1) 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2) 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3) 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ==BOCU1_MAX_LEAD */ 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1) 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2) 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3) 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ==BOCU1_MIN+1 */ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */ 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_LENGTH_FROM_LEAD(lead) \ 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \ 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \ 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4) 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The length of a byte sequence, according to its packed form. */ 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_LENGTH_FROM_PACKED(packed) \ 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4) 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 12 commonly used C0 control codes (and space) are only used to encode 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * themselves directly, 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * which makes BOCU-1 MIME-usable and reasonably safe for 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ASCII-oriented software. 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * These controls are 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0 NUL 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 7 BEL 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 8 BS 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 9 TAB 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a LF 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * b VT 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c FF 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * d CR 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * e SO 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * f SI 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1a SUB 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1b ESC 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The other 20 C0 controls are also encoded directly (to preserve order) 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but are also used as trail bytes in difference encoding 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (for better compression). 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t]) 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Byte value map for control codes, 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from external byte values 0x00..0x20 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to trail byte values 0..19 (0..0x13) as used in the difference calculation. 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * External byte values that are illegal as trail bytes are mapped to -1. 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querubocu1ByteToTrail[BOCU1_MIN]={ 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 */ 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1, 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 8 9 a b c d e f */ 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru -1, -1, -1, -1, -1, -1, -1, -1, 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 10 11 12 13 14 15 16 17 */ 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 18 19 1a 1b 1c 1d 1e 1f */ 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13, 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 20 */ 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru -1 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Byte value map for control codes, 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from trail byte values 0..19 (0..0x13) as used in the difference calculation 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to external byte values 0x00..0x20. 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querubocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={ 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 */ 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11, 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 8 9 a b c d e f */ 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 10 11 12 13 */ 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x1c, 0x1d, 0x1e, 0x1f 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Integer division and modulo with negative numerators 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * yields negative modulo results and quotients that are one more than 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * what we need here. 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This macro adjust the results so that the modulo-value m is always >=0. 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For positive n, the if() condition is always FALSE. 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param n Number to be split into quotient and rest. 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Will be modified to contain the quotient. 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param d Divisor. 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param m Output variable for the rest (modulo result). 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define NEGDIVMOD(n, d, m) { \ 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (m)=(n)%(d); \ 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (n)/=(d); \ 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((m)<0) { \ 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --(n); \ 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (m)+=(d); \ 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } \ 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius/* Faster versions of packDiff() for single-byte-encoded diff values. */ 21383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 21483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius/** Is a diff value encodable in a single byte? */ 21583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1) 21683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 21783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius/** Encode a diff value in a single byte. */ 21883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff)) 21983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 22083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius/** Is a diff value encodable in two bytes? */ 22183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2) 22283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* BOCU-1 implementation functions ------------------------------------------ */ 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV) 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Compute the next "previous" value for differencing 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from the current code point. 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below) 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return "previous code point" state value 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 23483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic inline int32_t 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querubocu1Prev(int32_t c) { 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* compute new prev */ 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(/* 0x3040<=c && */ c<=0x309f) { 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Hiragana is not 128-aligned */ 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0x3070; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(0x4e00<=c && c<=0x9fa5) { 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CJK Unihan */ 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0x4e00-BOCU1_REACH_NEG_2; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(0xac00<=c /* && c<=0xd7a3 */) { 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Korean Hangul */ 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (0xd7a3+0xac00)/2; 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* mostly small scripts */ 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return BOCU1_SIMPLE_PREV(c); 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** Fast version of bocu1Prev() for most scripts. */ 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c)) 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c. 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UConverter fields are used as follows: 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV) 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV) 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * mode decoder's incomplete (diff<<2)|count (ignored when toULength==0) 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* BOCU-1-from-Unicode conversion functions --------------------------------- */ 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return a packed integer with them. 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 27183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * The encoding favors small absolute differences with short encodings 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to compress runs of same-script characters. 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Optimized version with unrolled loops and fewer floating-point operations 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * than the standard packDiff(). 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param diff difference value -0x10ffff..0x10ffff 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0x010000zz for 1-byte sequence zz 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0x0200yyzz for 2-byte sequence yy zz 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0x03xxyyzz for 3-byte sequence xx yy zz 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03) 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerupackDiff(int32_t diff) { 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t result, m; 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 28883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */ 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(diff>=BOCU1_REACH_NEG_1) { 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* mostly positive differences, and single-byte negative ones */ 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 /* single-byte case handled in macros, see below */ 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(diff<=BOCU1_REACH_POS_1) { 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* single byte */ 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0x01000000|(BOCU1_MIDDLE+diff); 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(diff<=BOCU1_REACH_POS_2) { 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* two bytes */ 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_POS_1+1; 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=0x02000000; 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=diff%BOCU1_TRAIL_COUNT; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff/=BOCU1_TRAIL_COUNT; 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m); 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=(BOCU1_START_POS_2+diff)<<8; 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(diff<=BOCU1_REACH_POS_3) { 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* three bytes */ 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_POS_2+1; 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=0x03000000; 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=diff%BOCU1_TRAIL_COUNT; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff/=BOCU1_TRAIL_COUNT; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m); 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=diff%BOCU1_TRAIL_COUNT; 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff/=BOCU1_TRAIL_COUNT; 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m)<<8; 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=(BOCU1_START_POS_3+diff)<<16; 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* four bytes */ 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_POS_3+1; 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=diff%BOCU1_TRAIL_COUNT; 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff/=BOCU1_TRAIL_COUNT; 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=BOCU1_TRAIL_TO_BYTE(m); 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=diff%BOCU1_TRAIL_COUNT; 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff/=BOCU1_TRAIL_COUNT; 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m)<<8; 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We know that / and % would deliver quotient 0 and rest=diff. 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Avoid division and modulo for performance. 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(diff)<<16; 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=((uint32_t)BOCU1_START_POS_4)<<24; 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* two- to four-byte negative differences */ 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(diff>=BOCU1_REACH_NEG_2) { 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* two bytes */ 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_1; 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=0x02000000; 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m); 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=(BOCU1_START_NEG_2+diff)<<8; 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(diff>=BOCU1_REACH_NEG_3) { 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* three bytes */ 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_2; 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=0x03000000; 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m); 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m)<<8; 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=(BOCU1_START_NEG_3+diff)<<16; 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* four bytes */ 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_3; 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=BOCU1_TRAIL_TO_BYTE(m); 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m)<<8; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We know that NEGDIVMOD would deliver 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT. 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Avoid division and modulo for performance. 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=diff+BOCU1_TRAIL_COUNT; 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m)<<16; 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result|=BOCU1_MIN<<24; 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result; 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source, *sourceLimit; 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target; 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t targetCapacity; 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t prev, c, diff; 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, nextSourceIndex; 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_ALIGN_CODE(16) 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=pArgs->sourceLimit; 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=(uint8_t *)pArgs->target; 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the converter state from UConverter */ 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=cnv->fromUChar32; 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=(int32_t)cnv->fromUnicodeStatus; 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(prev==0) { 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* sourceIndex=-1 if the current character began in the previous buffer */ 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex= c==0 ? 0 : -1; 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex=0; 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* conversion loop */ 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle: 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast loop for single-byte differences */ 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use only one loop counter variable, targetCapacity, not also source */ 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=(int32_t)(sourceLimit-source); 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>diff) { 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=diff; 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(targetCapacity>0 && (c=*source)<0x3000) { 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0x20) { 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0x20) { 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=nextSourceIndex++; 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=c-prev; 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(DIFF_IS_SINGLE(diff)) { 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_SIMPLE_PREV(c); 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)PACK_SINGLE_DIFF(diff); 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=nextSourceIndex++; 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* restore real values */ 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */ 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* regular loop for all cases */ 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>0) { 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0x20) { 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ISO C0 control & space: 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Encode directly for MIME compatibility, 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and reset state except for space, to not disrupt compression. 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0x20) { 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_LEAD(c)) { 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail=*source; 48983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_TRAIL(trail)) { 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 49283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius c=U16_GET_SUPPLEMENTARY(c, trail); 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */ 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * all other Unicode code points c==U+0021..U+10ffff 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are encoded with the difference c-prev 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a new prev is computed from c, 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * placed in the middle of a 0x80-block (for most small scripts) or 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in the middle of the Unihan and Hangul blocks 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to statistically minimize the following difference 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=c-prev; 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_PREV(c); 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(DIFF_IS_SINGLE(diff)) { 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)PACK_SINGLE_DIFF(diff); 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x3000) { 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) { 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* optimize 2-byte case */ 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t m; 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(diff>=0) { 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_POS_1+1; 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=diff%BOCU1_TRAIL_COUNT; 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff/=BOCU1_TRAIL_COUNT; 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff+=BOCU1_START_POS_2; 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_1; 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff+=BOCU1_START_NEG_2; 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)diff; 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; /* will be 2..4 */ 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=packDiff(diff); 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=BOCU1_LENGTH_FROM_PACKED(diff); 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the output character bytes from diff and length */ 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* from the first if in the loop we know that targetCapacity>0 */ 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=targetCapacity) { 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>24); 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 55483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 3: /*fall through*/ 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>16); 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 55783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>8); 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case 1: handled above */ 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)diff; 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=length; 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *charErrorBuffer; 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We actually do this backwards here: 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In order to save an intermediate variable, we output 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * first to the overflow buffer what does not fit into the 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * regular target. 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we know that 1<=targetCapacity<length<=4 */ 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=targetCapacity; 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *charErrorBuffer++=(uint8_t)(diff>>16); 58583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *charErrorBuffer++=(uint8_t)(diff>>8); 58783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *charErrorBuffer=(uint8_t)diff; 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength=(int8_t)length; 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now output what fits into the regular target */ 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff>>=8*length; /* length was reduced by targetCapacity */ 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(targetCapacity) { 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>16); 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 60283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>8); 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 60583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)diff; 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32= c<0 ? -c : 0; 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=(uint32_t)prev; 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=(char *)target; 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling. 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex. 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs, 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source, *sourceLimit; 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target; 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t targetCapacity; 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t prev, c, diff; 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=pArgs->sourceLimit; 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=(uint8_t *)pArgs->target; 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the converter state from UConverter */ 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=cnv->fromUChar32; 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=(int32_t)cnv->fromUnicodeStatus; 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(prev==0) { 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* conversion loop */ 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0 && targetCapacity>0) { 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle: 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast loop for single-byte differences */ 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use only one loop counter variable, targetCapacity, not also source */ 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=(int32_t)(sourceLimit-source); 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>diff) { 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=diff; 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(targetCapacity>0 && (c=*source)<0x3000) { 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0x20) { 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0x20) { 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=c-prev; 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(DIFF_IS_SINGLE(diff)) { 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_SIMPLE_PREV(c); 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)PACK_SINGLE_DIFF(diff); 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* restore real values */ 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* regular loop for all cases */ 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>0) { 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0x20) { 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ISO C0 control & space: 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Encode directly for MIME compatibility, 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and reset state except for space, to not disrupt compression. 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0x20) { 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)c; 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 71983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_LEAD(c)) { 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail=*source; 72483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_TRAIL(trail)) { 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 72683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius c=U16_GET_SUPPLEMENTARY(c, trail); 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */ 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * all other Unicode code points c==U+0021..U+10ffff 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are encoded with the difference c-prev 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a new prev is computed from c, 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * placed in the middle of a 0x80-block (for most small scripts) or 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in the middle of the Unihan and Hangul blocks 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to statistically minimize the following difference 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=c-prev; 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_PREV(c); 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(DIFF_IS_SINGLE(diff)) { 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)PACK_SINGLE_DIFF(diff); 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x3000) { 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) { 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* optimize 2-byte case */ 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t m; 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(diff>=0) { 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_POS_1+1; 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=diff%BOCU1_TRAIL_COUNT; 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff/=BOCU1_TRAIL_COUNT; 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff+=BOCU1_START_POS_2; 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_1; 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff+=BOCU1_START_NEG_2; 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)diff; 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; /* will be 2..4 */ 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=packDiff(diff); 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=BOCU1_LENGTH_FROM_PACKED(diff); 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the output character bytes from diff and length */ 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* from the first if in the loop we know that targetCapacity>0 */ 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=targetCapacity) { 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>24); 78283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 3: /*fall through*/ 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>16); 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case 2: handled above */ 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>8); 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case 1: handled above */ 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)diff; 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=length; 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *charErrorBuffer; 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We actually do this backwards here: 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In order to save an intermediate variable, we output 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * first to the overflow buffer what does not fit into the 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * regular target. 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we know that 1<=targetCapacity<length<=4 */ 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=targetCapacity; 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *charErrorBuffer++=(uint8_t)(diff>>16); 80983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *charErrorBuffer++=(uint8_t)(diff>>8); 81183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *charErrorBuffer=(uint8_t)diff; 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength=(int8_t)length; 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now output what fits into the regular target */ 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff>>=8*length; /* length was reduced by targetCapacity */ 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(targetCapacity) { 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* each branch falls through to the next one */ 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>16); 82583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 2: /*fall through*/ 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)(diff>>8); 82783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius case 1: /*fall through*/ 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(uint8_t)diff; 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will never occur */ 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32= c<0 ? -c : 0; 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=(uint32_t)prev; 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=(char *)target; 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* BOCU-1-to-Unicode conversion functions ----------------------------------- */ 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte lead bytes. 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param b lead byte; 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return (diff<<2)|count 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 86583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic inline int32_t 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerudecodeBocu1LeadByte(int32_t b) { 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t diff, count; 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b>=BOCU1_START_NEG_2) { 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* positive difference */ 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<BOCU1_START_POS_3) { 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* two bytes */ 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=1; 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b<BOCU1_START_POS_4) { 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* three bytes */ 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1; 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=2; 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* four bytes */ 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=BOCU1_REACH_POS_3+1; 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=3; 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* negative difference */ 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b>=BOCU1_START_NEG_3) { 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* two bytes */ 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=1; 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b>BOCU1_MIN) { 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* three bytes */ 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2; 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=2; 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* four bytes */ 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3; 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=3; 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return the state for decoding the trail byte(s) */ 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (diff<<2)|count; 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte trail bytes. 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param count number of remaining trail bytes including this one 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param b trail byte 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return new delta for diff including b - <0 indicates an error 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see decodeBocu1 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 91483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic inline int32_t 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerudecodeBocu1TrailByte(int32_t count, int32_t b) { 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b<=0x20) { 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* skip some C0 controls and make the trail byte range contiguous */ 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=bocu1ByteToTrail[b]; 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* b<0 for an illegal trail byte value will result in return<0 below */ 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if BOCU1_MAX_TRAIL<0xff 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b>BOCU1_MAX_TRAIL) { 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -99; 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b-=BOCU1_TRAIL_BYTE_OFFSET; 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add trail byte into difference and decrement count */ 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==1) { 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return b; 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(count==2) { 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return b*BOCU1_TRAIL_COUNT; 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* count==3 */ { 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT); 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source, *sourceLimit; 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *targetLimit; 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t prev, count, diff, c; 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t byteIndex; 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes; 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, nextSourceIndex; 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetLimit=pArgs->targetLimit; 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the converter state from UConverter */ 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=(int32_t)cnv->toUnicodeStatus; 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(prev==0) { 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */ 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=diff&3; 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff>>=2; 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteIndex=cnv->toULength; 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=cnv->toUBytes; 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* sourceIndex=-1 if the current character began in the previous buffer */ 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=byteIndex==0 ? 0 : -1; 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex=0; 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */ 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>0 && byteIndex>0 && target<targetLimit) { 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle: 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast loop for single-byte differences */ 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use count as the only loop counter variable */ 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=(int32_t)(sourceLimit-source); 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=(int32_t)(pArgs->targetLimit-target); 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>diff) { 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=diff; 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) { 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=prev+(c-BOCU1_MIDDLE); 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x3000) { 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=nextSourceIndex++; 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_SIMPLE_PREV(c); 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<=0x20) { 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0x20) { 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=nextSourceIndex++; 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */ 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* decode a sequence of single and lead bytes */ 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) { 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Write a code point directly from a single-byte difference. */ 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=prev+(c-BOCU1_MIDDLE); 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x3000) { 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_SIMPLE_PREV(c); 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<=0x20) { 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Direct-encoded C0 control code or space. 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Reset prev for C0 control codes but not for space. 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0x20) { 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) { 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Optimize two-byte case. */ 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c>=BOCU1_MIDDLE) { 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* trail byte */ 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=decodeBocu1TrailByte(1, *source++); 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) { 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[0]=source[-2]; 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[1]=source[-1]; 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteIndex=2; 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c==BOCU1_RESET) { 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only reset the state, no code point */ 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For multi-byte difference lead bytes, set the decoder state 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with the partial difference value from the lead byte and 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with the number of trail bytes. 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[0]=(uint8_t)c; 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteIndex=1; 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=decodeBocu1LeadByte(c); 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=diff&3; 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff>>=2; 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source>=sourceLimit) { 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=bytes[byteIndex++]=*source++; 1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* trail byte in any position */ 1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=decodeBocu1TrailByte(count, c); 1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0) { 1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff+=c; 1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(--count==0) { 1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* final trail byte, deliver a code point */ 1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteIndex=0; 1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=prev+diff; 1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint32_t)c>0x10ffff) { 1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* calculate the next prev and output c */ 1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_PREV(c); 1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output surrogate pair */ 111883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius *target++=U16_LEAD(c); 1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 112083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius *target++=U16_TRAIL(c); 1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 112683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius cnv->UCharErrorBuffer[0]=U16_TRAIL(c); 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=nextSourceIndex; 1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) { 1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state in UConverter to deal with the next character */ 1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=BOCU1_ASCII_PREV; 1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=0; 1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=(uint32_t)prev; 1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=(diff<<2)|count; 1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=byteIndex; 1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling. 1156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either 1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or 1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables 1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex. 1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs, 1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source, *sourceLimit; 1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *targetLimit; 1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t prev, count, diff, c; 1170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t byteIndex; 1172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes; 1173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_ALIGN_CODE(16) 1175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 1177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 1178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 1179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 1180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 1181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetLimit=pArgs->targetLimit; 1182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the converter state from UConverter */ 1184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=(int32_t)cnv->toUnicodeStatus; 1185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(prev==0) { 1186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 1187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */ 1189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=diff&3; 1190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff>>=2; 1191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteIndex=cnv->toULength; 1193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=cnv->toUBytes; 1194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */ 1196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>0 && byteIndex>0 && target<targetLimit) { 1197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 1198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle: 1201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fast loop for single-byte differences */ 1202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use count as the only loop counter variable */ 1203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=(int32_t)(sourceLimit-source); 1204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=(int32_t)(pArgs->targetLimit-target); 1205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>diff) { 1206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=diff; 1207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 1209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) { 1210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=prev+(c-BOCU1_MIDDLE); 1211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x3000) { 1212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 1213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_SIMPLE_PREV(c); 1214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<=0x20) { 1218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0x20) { 1219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 1220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 1222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 1226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* decode a sequence of single and lead bytes */ 1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit) { 1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target>=targetLimit) { 1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target is full */ 1233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) { 1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Write a code point directly from a single-byte difference. */ 1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=prev+(c-BOCU1_MIDDLE); 1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x3000) { 1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_SIMPLE_PREV(c); 1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fastSingle; 1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c<=0x20) { 1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Direct-encoded C0 control code or space. 1249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Reset prev for C0 control codes but not for space. 1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0x20) { 1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 1255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 1256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) { 1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Optimize two-byte case. */ 1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c>=BOCU1_MIDDLE) { 1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; 1260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; 1262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* trail byte */ 1265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=decodeBocu1TrailByte(1, *source++); 1266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) { 1267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[0]=source[-2]; 1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[1]=source[-1]; 1269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteIndex=2; 1270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c==BOCU1_RESET) { 1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only reset the state, no code point */ 1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_ASCII_PREV; 1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For multi-byte difference lead bytes, set the decoder state 1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with the partial difference value from the lead byte and 1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with the number of trail bytes. 1282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[0]=(uint8_t)c; 1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteIndex=1; 1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff=decodeBocu1LeadByte(c); 1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=diff&3; 1288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff>>=2; 1289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 1290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 1291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source>=sourceLimit) { 1292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=bytes[byteIndex++]=*source++; 1295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* trail byte in any position */ 1297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=decodeBocu1TrailByte(count, c); 1298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0) { 1299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru diff+=c; 1304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(--count==0) { 1305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* final trail byte, deliver a code point */ 1306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteIndex=0; 1307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=prev+diff; 1308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint32_t)c>0x10ffff) { 1309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 1311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* calculate the next prev and output c */ 1318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=BOCU1_PREV(c); 1319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 1320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=(UChar)c; 1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output surrogate pair */ 132383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius *target++=U16_LEAD(c); 1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target<targetLimit) { 132583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius *target++=U16_TRAIL(c); 1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* target overflow */ 132883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius cnv->UCharErrorBuffer[0]=U16_TRAIL(c); 1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) { 1338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state in UConverter to deal with the next character */ 1339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=BOCU1_ASCII_PREV; 1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=0; 1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the converter state back into UConverter */ 1343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=(uint32_t)prev; 1344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=(diff<<2)|count; 1345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=byteIndex; 1347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 1349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 1350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 1351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* miscellaneous ------------------------------------------------------------ */ 1355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _Bocu1Impl={ 1357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_BOCU1, 1358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _Bocu1ToUnicode, 1367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _Bocu1ToUnicodeWithOffsets, 1368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _Bocu1FromUnicode, 1369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _Bocu1FromUnicodeWithOffsets, 1370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 137683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ucnv_getCompleteUnicodeSet, 137783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 137883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius NULL, 137983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius NULL 1380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _Bocu1StaticData={ 1383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 1384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "BOCU-1", 1385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1214, /* CCSID for BOCU-1 */ 1386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, UCNV_BOCU1, 1387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */ 1388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */ 1389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, FALSE, 1390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _Bocu1Data={ 1396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t)0), 1397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_Bocu1StaticData, FALSE, &_Bocu1Impl, 139883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 0, 139983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UCNV_MBCS_TABLE_INITIALIZER 1400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1403