1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius*   Copyright (C) 2002-2011, International Business Machines
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************
883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius*   file name:  ucnvbocu.cpp
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   encoding:   US-ASCII
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   tab size:   8 (not used)
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   indentation:4
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created on: 2002mar27
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created by: Markus W. Scherer
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   This is an implementation of the Binary Ordered Compression for Unicode,
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_cb.h"
2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h"
2783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "putilimp.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h"
3083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "uassert.h"
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* BOCU-1 constants and macros ---------------------------------------------- */
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * BOCU-1 encodes the code points of a Unicode string as
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a sequence of byte-encoded differences (slope detection),
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * preserving lexical order.
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Optimize the difference-taking for runs of Unicode text within
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * small scripts:
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Most small scripts are allocated within aligned 128-blocks of Unicode
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * code points. Lexical order is preserved if the "previous code point" state
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is always moved into the middle of such a block.
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * areas into the middle of those areas.
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * C0 control codes and space are encoded with their US-ASCII bytes.
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "prev" is reset for C0 controls but not for space.
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* "prev" starts out in the middle of the ASCII range */
#define BOCU1_ASCII_PREV        0x40

/* byte values that delimit the difference encoding */
#define BOCU1_MIN               0x21
#define BOCU1_MIDDLE            0x90
#define BOCU1_MAX_LEAD          0xfe
#define BOCU1_MAX_TRAIL         0xff
#define BOCU1_RESET             0xff

/* how many lead byte values there are: BOCU1_MIN..BOCU1_MAX_LEAD inclusive */
#define BOCU1_COUNT             (1+BOCU1_MAX_LEAD-BOCU1_MIN)

/* some C0 control byte values double as trail bytes; account for them */
#define BOCU1_TRAIL_CONTROLS_COUNT  20
#define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)

/* how many trail byte values there are */
#define BOCU1_TRAIL_COUNT       (BOCU1_TRAIL_CONTROLS_COUNT+(BOCU1_MAX_TRAIL-BOCU1_MIN+1))

/*
 * how many single-byte codes exist per sign
 * (0==BOCU1_MIDDLE is counted with the positive ones)
 */
#define BOCU1_SINGLE            64

/* how many lead bytes exist per sign for 2-, 3- and 4-byte sequences */
#define BOCU1_LEAD_2            43
#define BOCU1_LEAD_3            3
#define BOCU1_LEAD_4            1

/* range of differences that fit into one byte */
#define BOCU1_REACH_POS_1   (BOCU1_SINGLE-1)
#define BOCU1_REACH_NEG_1   (-BOCU1_SINGLE)

/* range of differences that fit into two bytes */
#define BOCU1_REACH_POS_2   (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_2   (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)

/* range of differences that fit into three bytes */
#define BOCU1_REACH_POS_3   (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_3   \
    (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

/* first lead byte of each positive sequence length */
#define BOCU1_START_POS_2   (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
#define BOCU1_START_POS_3   (BOCU1_START_POS_2+BOCU1_LEAD_2)
#define BOCU1_START_POS_4   (BOCU1_START_POS_3+BOCU1_LEAD_3)   /* ==BOCU1_MAX_LEAD */

/* first lead byte of each negative sequence length */
#define BOCU1_START_NEG_2   (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
#define BOCU1_START_NEG_3   (BOCU1_START_NEG_2-BOCU1_LEAD_2)
#define BOCU1_START_NEG_4   (BOCU1_START_NEG_3-BOCU1_LEAD_3)   /* ==BOCU1_MIN+1 */
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Number of bytes (1..4) in the sequence that starts with the given lead byte.
 * The lead byte must not be BOCU1_RESET.
 * The argument is evaluated more than once.
 */
#define BOCU1_LENGTH_FROM_LEAD(lead) \
    (((lead)>=BOCU1_START_NEG_2 && (lead)<BOCU1_START_POS_2) ? 1 : \
     ((lead)>=BOCU1_START_NEG_3 && (lead)<BOCU1_START_POS_3) ? 2 : \
     ((lead)>=BOCU1_START_NEG_4 && (lead)<BOCU1_START_POS_4) ? 3 : 4)
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Number of bytes (1..4) in a sequence, derived from its packed form:
 * values at or above 0x04000000 (as unsigned) are 4-byte sequences,
 * otherwise the top byte holds the length.
 */
#define BOCU1_LENGTH_FROM_PACKED(packed) \
    ((uint32_t)(packed)>=0x04000000 ? 4 : (packed)>>24)
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Twelve commonly used C0 control codes, and space, are used only to encode
 * themselves directly. This makes BOCU-1 MIME-usable and reasonably safe
 * for ASCII-oriented software.
 *
 * These controls are
 *  00  NUL
 *
 *  07  BEL
 *  08  BS
 *
 *  09  TAB
 *  0a  LF
 *  0b  VT
 *  0c  FF
 *  0d  CR
 *
 *  0e  SO
 *  0f  SI
 *
 *  1a  SUB
 *  1b  ESC
 *
 * The remaining 20 C0 controls are encoded directly as well (to preserve
 * order), but they additionally serve as trail bytes in the difference
 * encoding (for better compression).
 *
 * BOCU1_TRAIL_TO_BYTE converts a trail value t from the difference
 * calculation into its external byte value: values below
 * BOCU1_TRAIL_CONTROLS_COUNT are looked up in bocu1TrailToByte[],
 * all others are shifted by BOCU1_TRAIL_BYTE_OFFSET.
 * The argument is evaluated more than once.
 */
#define BOCU1_TRAIL_TO_BYTE(t) \
    ((t)<BOCU1_TRAIL_CONTROLS_COUNT ? bocu1TrailToByte[t] : (t)+BOCU1_TRAIL_BYTE_OFFSET)
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Byte value map for control codes,
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from external byte values 0x00..0x20
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * External byte values that are illegal as trail bytes are mapped to -1.
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querubocu1ByteToTrail[BOCU1_MIN]={
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*  0     1     2     3     4     5     6     7    */
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    -1,   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*  8     9     a     b     c     d     e     f    */
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*  10    11    12    13    14    15    16    17   */
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*  18    19    1a    1b    1c    1d    1e    1f   */
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0x0e, 0x0f, -1,   -1,   0x10, 0x11, 0x12, 0x13,
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*  20   */
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    -1
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Byte value map for control codes,
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from trail byte values 0..19 (0..0x13) as used in the difference calculation
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to external byte values 0x00..0x20.
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querubocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*  0     1     2     3     4     5     6     7    */
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*  8     9     a     b     c     d     e     f    */
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*  10    11    12    13   */
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0x1c, 0x1d, 0x1e, 0x1f
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * Integer division and modulo with negative numerators
 * yield negative modulo results and quotients that are one more than
 * what we need here.
 * This macro adjusts the results so that the modulo-value m is always >=0
 * (assuming a positive divisor d).
 *
 * For positive n, the if() condition is always FALSE.
 *
 * The body is wrapped in do { } while(0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.
 * Invoke it as a statement terminated by a semicolon.
 * The arguments are evaluated more than once.
 *
 * @param n Number to be split into quotient and rest.
 *          Will be modified to contain the quotient.
 * @param d Divisor.
 * @param m Output variable for the rest (modulo result).
 */
#define NEGDIVMOD(n, d, m) do { \
    (m)=(n)%(d); \
    (n)/=(d); \
    if((m)<0) { \
        --(n); \
        (m)+=(d); \
    } \
} while(0)
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Shortcuts that are faster than packDiff() for diff values with short encodings. */

/** TRUE if the diff value lies within the single-byte reach. */
#define DIFF_IS_SINGLE(diff) ((diff)>=BOCU1_REACH_NEG_1 && (diff)<=BOCU1_REACH_POS_1)

/** The one byte that encodes a diff value within the single-byte reach. */
#define PACK_SINGLE_DIFF(diff) ((diff)+BOCU1_MIDDLE)

/** TRUE if the diff value lies within the two-byte reach. */
#define DIFF_IS_DOUBLE(diff) ((diff)>=BOCU1_REACH_NEG_2 && (diff)<=BOCU1_REACH_POS_2)
22283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* BOCU-1 implementation functions ------------------------------------------ */
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Clear the low 7 bits of c and add BOCU1_ASCII_PREV: the middle of c's aligned 128-block. */
#define BOCU1_SIMPLE_PREV(c) (BOCU1_ASCII_PREV+((c)&~0x7f))
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Compute the next "previous" value for differencing
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from the current code point.
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return "previous code point" state value
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
23483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic inline int32_t
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querubocu1Prev(int32_t c) {
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* compute new prev */
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(/* 0x3040<=c && */ c<=0x309f) {
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Hiragana is not 128-aligned */
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0x3070;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(0x4e00<=c && c<=0x9fa5) {
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* CJK Unihan */
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0x4e00-BOCU1_REACH_NEG_2;
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(0xac00<=c /* && c<=0xd7a3 */) {
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Korean Hangul */
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return (0xd7a3+0xac00)/2;
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* mostly small scripts */
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return BOCU1_SIMPLE_PREV(c);
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * Fast version of bocu1Prev() for most scripts: only code points in
 * 0x3040..0xd7a3 go through the function call.
 * The argument is evaluated more than once.
 */
#define BOCU1_PREV(c) ((0x3040<=(c) && (c)<=0xd7a3) ? bocu1Prev(c) : BOCU1_SIMPLE_PREV(c))
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UConverter fields are used as follows:
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * fromUnicodeStatus    encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * toUnicodeStatus      decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * mode                 decoder's incomplete (diff<<2)|count (ignored when toULength==0)
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* BOCU-1-from-Unicode conversion functions --------------------------------- */
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return a packed integer with them.
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
27183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * The encoding favors small absolute differences with short encodings
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to compress runs of same-script characters.
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Optimized version with unrolled loops and fewer floating-point operations
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * than the standard packDiff().
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param diff difference value -0x10ffff..0x10ffff
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      0x010000zz for 1-byte sequence zz
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      0x0200yyzz for 2-byte sequence yy zz
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      0x03xxyyzz for 3-byte sequence xx yy zz
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerupackDiff(int32_t diff) {
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t result, m;
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
28883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(diff>=BOCU1_REACH_NEG_1) {
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* mostly positive differences, and single-byte negative ones */
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0   /* single-byte case handled in macros, see below */
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(diff<=BOCU1_REACH_POS_1) {
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* single byte */
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0x01000000|(BOCU1_MIDDLE+diff);
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(diff<=BOCU1_REACH_POS_2) {
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* two bytes */
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff-=BOCU1_REACH_POS_1+1;
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result=0x02000000;
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            m=diff%BOCU1_TRAIL_COUNT;
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff/=BOCU1_TRAIL_COUNT;
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(m);
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=(BOCU1_START_POS_2+diff)<<8;
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(diff<=BOCU1_REACH_POS_3) {
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* three bytes */
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff-=BOCU1_REACH_POS_2+1;
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result=0x03000000;
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            m=diff%BOCU1_TRAIL_COUNT;
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff/=BOCU1_TRAIL_COUNT;
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(m);
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            m=diff%BOCU1_TRAIL_COUNT;
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff/=BOCU1_TRAIL_COUNT;
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=(BOCU1_START_POS_3+diff)<<16;
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* four bytes */
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff-=BOCU1_REACH_POS_3+1;
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            m=diff%BOCU1_TRAIL_COUNT;
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff/=BOCU1_TRAIL_COUNT;
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result=BOCU1_TRAIL_TO_BYTE(m);
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            m=diff%BOCU1_TRAIL_COUNT;
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff/=BOCU1_TRAIL_COUNT;
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * We know that / and % would deliver quotient 0 and rest=diff.
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Avoid division and modulo for performance.
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=((uint32_t)BOCU1_START_POS_4)<<24;
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* two- to four-byte negative differences */
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(diff>=BOCU1_REACH_NEG_2) {
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* two bytes */
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff-=BOCU1_REACH_NEG_1;
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result=0x02000000;
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(m);
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=(BOCU1_START_NEG_2+diff)<<8;
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(diff>=BOCU1_REACH_NEG_3) {
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* three bytes */
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff-=BOCU1_REACH_NEG_2;
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result=0x03000000;
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(m);
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=(BOCU1_START_NEG_3+diff)<<16;
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* four bytes */
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff-=BOCU1_REACH_NEG_3;
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result=BOCU1_TRAIL_TO_BYTE(m);
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * We know that NEGDIVMOD would deliver
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Avoid division and modulo for performance.
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            m=diff+BOCU1_TRAIL_COUNT;
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_TRAIL_TO_BYTE(m)<<16;
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result|=BOCU1_MIN<<24;
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             UErrorCode *pErrorCode) {
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *source, *sourceLimit;
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *target;
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity;
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t *offsets;
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t prev, c, diff;
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t sourceIndex, nextSourceIndex;
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_ALIGN_CODE(16)
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pArgs->converter;
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=pArgs->source;
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=pArgs->sourceLimit;
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=(uint8_t *)pArgs->target;
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offsets=pArgs->offsets;
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the converter state from UConverter */
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=cnv->fromUChar32;
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    prev=(int32_t)cnv->fromUnicodeStatus;
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(prev==0) {
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        prev=BOCU1_ASCII_PREV;
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* sourceIndex=-1 if the current character began in the previous buffer */
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceIndex= c==0 ? 0 : -1;
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    nextSourceIndex=0;
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion loop */
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c!=0 && targetCapacity>0) {
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto getTrail;
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle:
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* fast loop for single-byte differences */
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* use only one loop counter variable, targetCapacity, not also source */
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    diff=(int32_t)(sourceLimit-source);
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(targetCapacity>diff) {
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity=diff;
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(targetCapacity>0 && (c=*source)<0x3000) {
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c<=0x20) {
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c!=0x20) {
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_ASCII_PREV;
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)c;
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *offsets++=nextSourceIndex++;
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++source;
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            --targetCapacity;
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=c-prev;
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(DIFF_IS_SINGLE(diff)) {
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_SIMPLE_PREV(c);
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=nextSourceIndex++;
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++source;
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* restore real values */
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* regular loop for all cases */
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(source<sourceLimit) {
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(targetCapacity>0) {
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=*source++;
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++nextSourceIndex;
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<=0x20) {
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /*
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * ISO C0 control & space:
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * Encode directly for MIME compatibility,
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * and reset state except for space, to not disrupt compression.
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 */
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(c!=0x20) {
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    prev=BOCU1_ASCII_PREV;
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)c;
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                continue;
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            if(U16_IS_LEAD(c)) {
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail:
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(source<sourceLimit) {
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* test the following code unit */
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UChar trail=*source;
48983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    if(U16_IS_TRAIL(trail)) {
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ++source;
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ++nextSourceIndex;
49283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                        c=U16_GET_SUPPLEMENTARY(c, trail);
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* no more input */
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * all other Unicode code points c==U+0021..U+10ffff
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * are encoded with the difference c-prev
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             *
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * a new prev is computed from c,
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * placed in the middle of a 0x80-block (for most small scripts) or
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * in the middle of the Unihan and Hangul blocks
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * to statistically minimize the following difference
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=c-prev;
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            prev=BOCU1_PREV(c);
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(DIFF_IS_SINGLE(diff)) {
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(c<0x3000) {
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto fastSingle;
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* optimize 2-byte case */
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                int32_t m;
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(diff>=0) {
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff-=BOCU1_REACH_POS_1+1;
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    m=diff%BOCU1_TRAIL_COUNT;
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff/=BOCU1_TRAIL_COUNT;
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff+=BOCU1_START_POS_2;
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff-=BOCU1_REACH_NEG_1;
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff+=BOCU1_START_NEG_2;
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)diff;
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                targetCapacity-=2;
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                int32_t length; /* will be 2..4 */
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                diff=packDiff(diff);
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=BOCU1_LENGTH_FROM_PACKED(diff);
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* write the output character bytes from diff and length */
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* from the first if in the loop we know that targetCapacity>0 */
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(length<=targetCapacity) {
549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    switch(length) {
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* each branch falls through to the next one */
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    case 4:
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>24);
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
55483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 3: /*fall through*/
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>16);
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
55783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 2: /*fall through*/
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>8);
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* case 1: handled above */
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)diff;
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    default:
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* will never occur */
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    targetCapacity-=length;
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    sourceIndex=nextSourceIndex;
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    uint8_t *charErrorBuffer;
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /*
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * We actually do this backwards here:
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * In order to save an intermediate variable, we output
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * first to the overflow buffer what does not fit into the
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * regular target.
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     */
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* we know that 1<=targetCapacity<length<=4 */
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length-=targetCapacity;
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    switch(length) {
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* each branch falls through to the next one */
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    case 3:
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *charErrorBuffer++=(uint8_t)(diff>>16);
58583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 2: /*fall through*/
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *charErrorBuffer++=(uint8_t)(diff>>8);
58783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 1: /*fall through*/
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *charErrorBuffer=(uint8_t)diff;
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    default:
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* will never occur */
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->charErrorBufferLength=(int8_t)length;
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* now output what fits into the regular target */
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff>>=8*length; /* length was reduced by targetCapacity */
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    switch(targetCapacity) {
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* each branch falls through to the next one */
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    case 3:
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>16);
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
60283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 2: /*fall through*/
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>8);
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
60583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 1: /*fall through*/
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)diff;
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *offsets++=sourceIndex;
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    default:
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* will never occur */
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* target overflow */
614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    targetCapacity=0;
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* target is full */
621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set the converter state back into UConverter */
627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->fromUChar32= c<0 ? -c : 0;
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->fromUnicodeStatus=(uint32_t)prev;
629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=source;
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=(char *)target;
633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->offsets=offsets;
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex.
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  UErrorCode *pErrorCode) {
646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *source, *sourceLimit;
648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *target;
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity;
650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t prev, c, diff;
652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pArgs->converter;
655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=pArgs->source;
656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=pArgs->sourceLimit;
657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=(uint8_t *)pArgs->target;
658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the converter state from UConverter */
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=cnv->fromUChar32;
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    prev=(int32_t)cnv->fromUnicodeStatus;
663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(prev==0) {
664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        prev=BOCU1_ASCII_PREV;
665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion loop */
668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c!=0 && targetCapacity>0) {
669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto getTrail;
670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle:
673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* fast loop for single-byte differences */
674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* use only one loop counter variable, targetCapacity, not also source */
675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    diff=(int32_t)(sourceLimit-source);
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(targetCapacity>diff) {
677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity=diff;
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(targetCapacity>0 && (c=*source)<0x3000) {
680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c<=0x20) {
681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c!=0x20) {
682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_ASCII_PREV;
683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)c;
685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=c-prev;
687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(DIFF_IS_SINGLE(diff)) {
688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_SIMPLE_PREV(c);
689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++source;
695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --targetCapacity;
696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* restore real values */
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* regular loop for all cases */
701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(source<sourceLimit) {
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(targetCapacity>0) {
703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=*source++;
704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<=0x20) {
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /*
707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * ISO C0 control & space:
708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * Encode directly for MIME compatibility,
709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * and reset state except for space, to not disrupt compression.
710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 */
711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(c!=0x20) {
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    prev=BOCU1_ASCII_PREV;
713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)c;
715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                continue;
717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
71983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            if(U16_IS_LEAD(c)) {
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail:
721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(source<sourceLimit) {
722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* test the following code unit */
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UChar trail=*source;
72483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    if(U16_IS_TRAIL(trail)) {
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ++source;
72683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                        c=U16_GET_SUPPLEMENTARY(c, trail);
727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* no more input */
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * all other Unicode code points c==U+0021..U+10ffff
737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * are encoded with the difference c-prev
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             *
739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * a new prev is computed from c,
740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * placed in the middle of a 0x80-block (for most small scripts) or
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * in the middle of the Unihan and Hangul blocks
742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * to statistically minimize the following difference
743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=c-prev;
745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            prev=BOCU1_PREV(c);
746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(DIFF_IS_SINGLE(diff)) {
747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(c<0x3000) {
750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto fastSingle;
751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* optimize 2-byte case */
754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                int32_t m;
755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(diff>=0) {
757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff-=BOCU1_REACH_POS_1+1;
758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    m=diff%BOCU1_TRAIL_COUNT;
759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff/=BOCU1_TRAIL_COUNT;
760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff+=BOCU1_START_POS_2;
761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff-=BOCU1_REACH_NEG_1;
763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff+=BOCU1_START_NEG_2;
765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)diff;
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                targetCapacity-=2;
769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                int32_t length; /* will be 2..4 */
771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                diff=packDiff(diff);
773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=BOCU1_LENGTH_FROM_PACKED(diff);
774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* write the output character bytes from diff and length */
776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* from the first if in the loop we know that targetCapacity>0 */
777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(length<=targetCapacity) {
778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    switch(length) {
779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* each branch falls through to the next one */
780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    case 4:
781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>24);
78283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 3: /*fall through*/
783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>16);
784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* case 2: handled above */
785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>8);
786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* case 1: handled above */
787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)diff;
788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    default:
789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* will never occur */
790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    targetCapacity-=length;
793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    uint8_t *charErrorBuffer;
795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /*
797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * We actually do this backwards here:
798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * In order to save an intermediate variable, we output
799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * first to the overflow buffer what does not fit into the
800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * regular target.
801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     */
802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* we know that 1<=targetCapacity<length<=4 */
803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length-=targetCapacity;
804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    switch(length) {
806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* each branch falls through to the next one */
807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    case 3:
808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *charErrorBuffer++=(uint8_t)(diff>>16);
80983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 2: /*fall through*/
810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *charErrorBuffer++=(uint8_t)(diff>>8);
81183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 1: /*fall through*/
812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *charErrorBuffer=(uint8_t)diff;
813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    default:
814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* will never occur */
815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->charErrorBufferLength=(int8_t)length;
818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* now output what fits into the regular target */
820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diff>>=8*length; /* length was reduced by targetCapacity */
821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    switch(targetCapacity) {
822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* each branch falls through to the next one */
823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    case 3:
824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>16);
82583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 2: /*fall through*/
826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)(diff>>8);
82783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    case 1: /*fall through*/
828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *target++=(uint8_t)diff;
829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    default:
830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* will never occur */
831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* target overflow */
835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    targetCapacity=0;
836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* target is full */
842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set the converter state back into UConverter */
848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->fromUChar32= c<0 ? -c : 0;
849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->fromUnicodeStatus=(uint32_t)prev;
850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=source;
853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=(char *)target;
854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte lead bytes.
860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param b lead byte;
862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *          BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return (diff<<2)|count
864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
86583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic inline int32_t
866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerudecodeBocu1LeadByte(int32_t b) {
867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t diff, count;
868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(b>=BOCU1_START_NEG_2) {
870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* positive difference */
871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(b<BOCU1_START_POS_3) {
872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* two bytes */
873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            count=1;
875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(b<BOCU1_START_POS_4) {
876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* three bytes */
877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            count=2;
879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* four bytes */
881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=BOCU1_REACH_POS_3+1;
882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            count=3;
883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* negative difference */
886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(b>=BOCU1_START_NEG_3) {
887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* two bytes */
888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            count=1;
890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(b>BOCU1_MIN) {
891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* three bytes */
892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            count=2;
894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* four bytes */
896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            count=3;
898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* return the state for decoding the trail byte(s) */
902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (diff<<2)|count;
903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte trail bytes.
907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param count number of remaining trail bytes including this one
909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param b trail byte
910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return new delta for diff including b - <0 indicates an error
911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see decodeBocu1
913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
91483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic inline int32_t
915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerudecodeBocu1TrailByte(int32_t count, int32_t b) {
916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(b<=0x20) {
917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* skip some C0 controls and make the trail byte range contiguous */
918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        b=bocu1ByteToTrail[b];
919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* b<0 for an illegal trail byte value will result in return<0 below */
920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if BOCU1_MAX_TRAIL<0xff
921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(b>BOCU1_MAX_TRAIL) {
922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -99;
923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        b-=BOCU1_TRAIL_BYTE_OFFSET;
926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* add trail byte into difference and decrement count */
929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(count==1) {
930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return b;
931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(count==2) {
932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return b*BOCU1_TRAIL_COUNT;
933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* count==3 */ {
934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           UErrorCode *pErrorCode) {
941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source, *sourceLimit;
943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *target;
944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *targetLimit;
945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t *offsets;
946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t prev, count, diff, c;
948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t byteIndex;
950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *bytes;
951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t sourceIndex, nextSourceIndex;
953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pArgs->converter;
956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(const uint8_t *)pArgs->source;
957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=pArgs->target;
959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetLimit=pArgs->targetLimit;
960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offsets=pArgs->offsets;
961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the converter state from UConverter */
963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    prev=(int32_t)cnv->toUnicodeStatus;
964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(prev==0) {
965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        prev=BOCU1_ASCII_PREV;
966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    count=diff&3;
969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    diff>>=2;
970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    byteIndex=cnv->toULength;
972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    bytes=cnv->toUBytes;
973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* sourceIndex=-1 if the current character began in the previous buffer */
975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceIndex=byteIndex==0 ? 0 : -1;
976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    nextSourceIndex=0;
977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(count>0 && byteIndex>0 && target<targetLimit) {
980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto getTrail;
981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle:
984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* fast loop for single-byte differences */
985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* use count as the only loop counter variable */
986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    diff=(int32_t)(sourceLimit-source);
987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    count=(int32_t)(pArgs->targetLimit-target);
988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(count>diff) {
989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        count=diff;
990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(count>0) {
992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=prev+(c-BOCU1_MIDDLE);
994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<0x3000) {
995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)c;
996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=nextSourceIndex++;
997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_SIMPLE_PREV(c);
998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(c<=0x20) {
1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c!=0x20) {
1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_ASCII_PREV;
1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(UChar)c;
1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *offsets++=nextSourceIndex++;
1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++source;
1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --count;
1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* decode a sequence of single and lead bytes */
1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(source<sourceLimit) {
1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(target>=targetLimit) {
1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* target is full */
1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++nextSourceIndex;
1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=*source++;
1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Write a code point directly from a single-byte difference. */
1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=prev+(c-BOCU1_MIDDLE);
1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<0x3000) {
1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)c;
1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_SIMPLE_PREV(c);
1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceIndex=nextSourceIndex;
1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(c<=0x20) {
1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Direct-encoded C0 control code or space.
1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Reset prev for C0 control codes but not for space.
1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c!=0x20) {
1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_ASCII_PREV;
1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(UChar)c;
1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *offsets++=sourceIndex;
1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sourceIndex=nextSourceIndex;
1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Optimize two-byte case. */
1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c>=BOCU1_MIDDLE) {
1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* trail byte */
1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++nextSourceIndex;
1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=decodeBocu1TrailByte(1, *source++);
1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                bytes[0]=source[-2];
1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                bytes[1]=source[-1];
1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                byteIndex=2;
1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(c==BOCU1_RESET) {
1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* only reset the state, no code point */
1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            prev=BOCU1_ASCII_PREV;
1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sourceIndex=nextSourceIndex;
1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * For multi-byte difference lead bytes, set the decoder state
1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * with the partial difference value from the lead byte and
1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * with the number of trail bytes.
1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            bytes[0]=(uint8_t)c;
1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            byteIndex=1;
1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=decodeBocu1LeadByte(c);
1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            count=diff&3;
1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff>>=2;
1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail:
1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for(;;) {
1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(source>=sourceLimit) {
1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++nextSourceIndex;
1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=bytes[byteIndex++]=*source++;
1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* trail byte in any position */
1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=decodeBocu1TrailByte(count, c);
1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(c<0) {
1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                diff+=c;
1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(--count==0) {
1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* final trail byte, deliver a code point */
1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    byteIndex=0;
1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=prev+diff;
1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if((uint32_t)c>0x10ffff) {
1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto endloop;
1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* calculate the next prev and output c */
1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        prev=BOCU1_PREV(c);
1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c<=0xffff) {
1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(UChar)c;
1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *offsets++=sourceIndex;
1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* output surrogate pair */
111883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            *target++=U16_LEAD(c);
1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(target<targetLimit) {
112083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                *target++=U16_TRAIL(c);
1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target overflow */
1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex;
112683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->UCharErrorBufferLength=1;
1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sourceIndex=nextSourceIndex;
1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop:
1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* set the converter state in UConverter to deal with the next character */
1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->mode=0;
1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* set the converter state back into UConverter */
1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUnicodeStatus=(uint32_t)prev;
1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->mode=(diff<<2)|count;
1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->toULength=byteIndex;
1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=(const char *)source;
1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=target;
1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->offsets=offsets;
1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return;
1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
1155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
1156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If a change is made in the original function, then either
1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change this function the same way or
1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * re-copy the original function and remove the variables
1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets, sourceIndex, and nextSourceIndex.
1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UErrorCode *pErrorCode) {
1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source, *sourceLimit;
1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *target;
1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *targetLimit;
1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t prev, count, diff, c;
1170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t byteIndex;
1172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *bytes;
1173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_ALIGN_CODE(16)
1175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
1177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pArgs->converter;
1178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(const uint8_t *)pArgs->source;
1179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
1180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=pArgs->target;
1181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetLimit=pArgs->targetLimit;
1182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the converter state from UConverter */
1184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    prev=(int32_t)cnv->toUnicodeStatus;
1185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(prev==0) {
1186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        prev=BOCU1_ASCII_PREV;
1187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
1189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    count=diff&3;
1190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    diff>>=2;
1191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    byteIndex=cnv->toULength;
1193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    bytes=cnv->toUBytes;
1194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
1196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(count>0 && byteIndex>0 && target<targetLimit) {
1197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto getTrail;
1198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufastSingle:
1201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* fast loop for single-byte differences */
1202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* use count as the only loop counter variable */
1203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    diff=(int32_t)(sourceLimit-source);
1204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    count=(int32_t)(pArgs->targetLimit-target);
1205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(count>diff) {
1206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        count=diff;
1207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(count>0) {
1209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
1210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=prev+(c-BOCU1_MIDDLE);
1211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<0x3000) {
1212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)c;
1213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_SIMPLE_PREV(c);
1214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
1215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(c<=0x20) {
1218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c!=0x20) {
1219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_ASCII_PREV;
1220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(UChar)c;
1222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++source;
1226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --count;
1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* decode a sequence of single and lead bytes */
1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(source<sourceLimit) {
1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(target>=targetLimit) {
1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* target is full */
1233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=*source++;
1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Write a code point directly from a single-byte difference. */
1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=prev+(c-BOCU1_MIDDLE);
1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<0x3000) {
1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(UChar)c;
1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_SIMPLE_PREV(c);
1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto fastSingle;
1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(c<=0x20) {
1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Direct-encoded C0 control code or space.
1249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Reset prev for C0 control codes but not for space.
1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c!=0x20) {
1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                prev=BOCU1_ASCII_PREV;
1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(UChar)c;
1255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
1256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Optimize two-byte case. */
1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c>=BOCU1_MIDDLE) {
1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
1260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
1261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
1262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* trail byte */
1265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=decodeBocu1TrailByte(1, *source++);
1266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
1267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                bytes[0]=source[-2];
1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                bytes[1]=source[-1];
1269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                byteIndex=2;
1270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(c==BOCU1_RESET) {
1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* only reset the state, no code point */
1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            prev=BOCU1_ASCII_PREV;
1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * For multi-byte difference lead bytes, set the decoder state
1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * with the partial difference value from the lead byte and
1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * with the number of trail bytes.
1282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
1283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            bytes[0]=(uint8_t)c;
1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            byteIndex=1;
1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff=decodeBocu1LeadByte(c);
1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            count=diff&3;
1288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            diff>>=2;
1289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail:
1290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for(;;) {
1291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(source>=sourceLimit) {
1292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
1293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=bytes[byteIndex++]=*source++;
1295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* trail byte in any position */
1297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=decodeBocu1TrailByte(count, c);
1298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(c<0) {
1299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto endloop;
1301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                diff+=c;
1304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(--count==0) {
1305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* final trail byte, deliver a code point */
1306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    byteIndex=0;
1307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=prev+diff;
1308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if((uint32_t)c>0x10ffff) {
1309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        goto endloop;
1311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
1313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* calculate the next prev and output c */
1318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        prev=BOCU1_PREV(c);
1319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c<=0xffff) {
1320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(UChar)c;
1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* output surrogate pair */
132383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            *target++=U16_LEAD(c);
1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(target<targetLimit) {
132583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                *target++=U16_TRAIL(c);
1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* target overflow */
132883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->UCharErrorBufferLength=1;
1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop:
1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
1338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* set the converter state in UConverter to deal with the next character */
1339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->mode=0;
1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* set the converter state back into UConverter */
1343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUnicodeStatus=(uint32_t)prev;
1344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->mode=(diff<<2)|count;
1345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->toULength=byteIndex;
1347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
1349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=(const char *)source;
1350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=target;
1351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return;
1352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* miscellaneous ------------------------------------------------------------ */
1355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _Bocu1Impl={
1357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCNV_BOCU1,
1358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Bocu1ToUnicode,
1367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Bocu1ToUnicodeWithOffsets,
1368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Bocu1FromUnicode,
1369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Bocu1FromUnicodeWithOffsets,
1370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
137683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    ucnv_getCompleteUnicodeSet,
137783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
137883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    NULL,
137983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    NULL
1380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _Bocu1StaticData={
1383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterStaticData),
1384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "BOCU-1",
1385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    1214, /* CCSID for BOCU-1 */
1386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCNV_IBM, UCNV_BOCU1,
1387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
1388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
1389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FALSE, FALSE,
1390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
1391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
1392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
1393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _Bocu1Data={
1396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterSharedData), ~((uint32_t)0),
1397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, NULL, &_Bocu1StaticData, FALSE, &_Bocu1Impl,
139883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    0,
139983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UCNV_MBCS_TABLE_INITIALIZER
1400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
1403