1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************
3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   Copyright (C) 2002-2010, International Business Machines
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   file name:  bocu1tst.c
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   encoding:   US-ASCII
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   tab size:   8 (not used)
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   indentation:4
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   created on: 2002may27
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   created by: Markus W. Scherer
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   This is the reference implementation of BOCU-1,
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   the MIME-friendly form of the Binary Ordered Compression for Unicode,
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   taken directly from ### http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/bocu1/
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   The files bocu1.h and bocu1.c from the design folder are taken
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   verbatim (minus copyright and #include) and copied together into this file.
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   The reference code and some of the reference bocu1tst.c
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   is modified to run as part of the ICU cintltst
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   test framework (minus main(), log_ln() etc. instead of printf()).
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   This reference implementation is used here to verify
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   the ICU BOCU-1 implementation, which is
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   adapted for ICU conversion APIs and optimized.
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   ### links in design doc to here and to ucnvbocu.c
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cintltst.h"
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* Element count of a true array (not a pointer); the result has type size_t. */
#define LENGTHOF(array) (sizeof(array)/sizeof(*(array)))
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1.h ---------------------------------- */
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* BOCU-1 constants and macros ---------------------------------------------- */
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 encodes the code points of a Unicode string as
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a sequence of byte-encoded differences (slope detection),
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * preserving lexical order.
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Optimize the difference-taking for runs of Unicode text within
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * small scripts:
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Most small scripts are allocated within aligned 128-blocks of Unicode
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code points. Lexical order is preserved if the "previous code point" state
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is always moved into the middle of such a block.
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * areas into the middle of those areas.
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * C0 control codes and space are encoded with their US-ASCII bytes.
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "prev" is reset for C0 controls but not for space.
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* "prev" starts out in the middle of the ASCII range. */
#define BOCU1_ASCII_PREV        0x40

/* Byte values that bound the difference encoding. */
#define BOCU1_MIN               0x21
#define BOCU1_MIDDLE            0x90
#define BOCU1_MAX_LEAD          0xfe

/*
 * The L suffix keeps arithmetic that involves BOCU1_MAX_TRAIL
 * working on 16-bit compilers.
 */
#define BOCU1_MAX_TRAIL         0xffL
#define BOCU1_RESET             0xff

/* How many distinct lead byte values exist. */
#define BOCU1_COUNT             (BOCU1_MAX_LEAD-BOCU1_MIN+1)

/*
 * Some C0 control byte values double as trail bytes;
 * the trail byte counts and offsets account for them.
 */
#define BOCU1_TRAIL_CONTROLS_COUNT  20
#define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)

/* How many distinct trail byte values exist. */
#define BOCU1_TRAIL_COUNT       ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)

/*
 * Count of single-byte codes on each side of the middle;
 * the difference 0 (encoded as BOCU1_MIDDLE) is counted as positive.
 */
#define BOCU1_SINGLE            64

/* Lead bytes available per sign for 2-, 3- and 4-byte sequences. */
#define BOCU1_LEAD_2            43
#define BOCU1_LEAD_3            3
#define BOCU1_LEAD_4            1

/* Largest/smallest difference that fits into a single byte. */
#define BOCU1_REACH_POS_1   (BOCU1_SINGLE-1)
#define BOCU1_REACH_NEG_1   (-BOCU1_SINGLE)

/* Largest/smallest difference that fits into two bytes. */
#define BOCU1_REACH_POS_2   (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_2   (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)

/* Largest/smallest difference that fits into three bytes. */
#define BOCU1_REACH_POS_3   (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_3   (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

/* First lead byte of each positive multi-byte range. */
#define BOCU1_START_POS_2   (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
#define BOCU1_START_POS_3   (BOCU1_START_POS_2+BOCU1_LEAD_2)
#define BOCU1_START_POS_4   (BOCU1_START_POS_3+BOCU1_LEAD_3)   /* ==BOCU1_MAX_LEAD */

/* Start values of the negative multi-byte lead byte ranges. */
#define BOCU1_START_NEG_2   (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
#define BOCU1_START_NEG_3   (BOCU1_START_NEG_2-BOCU1_LEAD_2)
#define BOCU1_START_NEG_4   (BOCU1_START_NEG_3-BOCU1_LEAD_3)   /* ==BOCU1_MIN+1 */

/*
 * Sequence length (1..4) implied by a lead byte other than BOCU1_RESET.
 * The lead byte ranges are nested, so the test runs from the outermost
 * range inwards. The argument is evaluated more than once.
 */
#define BOCU1_LENGTH_FROM_LEAD(lead) \
    (((lead)<BOCU1_START_NEG_4 || BOCU1_START_POS_4<=(lead)) ? 4 : \
     ((lead)<BOCU1_START_NEG_3 || BOCU1_START_POS_3<=(lead)) ? 3 : \
     ((lead)<BOCU1_START_NEG_2 || BOCU1_START_POS_2<=(lead)) ? 2 : 1)

/*
 * Sequence length (1..4) of a packed result: for 1..3 bytes the length is
 * stored in the top byte, for 4 bytes the top byte is the lead byte (>0x03).
 */
#define BOCU1_LENGTH_FROM_PACKED(packed) \
    ((uint32_t)(packed)>=0x04000000 ? 4 : (packed)>>24)
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Twelve commonly used C0 control codes, plus space, only ever stand for
 * themselves. That keeps BOCU-1 usable in MIME and reasonably safe for
 * ASCII-oriented software.
 *
 * The twelve controls are
 *  0   NUL
 *  7   BEL
 *  8   BS
 *  9   TAB
 *  a   LF
 *  b   VT
 *  c   FF
 *  d   CR
 *  e   SO
 *  f   SI
 * 1a   SUB
 * 1b   ESC
 *
 * The remaining 20 C0 controls are encoded directly as well (which preserves
 * order), but their byte values additionally serve as trail bytes in the
 * difference encoding (which improves compression).
 *
 * BOCU1_TRAIL_TO_BYTE maps an internal trail value t to its external byte:
 * the first BOCU1_TRAIL_CONTROLS_COUNT values go through the
 * bocu1TrailToByte[] table, all others are shifted by BOCU1_TRAIL_BYTE_OFFSET.
 * The argument is evaluated more than once.
 */
#define BOCU1_TRAIL_TO_BYTE(t) ((t)<BOCU1_TRAIL_CONTROLS_COUNT ? bocu1TrailToByte[t] : (t)+BOCU1_TRAIL_BYTE_OFFSET)
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte value map for control codes,
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from external byte values 0x00..0x20
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * External byte values that are illegal as trail bytes are mapped to -1.
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int8_t
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1ByteToTrail[BOCU1_MIN]={
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  0     1     2     3     4     5     6     7    */
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    -1,   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  8     9     a     b     c     d     e     f    */
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  10    11    12    13    14    15    16    17   */
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  18    19    1a    1b    1c    1d    1e    1f   */
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x0e, 0x0f, -1,   -1,   0x10, 0x11, 0x12, 0x13,
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  20   */
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    -1
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte value map for control codes,
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from trail byte values 0..19 (0..0x13) as used in the difference calculation
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to external byte values 0x00..0x20.
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int8_t
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  0     1     2     3     4     5     6     7    */
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  8     9     a     b     c     d     e     f    */
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  10    11    12    13   */
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x1c, 0x1d, 0x1e, 0x1f
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Split n into a quotient and a non-negative rest.
 *
 * C integer division with a negative numerator gives a negative modulo
 * result and a quotient that is one more than what is needed here.
 * This macro corrects both so that the rest m is always >=0.
 * For positive n the correction branch is never taken.
 *
 * All three arguments are evaluated more than once; n and m must be
 * modifiable lvalues.
 *
 * @param n Number to be split; replaced by the quotient.
 * @param d Divisor.
 * @param m Output variable that receives the rest (modulo result).
 */
#define NEGDIVMOD(n, d, m) { \
    (m)=(n)%(d); \
    (n)=(n)/(d); \
    if((m)<0) { \
        (m)+=(d); \
        (n)-=1; \
    } \
}
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Decoder state that decodeBocu1() carries from one input byte to the next.
 * NOTE(review): the field meanings below are inferred from the names and
 * the encoder documentation; confirm against decodeBocu1().
 */
typedef struct Bocu1Rx {
    int32_t prev;   /* presumably the "previous code point" state */
    int32_t count;  /* presumably the number of trail bytes still expected */
    int32_t diff;   /* presumably the partially accumulated difference */
} Bocu1Rx;
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Function prototypes ------------------------------------------------------ */
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* see bocu1.c */
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerupackDiff(int32_t diff);
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruencodeBocu1(int32_t *pPrev, int32_t c);
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1(Bocu1Rx *pRx, uint8_t b);
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1.c ---------------------------------- */
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* BOCU-1 implementation functions ------------------------------------------ */
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compute the next "previous" value for differencing
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from the current code point.
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c current code point, 0..0x10ffff
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return "previous code point" state value
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE int32_t
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1Prev(int32_t c) {
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* compute new prev */
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(0x3040<=c && c<=0x309f) {
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Hiragana is not 128-aligned */
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x3070;
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(0x4e00<=c && c<=0x9fa5) {
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* CJK Unihan */
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x4e00-BOCU1_REACH_NEG_2;
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(0xac00<=c && c<=0xd7a3) {
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Korean Hangul (cast to int32_t to avoid wraparound on 16-bit compilers) */
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return ((int32_t)0xd7a3+(int32_t)0xac00)/2;
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* mostly small scripts */
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (c&~0x7f)+BOCU1_ASCII_PREV;
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and return a packed integer with them.
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The encoding favors small absolut differences with short encodings
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to compress runs of same-script characters.
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param diff difference value -0x10ffff..0x10ffff
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0x010000zz for 1-byte sequence zz
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0x0200yyzz for 2-byte sequence yy zz
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0x03xxyyzz for 3-byte sequence xx yy zz
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerupackDiff(int32_t diff) {
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result, m, lead, count, shift;
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(diff>=BOCU1_REACH_NEG_1) {
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* mostly positive differences, and single-byte negative ones */
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(diff<=BOCU1_REACH_POS_1) {
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* single byte */
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0x01000000|(BOCU1_MIDDLE+diff);
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(diff<=BOCU1_REACH_POS_2) {
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* two bytes */
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_POS_1+1;
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_POS_2;
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=1;
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(diff<=BOCU1_REACH_POS_3) {
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* three bytes */
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_POS_2+1;
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_POS_3;
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=2;
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* four bytes */
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_POS_3+1;
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_POS_4;
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=3;
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* two- and four-byte negative differences */
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(diff>=BOCU1_REACH_NEG_2) {
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* two bytes */
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_NEG_1;
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_NEG_2;
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=1;
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(diff>=BOCU1_REACH_NEG_3) {
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* three bytes */
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_NEG_2;
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_NEG_3;
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=2;
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* four bytes */
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_NEG_3;
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_NEG_4;
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=3;
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* encode the length of the packed result */
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count<3) {
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result=(count+1)<<24;
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* count==3, MSB used for the lead byte */ {
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result=0;
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* calculate trail bytes like digits in itoa() */
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    shift=0;
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result|=BOCU1_TRAIL_TO_BYTE(m)<<shift;
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        shift+=8;
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while(--count>0);
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* add lead byte */
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result|=(lead+diff)<<shift;
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 encoder function.
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pPrev pointer to the integer that holds
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        the "previous code point" state;
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        the initial value should be 0 which
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        encodeBocu1 will set to the actual BOCU-1 initial state value
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c the code point to encode
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the packed 1/2/3/4-byte encoding, see packDiff(),
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *         or 0 if an error occurs
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see packDiff
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruencodeBocu1(int32_t *pPrev, int32_t c) {
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t prev;
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pPrev==NULL || c<0 || c>0x10ffff) {
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* illegal argument */
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    prev=*pPrev;
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(prev==0) {
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* lenient handling of initial value 0 */
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prev=*pPrev=BOCU1_ASCII_PREV;
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(c<=0x20) {
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * ISO C0 control & space:
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * Encode directly for MIME compatibility,
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * and reset state except for space, to not disrupt compression.
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         */
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c!=0x20) {
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pPrev=BOCU1_ASCII_PREV;
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x01000000|c;
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * all other Unicode code points c==U+0021..U+10ffff
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * are encoded with the difference c-prev
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * a new prev is computed from c,
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * placed in the middle of a 0x80-block (for most small scripts) or
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * in the middle of the Unihan and Hangul blocks
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * to statistically minimize the following difference
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *pPrev=bocu1Prev(c);
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return packDiff(c-prev);
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte lead bytes.
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b lead byte;
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *          BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<=BOCU1_MAX_LEAD
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return -1 (state change only)
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see decodeBocu1
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1LeadByte(Bocu1Rx *pRx, uint8_t b) {
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c, count;
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(b>=BOCU1_START_NEG_2) {
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* positive difference */
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(b<BOCU1_START_POS_3) {
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* two bytes */
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=1;
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(b<BOCU1_START_POS_4) {
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* three bytes */
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=2;
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* four bytes */
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=BOCU1_REACH_POS_3+1;
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=3;
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* negative difference */
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(b>=BOCU1_START_NEG_3) {
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* two bytes */
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=1;
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(b>BOCU1_MIN) {
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* three bytes */
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=2;
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* four bytes */
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=3;
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* set the state for decoding the trail byte(s) */
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pRx->diff=c;
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pRx->count=count;
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return -1;
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte trail bytes.
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b trail byte
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return result value, same as decodeBocu1
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see decodeBocu1
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1TrailByte(Bocu1Rx *pRx, uint8_t b) {
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t t, c, count;
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(b<=0x20) {
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* skip some C0 controls and make the trail byte range contiguous */
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        t=bocu1ByteToTrail[b];
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(t<0) {
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* illegal trail byte value */
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=BOCU1_ASCII_PREV;
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->count=0;
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -99;
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if BOCU1_MAX_TRAIL<0xff
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(b>BOCU1_MAX_TRAIL) {
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -99;
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        t=(int32_t)b-BOCU1_TRAIL_BYTE_OFFSET;
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* add trail byte into difference and decrement count */
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c=pRx->diff;
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    count=pRx->count;
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count==1) {
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* final trail byte, deliver a code point */
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=pRx->prev+c+t;
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(0<=c && c<=0x10ffff) {
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* valid code point result */
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=bocu1Prev(c);
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->count=0;
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return c;
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* illegal code point result */
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=BOCU1_ASCII_PREV;
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->count=0;
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -99;
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* intermediate trail byte */
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count==2) {
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pRx->diff=c+t*BOCU1_TRAIL_COUNT;
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* count==3 */ {
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pRx->diff=c+t*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT;
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pRx->count=count-1;
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return -1;
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 decoder function.
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure;
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        the initial values should be 0 which
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        decodeBocu1 will set to actual initial state values
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b an input byte
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0..0x10ffff for a result code point
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      -1 if only the state changed without code point output
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *     <-1 if an error occurs
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1(Bocu1Rx *pRx, uint8_t b) {
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t prev, c, count;
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pRx==NULL) {
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* illegal argument */
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -99;
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    prev=pRx->prev;
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(prev==0) {
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* lenient handling of initial 0 values */
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prev=pRx->prev=BOCU1_ASCII_PREV;
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        count=pRx->count=0;
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        count=pRx->count;
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count==0) {
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* byte in lead position */
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(b<=0x20) {
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * Direct-encoded C0 control code or space.
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * Reset prev for C0 control codes but not for space.
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             */
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(b!=0x20) {
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pRx->prev=BOCU1_ASCII_PREV;
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return b;
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * b is a difference lead byte.
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         *
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * Return a code point directly from a single-byte difference.
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         *
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * For multi-byte difference lead bytes, set the decoder state
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * with the partial difference value from the lead byte and
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * with the number of trail bytes.
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         *
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * For four-byte differences, the signedness also affects the
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * first trail byte, which has special handling farther below.
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         */
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(b>=BOCU1_START_NEG_2 && b<BOCU1_START_POS_2) {
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* single-byte difference */
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=prev+((int32_t)b-BOCU1_MIDDLE);
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=bocu1Prev(c);
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return c;
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(b==BOCU1_RESET) {
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* only reset the state, no code point */
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=BOCU1_ASCII_PREV;
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return decodeBocu1LeadByte(pRx, b);
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in any position */
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return decodeBocu1TrailByte(pRx, b);
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1tst.c ------------------------------- */
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* test code ---------------------------------------------------------------- */
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* test code options */
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Test option: ignore comma when processing name lists in testText().
 * NOTE(review): testText() is outside this section; presumably a value of 1
 * enables the behavior and 0 disables it -- confirm at the point of use.
 */
#define TEST_IGNORE_COMMA       1
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Serialize a packed BOCU-1 byte sequence into a byte array,
 * most significant used byte first.
 * The caller must provide enough room; no overflow check is done.
 * Test function.
 *
 * @param packed packed BOCU-1 byte sequence, see packDiff()
 * @param p pointer to byte array
 * @return number of bytes, as given by BOCU1_LENGTH_FROM_PACKED(packed);
 *         nothing is written unless that length is 1..4
 *
 * @see packDiff
 */
static int32_t
writePacked(int32_t packed, uint8_t *p) {
    int32_t length=BOCU1_LENGTH_FROM_PACKED(packed);
    int32_t shift;

    if(1<=length && length<=4) {
        /* start at the highest used byte and work down to bits 7..0 */
        for(shift=8*(length-1); shift>=0; shift-=8) {
            *p++=(uint8_t)(packed>>shift);
        }
    }

    return length;
}
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unpack a packed BOCU-1 non-C0/space byte sequence and get
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the difference to initialPrev.
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Used only for round-trip testing of the difference encoding and decoding.
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test function.
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param initialPrev bogus "previous code point" value to make sure that
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *                    the resulting code point is in the range 0..0x10ffff
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param packed packed BOCU-1 byte sequence
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the difference to initialPrev
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see packDiff
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see writeDiff
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruunpackDiff(int32_t initialPrev, int32_t packed) {
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Bocu1Rx rx={ 0, 0, 0 };
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t count;
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    rx.prev=initialPrev;
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    count=BOCU1_LENGTH_FROM_PACKED(packed);
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(count) {
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 4:
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        decodeBocu1(&rx, (uint8_t)(packed>>24));
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 3:
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        decodeBocu1(&rx, (uint8_t)(packed>>16));
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 2:
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        decodeBocu1(&rx, (uint8_t)(packed>>8));
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 1:
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* subtract initial prev */
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return decodeBocu1(&rx, (uint8_t)packed)-initialPrev;
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -0x7fffffff;
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
 * preserving lexical order.
 * Also checks for roundtripping of the difference encoding:
 * the packed value is decoded again with unpackDiff() and an error is
 * logged via log_err() if the result differs from the input.
 * Test function.
 *
 * @param diff difference value to test, -0x10ffff..0x10ffff
 * @param p pointer to output byte array (no overflow check)
 * @return p advanced by number of bytes output
 *
 * @see unpackDiff
 */
static uint8_t *
writeDiff(int32_t diff, uint8_t *p) {
    int32_t packed, initialPrev, roundTrip;

    /* generate the difference as a packed value */
    packed=packDiff(diff);

    /*
     * bogus initial "prev" to work around
     * code point range check in decodeBocu1():
     * start at the top of the range for non-positive differences and
     * just below the bottom for positive ones
     */
    if(diff<=0) {
        initialPrev=0x10ffff;
    } else {
        initialPrev=-1;
    }

    /* decode once and reuse the value for both the check and the message */
    roundTrip=unpackDiff(initialPrev, packed);
    if(diff!=roundTrip) {
        /*
         * %ld and %lx take long / unsigned long arguments; cast explicitly
         * because int32_t is not long on every platform.
         * Go through uint32_t so that a packed value with the top bit set
         * prints as 8 hex digits rather than being sign-extended.
         */
        log_err("error: unpackDiff(packDiff(diff=%ld)=0x%08lx)=%ld!=diff\n",
                (long)diff, (unsigned long)(uint32_t)packed, (long)roundTrip);
    }

    /* serialize the packed value */
    return p+writePacked(packed, p);
}
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode a UTF-16 string in BOCU-1.
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Does not check for overflows, but otherwise useful function.
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s input UTF-16 string
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param length number of UChar code units in s
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to output byte array
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return number of bytes output
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruwriteString(const UChar *s, int32_t length, uint8_t *p) {
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *p0;
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c, prev, i;
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    prev=0;
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    p0=p;
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i=0;
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(i<length) {
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UTF_NEXT_CHAR(s, i, length, c);
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        p+=writePacked(encodeBocu1(&prev, c), p);
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (int32_t)(p-p0);
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Decode a BOCU-1 byte sequence to a UTF-16 string.
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Does not check for overflows, but otherwise useful function.
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to input BOCU-1 bytes
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param length number of input bytes
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s point to output UTF-16 string array
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return number of UChar code units output
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerureadString(const uint8_t *p, int32_t length, UChar *s) {
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Bocu1Rx rx={ 0, 0, 0 };
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c, i, sLength;
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i=sLength=0;
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(i<length) {
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=decodeBocu1(&rx, p[i++]);
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c<-1) {
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            log_err("error: readString detects encoding error at string index %ld\n", i);
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c>=0) {
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UTF_APPEND_CHAR_UNSAFE(s, sLength, c);
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return sLength;
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE char
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruhexDigit(uint8_t digit) {
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Format a 0-terminated byte sequence as text for test output.
 * Every byte becomes a space followed by two lowercase hex digits.
 * Sequences shorter than 5 bytes are padded with spaces to the width of
 * 5 bytes (15 characters) so that columns line up; longer sequences get
 * no padding. The result is NUL-terminated.
 *
 * @param bytes 0-terminated byte array to print
 * @param out   destination character buffer; not bounds-checked, must hold
 *              3 characters per input byte (at least 15) plus the NUL
 */
static void
printBytes(uint8_t *bytes, char *out) {
    int count, pad;

    for(count=0; *bytes!=0; ++bytes, ++count) {
        const uint8_t value=*bytes;

        out[0]=' ';
        out[1]=hexDigit((uint8_t)(value>>4));
        out[2]=hexDigit((uint8_t)(value&0xf));
        out+=3;
    }

    /* fill the columns of the missing bytes, 3 characters each */
    for(pad=3*(5-count); pad>0; --pad) {
        *out++=' ';
    }
    *out=0;
}
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Basic BOCU-1 test function, called when there are no command line arguments.
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Prints some of the #define values and performs round-trip tests of the
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * difference encoding and decoding.
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTestBOCU1RefDiff(void) {
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char buf1[80], buf2[80];
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t prev[5], level[5];
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, cmp, countErrors;
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("reach of single bytes: %ld\n", 1+BOCU1_REACH_POS_1-BOCU1_REACH_NEG_1);
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("reach of 2 bytes     : %ld\n", 1+BOCU1_REACH_POS_2-BOCU1_REACH_NEG_2);
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("reach of 3 bytes     : %ld\n\n", 1+BOCU1_REACH_POS_3-BOCU1_REACH_NEG_3);
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_REACH_NEG_1 %8ld    BOCU1_REACH_POS_1 %8ld\n", BOCU1_REACH_NEG_1, BOCU1_REACH_POS_1);
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_REACH_NEG_2 %8ld    BOCU1_REACH_POS_2 %8ld\n", BOCU1_REACH_NEG_2, BOCU1_REACH_POS_2);
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_REACH_NEG_3 %8ld    BOCU1_REACH_POS_3 %8ld\n\n", BOCU1_REACH_NEG_3, BOCU1_REACH_POS_3);
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_MIDDLE      0x%02x\n", BOCU1_MIDDLE);
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_START_NEG_2 0x%02x    BOCU1_START_POS_2 0x%02x\n", BOCU1_START_NEG_2, BOCU1_START_POS_2);
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_START_NEG_3 0x%02x    BOCU1_START_POS_3 0x%02x\n\n", BOCU1_START_NEG_3, BOCU1_START_POS_3);
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* test packDiff() & unpackDiff() with some specific values */
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(0, level);
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(1, level);
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(65, level);
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(130, level);
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(30000, level);
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(1000000, level);
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(-65, level);
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(-130, level);
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(-30000, level);
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(-1000000, level);
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* test that each value is smaller than any following one */
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    countErrors=0;
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i=-0x10ffff;
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *writeDiff(i, prev)=0;
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* show first number and bytes */
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printBytes(prev, buf1);
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("              wD(%8ld)                    %s\n", i, buf1);
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(++i; i<=0x10ffff; ++i) {
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *writeDiff(i, level)=0;
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cmp=strcmp((const char *)prev, (const char *)level);
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(BOCU1_LENGTH_FROM_LEAD(level[0])!=(int32_t)strlen((const char *)level)) {
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            log_verbose("BOCU1_LENGTH_FROM_LEAD(0x%02x)=%ld!=%ld=strlen(writeDiff(%ld))\n",
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   level[0], BOCU1_LENGTH_FROM_LEAD(level[0]), strlen((const char *)level), i);
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(cmp<0) {
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(i==0 || i==1 || strlen((const char *)prev)!=strlen((const char *)level)) {
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * if the result is good, then print only if the length changed
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * to get little but interesting output
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printBytes(prev, buf1);
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printBytes(level, buf2);
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                log_verbose("ok:    strcmp(wD(%8ld), wD(%8ld))=%2d  %s%s\n", i-1, i, cmp, buf1, buf2);
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++countErrors;
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printBytes(prev, buf1);
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printBytes(level, buf2);
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            log_verbose("wrong: strcmp(wD(%8ld), wD(%8ld))=%2d  %s%s\n", i-1, i, cmp, buf1, buf2);
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* remember the previous bytes */
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        memcpy(prev, level, 4);
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* show last number and bytes */
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printBytes((uint8_t *)"", buf1);
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printBytes(prev, buf2);
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("                            wD(%8ld)      %s%s\n", i-1, buf1, buf2);
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(countErrors==0) {
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_verbose("writeDiff(-0x10ffff..0x10ffff) works fine\n");
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("writeDiff(-0x10ffff..0x10ffff) violates lexical ordering in %d cases\n", countErrors);
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* output signature byte sequence */
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i=0;
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writePacked(encodeBocu1(&i, 0xfeff), level);
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("\nBOCU-1 signature byte sequence: %02x %02x %02x\n",
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            level[0], level[1], level[2]);
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* cintltst code ------------------------------------------------------------ */
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t DEFAULT_BUFFER_SIZE = 30000;
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* test one string with the ICU and the reference BOCU-1 implementations */
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruroundtripBOCU1(UConverter *bocu1, int32_t number, const UChar *text, int32_t length) {
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *roundtripRef, *roundtripICU;
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *bocu1Ref, *bocu1ICU;
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t bocu1RefLength, bocu1ICULength, roundtripRefLength, roundtripICULength;
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode;
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripRef = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripICU = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1Ref = malloc(DEFAULT_BUFFER_SIZE);
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1ICU = malloc(DEFAULT_BUFFER_SIZE);
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Unicode -> BOCU-1 */
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1RefLength=writeString(text, length, (uint8_t *)bocu1Ref);
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, DEFAULT_BUFFER_SIZE, text, length, &errorCode);
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("ucnv_fromUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(bocu1RefLength!=bocu1ICULength || 0!=uprv_memcmp(bocu1Ref, bocu1ICU, bocu1RefLength)) {
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Unicode(%d)[%d] -> BOCU-1: reference[%d]!=ICU[%d]\n", number, length, bocu1RefLength, bocu1ICULength);
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* BOCU-1 -> Unicode */
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripRefLength=readString((uint8_t *)bocu1Ref, bocu1RefLength, roundtripRef);
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(roundtripRefLength<0) {
90950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        free(roundtripICU);
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return; /* readString() found an error and reported it */
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, DEFAULT_BUFFER_SIZE, bocu1ICU, bocu1ICULength, &errorCode);
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("ucnv_toUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length!=roundtripRefLength || 0!=u_memcmp(text, roundtripRef, length)) {
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("BOCU-1 -> Unicode: original(%d)[%d]!=reference[%d]\n", number, length, roundtripRefLength);
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(roundtripRefLength!=roundtripICULength || 0!=u_memcmp(roundtripRef, roundtripICU, roundtripRefLength)) {
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("BOCU-1 -> Unicode: reference(%d)[%d]!=ICU[%d]\n", number, roundtripRefLength, roundtripICULength);
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(roundtripRef);
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(roundtripICU);
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(bocu1Ref);
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(bocu1ICU);
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar feff[]={ 0xfeff };
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ascii[]={ 0x61, 0x62, 0x20, 0x63, 0x61 };
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar crlf[]={ 0xd, 0xa, 0x20 };
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar nul[]={ 0 };
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar latin[]={ 0xdf, 0xe6 };
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar devanagari[]={ 0x930, 0x20, 0x918, 0x909 };
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar hiragana[]={ 0x3086, 0x304d, 0x20, 0x3053, 0x4000 };
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar unihan[]={ 0x4e00, 0x7777, 0x20, 0x9fa5, 0x4e00 };
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar hangul[]={ 0xac00, 0xbcde, 0x20, 0xd7a3 };
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar surrogates[]={ 0xdc00, 0xd800 }; /* single surrogates, unmatched! */
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane1[]={ 0xd800, 0xdc00 };
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane2[]={ 0xd845, 0xdddd };
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane15[]={ 0xdbbb, 0xddee, 0x20 };
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane16[]={ 0xdbff, 0xdfff };
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar c0[]={ 1, 0xe40, 0x20, 9 };
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const struct {
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *s;
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length;
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} strings[]={
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { feff,         LENGTHOF(feff) },
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { ascii,        LENGTHOF(ascii) },
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { crlf,         LENGTHOF(crlf) },
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { nul,          LENGTHOF(nul) },
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { latin,        LENGTHOF(latin) },
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { devanagari,   LENGTHOF(devanagari) },
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { hiragana,     LENGTHOF(hiragana) },
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { unihan,       LENGTHOF(unihan) },
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { hangul,       LENGTHOF(hangul) },
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { surrogates,   LENGTHOF(surrogates) },
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { plane1,       LENGTHOF(plane1) },
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { plane2,       LENGTHOF(plane2) },
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { plane15,      LENGTHOF(plane15) },
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { plane16,      LENGTHOF(plane16) },
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { c0,           LENGTHOF(c0) }
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Verify that the ICU BOCU-1 implementation produces the same results as
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the reference implementation from the design folder.
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generate some texts and convert them with both converters, verifying
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * identical results and roundtripping.
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTestBOCU1(void) {
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *text;
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, length;
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter *bocu1;
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode;
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1=ucnv_open("BOCU-1", &errorCode);
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("error: unable to open BOCU-1 converter: %s\n", u_errorName(errorCode));
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* text 1: each of strings[] once */
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length=0;
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=0; i<LENGTHOF(strings); ++i) {
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_memcpy(text+length, strings[i].s, strings[i].length);
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length+=strings[i].length;
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripBOCU1(bocu1, 1, text, length);
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* text 2: each of strings[] twice */
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length=0;
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=0; i<LENGTHOF(strings); ++i) {
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_memcpy(text+length, strings[i].s, strings[i].length);
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length+=strings[i].length;
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_memcpy(text+length, strings[i].s, strings[i].length);
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length+=strings[i].length;
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripBOCU1(bocu1, 2, text, length);
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* text 3: each of strings[] many times (set step vs. |strings| so that all strings are used) */
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length=0;
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=1; length<5000; i+=7) {
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(i>=LENGTHOF(strings)) {
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            i-=LENGTHOF(strings);
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_memcpy(text+length, strings[i].s, strings[i].length);
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length+=strings[i].length;
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripBOCU1(bocu1, 3, text, length);
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(bocu1);
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(text);
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC void addBOCU1Tests(TestNode** root);
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC void
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruaddBOCU1Tests(TestNode** root) {
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, TestBOCU1RefDiff, "tsconv/bocu1tst/TestBOCU1RefDiff");
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, TestBOCU1, "tsconv/bocu1tst/TestBOCU1");
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1033