// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2002-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  bocu1tst.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002may27
*   created by: Markus W. Scherer
*
*   This is the reference implementation of BOCU-1,
*   the MIME-friendly form of the Binary Ordered Compression for Unicode,
*   taken directly from ### http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/bocu1/
*   The files bocu1.h and bocu1.c from the design folder are taken
*   verbatim (minus copyright and #include) and copied together into this file.
*   The reference code and some of the reference bocu1tst.c
*   is modified to run as part of the ICU cintltst
*   test framework (minus main(), log_ln() etc. instead of printf()).
*
*   This reference implementation is used here to verify
*   the ICU BOCU-1 implementation, which is
*   adapted for ICU conversion APIs and optimized.
*   ### links in design doc to here and to ucnvbocu.c
*/
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
3683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h"
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cintltst.h"
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1.h ---------------------------------- */
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* BOCU-1 constants and macros ---------------------------------------------- */
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * BOCU-1 encodes the code points of a Unicode string as
 * a sequence of byte-encoded differences (slope detection),
 * preserving lexical order.
 *
 * Optimize the difference-taking for runs of Unicode text within
 * small scripts:
 *
 * Most small scripts are allocated within aligned 128-blocks of Unicode
 * code points. Lexical order is preserved if the "previous code point" state
 * is always moved into the middle of such a block.
 *
 * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
 * areas into the middle of those areas.
 *
 * C0 control codes and space are encoded with their US-ASCII bytes.
 * "prev" is reset for C0 controls but not for space.
 */
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* initial value for "prev": middle of the ASCII range */
#define BOCU1_ASCII_PREV        0x40

/* bounding byte values for differences */
#define BOCU1_MIN               0x21
#define BOCU1_MIDDLE            0x90
#define BOCU1_MAX_LEAD          0xfe

/* add the L suffix to make computations with BOCU1_MAX_TRAIL work on 16-bit compilers */
#define BOCU1_MAX_TRAIL         0xffL
#define BOCU1_RESET             0xff

/* number of lead bytes */
#define BOCU1_COUNT             (BOCU1_MAX_LEAD-BOCU1_MIN+1)

/* adjust trail byte counts for the use of some C0 control byte values */
#define BOCU1_TRAIL_CONTROLS_COUNT  20
#define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)

/* number of trail bytes */
#define BOCU1_TRAIL_COUNT       ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)

/*
 * number of positive and negative single-byte codes
 * (counting 0==BOCU1_MIDDLE among the positive ones)
 */
#define BOCU1_SINGLE            64

/* number of lead bytes for positive and negative 2/3/4-byte sequences */
#define BOCU1_LEAD_2            43
#define BOCU1_LEAD_3            3
#define BOCU1_LEAD_4            1

/* The difference value range for single-byters. */
#define BOCU1_REACH_POS_1   (BOCU1_SINGLE-1)
#define BOCU1_REACH_NEG_1   (-BOCU1_SINGLE)

/* The difference value range for double-byters. */
#define BOCU1_REACH_POS_2   (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_2   (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)

/* The difference value range for 3-byters. */
#define BOCU1_REACH_POS_3   \
    (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

#define BOCU1_REACH_NEG_3   \
    (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

/* The lead byte start values. */
#define BOCU1_START_POS_2   (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
#define BOCU1_START_POS_3   (BOCU1_START_POS_2+BOCU1_LEAD_2)
#define BOCU1_START_POS_4   (BOCU1_START_POS_3+BOCU1_LEAD_3)
     /* ==BOCU1_MAX_LEAD */

#define BOCU1_START_NEG_2   (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
#define BOCU1_START_NEG_3   (BOCU1_START_NEG_2-BOCU1_LEAD_2)
#define BOCU1_START_NEG_4   (BOCU1_START_NEG_3-BOCU1_LEAD_3)
     /* ==BOCU1_MIN+1 */

/*
 * The length of a byte sequence, according to the lead byte (!=BOCU1_RESET).
 * The ranges nest around BOCU1_MIDDLE: the innermost lead bytes are
 * single-byte codes, the next ring starts 2-byte sequences, then 3-byte,
 * and whatever is left starts a 4-byte sequence.
 * Note: the argument is evaluated more than once.
 */
#define BOCU1_LENGTH_FROM_LEAD(lead) \
    ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
     (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
     (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)

/*
 * The length of a byte sequence, according to its packed form.
 * Packed values below 0x04000000 carry the length (1..3) in the top byte;
 * anything else has a real lead byte there and is a 4-byte sequence.
 * Note: the argument is evaluated more than once.
 */
#define BOCU1_LENGTH_FROM_PACKED(packed) \
    ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * 12 commonly used C0 control codes (and space) are only used to encode
 * themselves directly,
 * which makes BOCU-1 MIME-usable and reasonably safe for
 * ASCII-oriented software.
 *
 * These controls are
 *  0   NUL
 *
 *  7   BEL
 *  8   BS
 *
 *  9   TAB
 *  a   LF
 *  b   VT
 *  c   FF
 *  d   CR
 *
 *  e   SO
 *  f   SI
 *
 * 1a   SUB
 * 1b   ESC
 *
 * The other 20 C0 controls are also encoded directly (to preserve order)
 * but are also used as trail bytes in difference encoding
 * (for better compression).
 *
 * BOCU1_TRAIL_TO_BYTE maps a trail value t to its external byte value:
 * values at or above BOCU1_TRAIL_CONTROLS_COUNT are shifted by
 * BOCU1_TRAIL_BYTE_OFFSET, the lower ones are looked up in
 * bocu1TrailToByte[]. The argument is evaluated more than once.
 */
#define BOCU1_TRAIL_TO_BYTE(t) \
    ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t])
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte value map for control codes,
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from external byte values 0x00..0x20
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * External byte values that are illegal as trail bytes are mapped to -1.
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int8_t
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1ByteToTrail[BOCU1_MIN]={
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  0     1     2     3     4     5     6     7    */
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    -1,   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  8     9     a     b     c     d     e     f    */
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  10    11    12    13    14    15    16    17   */
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  18    19    1a    1b    1c    1d    1e    1f   */
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x0e, 0x0f, -1,   -1,   0x10, 0x11, 0x12, 0x13,
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  20   */
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    -1
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte value map for control codes,
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from trail byte values 0..19 (0..0x13) as used in the difference calculation
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to external byte values 0x00..0x20.
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int8_t
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  0     1     2     3     4     5     6     7    */
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  8     9     a     b     c     d     e     f    */
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*  10    11    12    13   */
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0x1c, 0x1d, 0x1e, 0x1f
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Integer division and modulo with negative numerators
 * yields negative modulo results and quotients that are one more than
 * what we need here.
 * This macro adjusts the results so that the modulo-value m is always >=0.
 *
 * For positive n, the if() condition is always FALSE.
 *
 * The body is wrapped in do { ... } while(0) so that the macro behaves like
 * a single statement: "NEGDIVMOD(n, d, m);" is then safe as the unbraced
 * body of an if/else. Callers must supply the trailing semicolon.
 * All arguments are evaluated more than once; n and m must be lvalues.
 *
 * @param n Number to be split into quotient and rest.
 *          Will be modified to contain the quotient.
 * @param d Divisor.
 * @param m Output variable for the rest (modulo result).
 */
#define NEGDIVMOD(n, d, m) do { \
    (m)=(n)%(d); \
    (n)/=(d); \
    if((m)<0) { \
        --(n); \
        (m)+=(d); \
    } \
} while(0)
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * State for BOCU-1 decoder function.
 * NOTE(review): the decoder body is not part of this section; presumably
 * prev is the "previous code point" state, count the number of trail bytes
 * still expected and diff the partially accumulated difference - confirm
 * against decodeBocu1().
 */
struct Bocu1Rx {
    int32_t prev, count, diff;
};

typedef struct Bocu1Rx Bocu1Rx;
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Function prototypes ------------------------------------------------------ */
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* see bocu1.c */
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerupackDiff(int32_t diff);
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruencodeBocu1(int32_t *pPrev, int32_t c);
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1(Bocu1Rx *pRx, uint8_t b);
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1.c ---------------------------------- */
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* BOCU-1 implementation functions ------------------------------------------ */
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compute the next "previous" value for differencing
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from the current code point.
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c current code point, 0..0x10ffff
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return "previous code point" state value
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
25483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic int32_t
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1Prev(int32_t c) {
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* compute new prev */
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(0x3040<=c && c<=0x309f) {
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Hiragana is not 128-aligned */
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x3070;
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(0x4e00<=c && c<=0x9fa5) {
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* CJK Unihan */
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x4e00-BOCU1_REACH_NEG_2;
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(0xac00<=c && c<=0xd7a3) {
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Korean Hangul (cast to int32_t to avoid wraparound on 16-bit compilers) */
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return ((int32_t)0xd7a3+(int32_t)0xac00)/2;
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* mostly small scripts */
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (c&~0x7f)+BOCU1_ASCII_PREV;
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and return a packed integer with them.
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The encoding favors small absolut differences with short encodings
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to compress runs of same-script characters.
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param diff difference value -0x10ffff..0x10ffff
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0x010000zz for 1-byte sequence zz
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0x0200yyzz for 2-byte sequence yy zz
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0x03xxyyzz for 3-byte sequence xx yy zz
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerupackDiff(int32_t diff) {
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result, m, lead, count, shift;
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(diff>=BOCU1_REACH_NEG_1) {
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* mostly positive differences, and single-byte negative ones */
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(diff<=BOCU1_REACH_POS_1) {
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* single byte */
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0x01000000|(BOCU1_MIDDLE+diff);
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(diff<=BOCU1_REACH_POS_2) {
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* two bytes */
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_POS_1+1;
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_POS_2;
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=1;
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(diff<=BOCU1_REACH_POS_3) {
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* three bytes */
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_POS_2+1;
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_POS_3;
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=2;
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* four bytes */
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_POS_3+1;
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_POS_4;
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=3;
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* two- and four-byte negative differences */
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(diff>=BOCU1_REACH_NEG_2) {
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* two bytes */
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_NEG_1;
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_NEG_2;
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=1;
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(diff>=BOCU1_REACH_NEG_3) {
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* three bytes */
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_NEG_2;
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_NEG_3;
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=2;
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* four bytes */
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            diff-=BOCU1_REACH_NEG_3;
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lead=BOCU1_START_NEG_4;
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=3;
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* encode the length of the packed result */
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count<3) {
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result=(count+1)<<24;
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* count==3, MSB used for the lead byte */ {
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result=0;
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* calculate trail bytes like digits in itoa() */
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    shift=0;
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result|=BOCU1_TRAIL_TO_BYTE(m)<<shift;
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        shift+=8;
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while(--count>0);
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* add lead byte */
34764339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    result |= (uint32_t)(lead+diff)<<shift;
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 encoder function.
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pPrev pointer to the integer that holds
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        the "previous code point" state;
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        the initial value should be 0 which
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        encodeBocu1 will set to the actual BOCU-1 initial state value
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c the code point to encode
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the packed 1/2/3/4-byte encoding, see packDiff(),
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *         or 0 if an error occurs
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see packDiff
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruencodeBocu1(int32_t *pPrev, int32_t c) {
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t prev;
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pPrev==NULL || c<0 || c>0x10ffff) {
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* illegal argument */
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    prev=*pPrev;
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(prev==0) {
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* lenient handling of initial value 0 */
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prev=*pPrev=BOCU1_ASCII_PREV;
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(c<=0x20) {
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * ISO C0 control & space:
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * Encode directly for MIME compatibility,
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * and reset state except for space, to not disrupt compression.
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         */
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c!=0x20) {
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pPrev=BOCU1_ASCII_PREV;
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x01000000|c;
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * all other Unicode code points c==U+0021..U+10ffff
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * are encoded with the difference c-prev
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * a new prev is computed from c,
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * placed in the middle of a 0x80-block (for most small scripts) or
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * in the middle of the Unihan and Hangul blocks
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * to statistically minimize the following difference
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *pPrev=bocu1Prev(c);
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return packDiff(c-prev);
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte lead bytes.
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b lead byte;
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *          BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<=BOCU1_MAX_LEAD
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return -1 (state change only)
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see decodeBocu1
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1LeadByte(Bocu1Rx *pRx, uint8_t b) {
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c, count;
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(b>=BOCU1_START_NEG_2) {
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* positive difference */
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(b<BOCU1_START_POS_3) {
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* two bytes */
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=1;
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(b<BOCU1_START_POS_4) {
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* three bytes */
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=2;
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* four bytes */
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=BOCU1_REACH_POS_3+1;
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=3;
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* negative difference */
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(b>=BOCU1_START_NEG_3) {
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* two bytes */
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=1;
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(b>BOCU1_MIN) {
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* three bytes */
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=2;
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* four bytes */
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count=3;
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* set the state for decoding the trail byte(s) */
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pRx->diff=c;
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pRx->count=count;
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return -1;
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte trail bytes.
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b trail byte
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return result value, same as decodeBocu1
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see decodeBocu1
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1TrailByte(Bocu1Rx *pRx, uint8_t b) {
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t t, c, count;
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(b<=0x20) {
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* skip some C0 controls and make the trail byte range contiguous */
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        t=bocu1ByteToTrail[b];
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(t<0) {
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* illegal trail byte value */
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=BOCU1_ASCII_PREV;
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->count=0;
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -99;
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if BOCU1_MAX_TRAIL<0xff
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(b>BOCU1_MAX_TRAIL) {
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -99;
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        t=(int32_t)b-BOCU1_TRAIL_BYTE_OFFSET;
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* add trail byte into difference and decrement count */
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c=pRx->diff;
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    count=pRx->count;
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count==1) {
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* final trail byte, deliver a code point */
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=pRx->prev+c+t;
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(0<=c && c<=0x10ffff) {
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* valid code point result */
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=bocu1Prev(c);
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->count=0;
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return c;
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* illegal code point result */
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=BOCU1_ASCII_PREV;
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->count=0;
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -99;
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* intermediate trail byte */
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count==2) {
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pRx->diff=c+t*BOCU1_TRAIL_COUNT;
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* count==3 */ {
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pRx->diff=c+t*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT;
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pRx->count=count-1;
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return -1;
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 decoder function.
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure;
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        the initial values should be 0 which
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *        decodeBocu1 will set to actual initial state values
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b an input byte
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      0..0x10ffff for a result code point
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *      -1 if only the state changed without code point output
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *     <-1 if an error occurs
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1(Bocu1Rx *pRx, uint8_t b) {
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t prev, c, count;
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pRx==NULL) {
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* illegal argument */
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -99;
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    prev=pRx->prev;
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(prev==0) {
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* lenient handling of initial 0 values */
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prev=pRx->prev=BOCU1_ASCII_PREV;
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        count=pRx->count=0;
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        count=pRx->count;
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(count==0) {
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* byte in lead position */
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(b<=0x20) {
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * Direct-encoded C0 control code or space.
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * Reset prev for C0 control codes but not for space.
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             */
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(b!=0x20) {
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pRx->prev=BOCU1_ASCII_PREV;
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return b;
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * b is a difference lead byte.
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         *
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * Return a code point directly from a single-byte difference.
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         *
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * For multi-byte difference lead bytes, set the decoder state
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * with the partial difference value from the lead byte and
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * with the number of trail bytes.
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         *
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * For four-byte differences, the signedness also affects the
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * first trail byte, which has special handling farther below.
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         */
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(b>=BOCU1_START_NEG_2 && b<BOCU1_START_POS_2) {
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* single-byte difference */
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=prev+((int32_t)b-BOCU1_MIDDLE);
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=bocu1Prev(c);
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return c;
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(b==BOCU1_RESET) {
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* only reset the state, no code point */
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pRx->prev=BOCU1_ASCII_PREV;
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return decodeBocu1LeadByte(pRx, b);
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in any position */
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return decodeBocu1TrailByte(pRx, b);
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* icuhtml/design/conversion/bocu1/bocu1tst.c ------------------------------- */

/* test code ---------------------------------------------------------------- */

/* test code options */
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* ignore comma when processing name lists in testText() */
#define TEST_IGNORE_COMMA       1
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Write a packed BOCU-1 byte sequence into a byte array,
 * without overflow check.
 * Test function.
 *
 * The bytes are written most significant first: a four-byte sequence
 * starts with bits 31..24 of packed, a one-byte sequence consists of
 * bits 7..0 only. If the length is not 1..4, nothing is written.
 *
 * @param packed packed BOCU-1 byte sequence, see packDiff()
 * @param p pointer to byte array; the caller must provide room for
 *          up to 4 bytes
 * @return number of bytes, as given by BOCU1_LENGTH_FROM_PACKED(packed)
 *
 * @see packDiff
 */
static int32_t
writePacked(int32_t packed, uint8_t *p) {
    int32_t count=BOCU1_LENGTH_FROM_PACKED(packed);
    /* each case intentionally continues into the next shorter one */
    switch(count) {
    case 4:
        *p++=(uint8_t)(packed>>24);
        /* fall through */
    case 3:
        *p++=(uint8_t)(packed>>16);
        /* fall through */
    case 2:
        *p++=(uint8_t)(packed>>8);
        /* fall through */
    case 1:
        *p++=(uint8_t)packed;
        break;
    default:
        break;
    }

    return count;
}
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unpack a packed BOCU-1 non-C0/space byte sequence and get
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the difference to initialPrev.
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Used only for round-trip testing of the difference encoding and decoding.
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test function.
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param initialPrev bogus "previous code point" value to make sure that
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *                    the resulting code point is in the range 0..0x10ffff
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param packed packed BOCU-1 byte sequence
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the difference to initialPrev
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see packDiff
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see writeDiff
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruunpackDiff(int32_t initialPrev, int32_t packed) {
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Bocu1Rx rx={ 0, 0, 0 };
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t count;
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    rx.prev=initialPrev;
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    count=BOCU1_LENGTH_FROM_PACKED(packed);
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(count) {
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 4:
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        decodeBocu1(&rx, (uint8_t)(packed>>24));
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 3:
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        decodeBocu1(&rx, (uint8_t)(packed>>16));
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 2:
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        decodeBocu1(&rx, (uint8_t)(packed>>8));
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 1:
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* subtract initial prev */
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return decodeBocu1(&rx, (uint8_t)packed)-initialPrev;
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -0x7fffffff;
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
 * preserving lexical order.
 * Also checks for roundtripping of the difference encoding
 * and reports a mismatch with log_err().
 * Test function.
 *
 * @param diff difference value to test, -0x10ffff..0x10ffff
 * @param p pointer to output byte array
 * @return p advanced by number of bytes output
 *
 * @see unpackDiff
 */
static uint8_t *
writeDiff(int32_t diff, uint8_t *p) {
    /* generate the difference as a packed value and serialize it */
    int32_t packed, initialPrev, roundTrip;

    packed=packDiff(diff);

    /*
     * bogus initial "prev" to work around
     * code point range check in decodeBocu1()
     */
    if(diff<=0) {
        initialPrev=0x10ffff;
    } else {
        initialPrev=-1;
    }

    roundTrip=unpackDiff(initialPrev, packed);
    if(diff!=roundTrip) {
        /*
         * The format uses %ld/%lx, so the int32_t values are converted
         * explicitly; packed goes through uint32_t to print exactly 8 digits.
         */
        log_err("error: unpackDiff(packDiff(diff=%ld)=0x%08lx)=%ld!=diff\n",
                (long)diff, (unsigned long)(uint32_t)packed, (long)roundTrip);
    }
    return p+writePacked(packed, p);
}
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode a UTF-16 string in BOCU-1.
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Does not check for overflows, but otherwise useful function.
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s input UTF-16 string
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param length number of UChar code units in s
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to output byte array
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return number of bytes output
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruwriteString(const UChar *s, int32_t length, uint8_t *p) {
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *p0;
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c, prev, i;
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    prev=0;
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    p0=p;
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i=0;
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(i<length) {
71983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        U16_NEXT(s, i, length, c);
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        p+=writePacked(encodeBocu1(&prev, c), p);
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (int32_t)(p-p0);
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Decode a BOCU-1 byte sequence to a UTF-16 string.
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Does not check for overflows, but otherwise useful function.
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to input BOCU-1 bytes
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param length number of input bytes
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s point to output UTF-16 string array
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return number of UChar code units output
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerureadString(const uint8_t *p, int32_t length, UChar *s) {
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Bocu1Rx rx={ 0, 0, 0 };
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c, i, sLength;
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i=sLength=0;
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(i<length) {
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=decodeBocu1(&rx, p[i++]);
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c<-1) {
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            log_err("error: readString detects encoding error at string index %ld\n", i);
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c>=0) {
74783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            U16_APPEND_UNSAFE(s, sLength, c);
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return sLength;
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Map a value 0..15 to its lowercase hexadecimal digit character.
 *
 * @param digit value to convert; 0..9 yield '0'..'9', 10..15 yield 'a'..'f'
 * @return the digit character
 */
static char
hexDigit(uint8_t digit) {
    if(digit>9) {
        return (char)(digit+('a'-10));
    }
    return (char)(digit+'0');
}
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Pretty-print 0-terminated byte values.
 * Helper function for test output.
 *
 * Each input byte is written as a space followed by two lowercase hex digits.
 * If fewer than 5 bytes were printed, the output is padded with spaces to
 * the width of 5 printed bytes (15 characters). The output is NUL-terminated.
 *
 * @param bytes 0-terminated byte array to print
 * @param out receives the formatted text; not bounds-checked, so it must hold
 *            3 characters per input byte (at least 15) plus the terminator
 */
static void
printBytes(uint8_t *bytes, char *out) {
    int count, pad;

    for(count=0; bytes[count]!=0; ++count) {
        uint8_t value=bytes[count];

        out[0]=' ';
        out[1]=hexDigit((uint8_t)(value>>4));
        out[2]=hexDigit((uint8_t)(value&0xf));
        out+=3;
    }

    /* fill up to the column width of 5 bytes; no padding if there were 5 or more */
    for(pad=3*(5-count); pad>0; --pad) {
        *out++=' ';
    }
    *out=0;
}
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Basic BOCU-1 test function, called when there are no command line arguments.
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Prints some of the #define values and performs round-trip tests of the
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * difference encoding and decoding.
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTestBOCU1RefDiff(void) {
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char buf1[80], buf2[80];
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t prev[5], level[5];
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, cmp, countErrors;
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("reach of single bytes: %ld\n", 1+BOCU1_REACH_POS_1-BOCU1_REACH_NEG_1);
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("reach of 2 bytes     : %ld\n", 1+BOCU1_REACH_POS_2-BOCU1_REACH_NEG_2);
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("reach of 3 bytes     : %ld\n\n", 1+BOCU1_REACH_POS_3-BOCU1_REACH_NEG_3);
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_REACH_NEG_1 %8ld    BOCU1_REACH_POS_1 %8ld\n", BOCU1_REACH_NEG_1, BOCU1_REACH_POS_1);
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_REACH_NEG_2 %8ld    BOCU1_REACH_POS_2 %8ld\n", BOCU1_REACH_NEG_2, BOCU1_REACH_POS_2);
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_REACH_NEG_3 %8ld    BOCU1_REACH_POS_3 %8ld\n\n", BOCU1_REACH_NEG_3, BOCU1_REACH_POS_3);
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_MIDDLE      0x%02x\n", BOCU1_MIDDLE);
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_START_NEG_2 0x%02x    BOCU1_START_POS_2 0x%02x\n", BOCU1_START_NEG_2, BOCU1_START_POS_2);
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("    BOCU1_START_NEG_3 0x%02x    BOCU1_START_POS_3 0x%02x\n\n", BOCU1_START_NEG_3, BOCU1_START_POS_3);
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* test packDiff() & unpackDiff() with some specific values */
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(0, level);
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(1, level);
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(65, level);
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(130, level);
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(30000, level);
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(1000000, level);
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(-65, level);
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(-130, level);
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(-30000, level);
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writeDiff(-1000000, level);
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* test that each value is smaller than any following one */
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    countErrors=0;
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i=-0x10ffff;
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *writeDiff(i, prev)=0;
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* show first number and bytes */
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printBytes(prev, buf1);
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("              wD(%8ld)                    %s\n", i, buf1);
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(++i; i<=0x10ffff; ++i) {
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *writeDiff(i, level)=0;
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cmp=strcmp((const char *)prev, (const char *)level);
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(BOCU1_LENGTH_FROM_LEAD(level[0])!=(int32_t)strlen((const char *)level)) {
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            log_verbose("BOCU1_LENGTH_FROM_LEAD(0x%02x)=%ld!=%ld=strlen(writeDiff(%ld))\n",
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   level[0], BOCU1_LENGTH_FROM_LEAD(level[0]), strlen((const char *)level), i);
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(cmp<0) {
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(i==0 || i==1 || strlen((const char *)prev)!=strlen((const char *)level)) {
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * if the result is good, then print only if the length changed
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * to get little but interesting output
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printBytes(prev, buf1);
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printBytes(level, buf2);
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                log_verbose("ok:    strcmp(wD(%8ld), wD(%8ld))=%2d  %s%s\n", i-1, i, cmp, buf1, buf2);
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++countErrors;
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printBytes(prev, buf1);
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printBytes(level, buf2);
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            log_verbose("wrong: strcmp(wD(%8ld), wD(%8ld))=%2d  %s%s\n", i-1, i, cmp, buf1, buf2);
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* remember the previous bytes */
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        memcpy(prev, level, 4);
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* show last number and bytes */
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printBytes((uint8_t *)"", buf1);
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printBytes(prev, buf2);
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("                            wD(%8ld)      %s%s\n", i-1, buf1, buf2);
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(countErrors==0) {
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_verbose("writeDiff(-0x10ffff..0x10ffff) works fine\n");
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("writeDiff(-0x10ffff..0x10ffff) violates lexical ordering in %d cases\n", countErrors);
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* output signature byte sequence */
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i=0;
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    writePacked(encodeBocu1(&i, 0xfeff), level);
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    log_verbose("\nBOCU-1 signature byte sequence: %02x %02x %02x\n",
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            level[0], level[1], level[2]);
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* cintltst code ------------------------------------------------------------ */
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Capacity of each heap buffer allocated by the tests below:
 * a count of bytes for the BOCU-1 buffers, a count of UChars for the
 * UTF-16 buffers (those allocations multiply by sizeof(UChar)).
 */
static const int32_t DEFAULT_BUFFER_SIZE = 30000;
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* test one string with the ICU and the reference BOCU-1 implementations */
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruroundtripBOCU1(UConverter *bocu1, int32_t number, const UChar *text, int32_t length) {
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *roundtripRef, *roundtripICU;
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *bocu1Ref, *bocu1ICU;
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t bocu1RefLength, bocu1ICULength, roundtripRefLength, roundtripICULength;
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode;
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripRef = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripICU = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1Ref = malloc(DEFAULT_BUFFER_SIZE);
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1ICU = malloc(DEFAULT_BUFFER_SIZE);
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Unicode -> BOCU-1 */
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1RefLength=writeString(text, length, (uint8_t *)bocu1Ref);
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, DEFAULT_BUFFER_SIZE, text, length, &errorCode);
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("ucnv_fromUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
899b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        goto cleanup;
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(bocu1RefLength!=bocu1ICULength || 0!=uprv_memcmp(bocu1Ref, bocu1ICU, bocu1RefLength)) {
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("Unicode(%d)[%d] -> BOCU-1: reference[%d]!=ICU[%d]\n", number, length, bocu1RefLength, bocu1ICULength);
904b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        goto cleanup;
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* BOCU-1 -> Unicode */
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripRefLength=readString((uint8_t *)bocu1Ref, bocu1RefLength, roundtripRef);
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(roundtripRefLength<0) {
910b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        goto cleanup; /* readString() found an error and reported it */
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, DEFAULT_BUFFER_SIZE, bocu1ICU, bocu1ICULength, &errorCode);
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("ucnv_toUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
916b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        goto cleanup;
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length!=roundtripRefLength || 0!=u_memcmp(text, roundtripRef, length)) {
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("BOCU-1 -> Unicode: original(%d)[%d]!=reference[%d]\n", number, length, roundtripRefLength);
921b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        goto cleanup;
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(roundtripRefLength!=roundtripICULength || 0!=u_memcmp(roundtripRef, roundtripICU, roundtripRefLength)) {
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        log_err("BOCU-1 -> Unicode: reference(%d)[%d]!=ICU[%d]\n", number, roundtripRefLength, roundtripICULength);
925b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        goto cleanup;
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
927b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehocleanup:
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(roundtripRef);
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(roundtripICU);
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(bocu1Ref);
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(bocu1ICU);
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar feff[]={ 0xfeff };
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ascii[]={ 0x61, 0x62, 0x20, 0x63, 0x61 };
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar crlf[]={ 0xd, 0xa, 0x20 };
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar nul[]={ 0 };
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar latin[]={ 0xdf, 0xe6 };
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar devanagari[]={ 0x930, 0x20, 0x918, 0x909 };
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar hiragana[]={ 0x3086, 0x304d, 0x20, 0x3053, 0x4000 };
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar unihan[]={ 0x4e00, 0x7777, 0x20, 0x9fa5, 0x4e00 };
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar hangul[]={ 0xac00, 0xbcde, 0x20, 0xd7a3 };
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar surrogates[]={ 0xdc00, 0xd800 }; /* single surrogates, unmatched! */
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane1[]={ 0xd800, 0xdc00 };
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane2[]={ 0xd845, 0xdddd };
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane15[]={ 0xdbbb, 0xddee, 0x20 };
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane16[]={ 0xdbff, 0xdfff };
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar c0[]={ 1, 0xe40, 0x20, 9 };
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const struct {
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *s;
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length;
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} strings[]={
954f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { feff,         UPRV_LENGTHOF(feff) },
955f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { ascii,        UPRV_LENGTHOF(ascii) },
956f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { crlf,         UPRV_LENGTHOF(crlf) },
957f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { nul,          UPRV_LENGTHOF(nul) },
958f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { latin,        UPRV_LENGTHOF(latin) },
959f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { devanagari,   UPRV_LENGTHOF(devanagari) },
960f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { hiragana,     UPRV_LENGTHOF(hiragana) },
961f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { unihan,       UPRV_LENGTHOF(unihan) },
962f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { hangul,       UPRV_LENGTHOF(hangul) },
963f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { surrogates,   UPRV_LENGTHOF(surrogates) },
964f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { plane1,       UPRV_LENGTHOF(plane1) },
965f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { plane2,       UPRV_LENGTHOF(plane2) },
966f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { plane15,      UPRV_LENGTHOF(plane15) },
967f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { plane16,      UPRV_LENGTHOF(plane16) },
968f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    { c0,           UPRV_LENGTHOF(c0) }
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Verify that the ICU BOCU-1 implementation produces the same results as
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the reference implementation from the design folder.
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generate some texts and convert them with both converters, verifying
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * identical results and roundtripping.
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTestBOCU1(void) {
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *text;
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, length;
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter *bocu1;
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode;
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bocu1=ucnv_open("BOCU-1", &errorCode);
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
9881b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        log_data_err("error: unable to open BOCU-1 converter: %s\n", u_errorName(errorCode));
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* text 1: each of strings[] once */
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length=0;
996f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_memcpy(text+length, strings[i].s, strings[i].length);
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length+=strings[i].length;
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripBOCU1(bocu1, 1, text, length);
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* text 2: each of strings[] twice */
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length=0;
1004f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_memcpy(text+length, strings[i].s, strings[i].length);
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length+=strings[i].length;
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_memcpy(text+length, strings[i].s, strings[i].length);
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length+=strings[i].length;
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripBOCU1(bocu1, 2, text, length);
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* text 3: each of strings[] many times (set step vs. |strings| so that all strings are used) */
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length=0;
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=1; length<5000; i+=7) {
1015f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        if(i>=UPRV_LENGTHOF(strings)) {
1016f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            i-=UPRV_LENGTHOF(strings);
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_memcpy(text+length, strings[i].s, strings[i].length);
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length+=strings[i].length;
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    roundtripBOCU1(bocu1, 3, text, length);
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(bocu1);
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    free(text);
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC void addBOCU1Tests(TestNode** root);
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC void
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruaddBOCU1Tests(TestNode** root) {
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, TestBOCU1RefDiff, "tsconv/bocu1tst/TestBOCU1RefDiff");
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    addTest(root, TestBOCU1, "tsconv/bocu1tst/TestBOCU1");
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1034