1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Copyright (C) 2002-2010, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: bocu1tst.c 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2002may27 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. Scherer 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This is the reference implementation of BOCU-1, 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the MIME-friendly form of the Binary Ordered Compression for Unicode, 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* taken directly from ### http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/bocu1/ 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The files bocu1.h and bocu1.c from the design folder are taken 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* verbatim (minus copyright and #include) and copied together into this file. 
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The reference code and some of the reference bocu1tst.c 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* is modified to run as part of the ICU cintltst 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* test framework (minus main(), log_ln() etc. instead of printf()). 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This reference implementation is used here to verify 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the ICU BOCU-1 implementation, which is 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* adapted for ICU conversion APIs and optimized. 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* ### links in design doc to here and to ucnvbocu.c 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h" 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cintltst.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1.h ---------------------------------- */ 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* BOCU-1 constants and macros 
---------------------------------------------- */ 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 encodes the code points of a Unicode string as 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a sequence of byte-encoded differences (slope detection), 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * preserving lexical order. 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Optimize the difference-taking for runs of Unicode text within 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * small scripts: 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Most small scripts are allocated within aligned 128-blocks of Unicode 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code points. Lexical order is preserved if the "previous code point" state 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is always moved into the middle of such a block. 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Additionally, "prev" is moved from anywhere in the Unihan and Hangul 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * areas into the middle of those areas. 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * C0 control codes and space are encoded with their US-ASCII bytes. 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "prev" is reset for C0 controls but not for space. 
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* initial value for "prev": middle of the ASCII range */ 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_ASCII_PREV 0x40 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* bounding byte values for differences */ 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_MIN 0x21 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_MIDDLE 0x90 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_MAX_LEAD 0xfe 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* add the L suffix to make computations with BOCU1_MAX_TRAIL work on 16-bit compilers */ 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_MAX_TRAIL 0xffL 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_RESET 0xff 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* number of lead bytes */ 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1) 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* adjust trail byte counts for the use of some C0 control byte values */ 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_TRAIL_CONTROLS_COUNT 20 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT) 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* number of trail bytes */ 
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT) 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of positive and negative single-byte codes 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (counting 0==BOCU1_MIDDLE among the positive ones) 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_SINGLE 64 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* number of lead bytes for positive and negative 2/3/4-byte sequences */ 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LEAD_2 43 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LEAD_3 3 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LEAD_4 1 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The difference value range for single-byters. */ 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1) 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE) 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The difference value range for double-byters. 
*/ 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The difference value range for 3-byters. */ 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_POS_3 \ 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The lead byte start values. 
*/ 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1) 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2) 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3) 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* ==BOCU1_MAX_LEAD */ 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1) 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2) 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3) 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* ==BOCU1_MIN+1 */ 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */ 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LENGTH_FROM_LEAD(lead) \ 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \ 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \ 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4) 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The length of a byte sequence, according to its packed form. 
*/ 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LENGTH_FROM_PACKED(packed) \ 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4) 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 12 commonly used C0 control codes (and space) are only used to encode 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * themselves directly, 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * which makes BOCU-1 MIME-usable and reasonably safe for 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII-oriented software. 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * These controls are 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0 NUL 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 7 BEL 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 8 BS 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 9 TAB 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a LF 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * b VT 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c FF 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * d CR 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * e SO 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * f SI 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1a SUB 
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1b ESC 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The other 20 C0 controls are also encoded directly (to preserve order) 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * but are also used as trail bytes in difference encoding 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (for better compression). 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t]) 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte value map for control codes, 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from external byte values 0x00..0x20 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to trail byte values 0..19 (0..0x13) as used in the difference calculation. 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * External byte values that are illegal as trail bytes are mapped to -1. 
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int8_t 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1ByteToTrail[BOCU1_MIN]={ 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 */ 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1, 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 8 9 a b c d e f */ 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru -1, -1, -1, -1, -1, -1, -1, -1, 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 10 11 12 13 14 15 16 17 */ 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 18 19 1a 1b 1c 1d 1e 1f */ 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13, 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 20 */ 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru -1 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte value map for control codes, 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from trail byte values 0..19 (0..0x13) as used in the difference calculation 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to external byte values 0x00..0x20. 
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int8_t 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={ 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 */ 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11, 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 8 9 a b c d e f */ 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 10 11 12 13 */ 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x1c, 0x1d, 0x1e, 0x1f 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Integer division and modulo with negative numerators 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * yields negative modulo results and quotients that are one more than 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * what we need here. 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This macro adjust the results so that the modulo-value m is always >=0. 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For positive n, the if() condition is always FALSE. 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param n Number to be split into quotient and rest. 
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Will be modified to contain the quotient. 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param d Divisor. 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param m Output variable for the rest (modulo result). 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define NEGDIVMOD(n, d, m) { \ 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (m)=(n)%(d); \ 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (n)/=(d); \ 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((m)<0) { \ 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --(n); \ 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (m)+=(d); \ 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } \ 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* State for BOCU-1 decoder function. 
*/ 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct Bocu1Rx { 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev, count, diff; 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct Bocu1Rx Bocu1Rx; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Function prototypes ------------------------------------------------------ */ 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* see bocu1.c */ 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerupackDiff(int32_t diff); 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruencodeBocu1(int32_t *pPrev, int32_t c); 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1(Bocu1Rx *pRx, uint8_t b); 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1.c ---------------------------------- */ 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* BOCU-1 implementation functions ------------------------------------------ */ 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 
Compute the next "previous" value for differencing 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from the current code point. 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c current code point, 0..0x10ffff 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return "previous code point" state value 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE int32_t 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1Prev(int32_t c) { 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* compute new prev */ 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0x3040<=c && c<=0x309f) { 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Hiragana is not 128-aligned */ 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x3070; 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(0x4e00<=c && c<=0x9fa5) { 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* CJK Unihan */ 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x4e00-BOCU1_REACH_NEG_2; 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(0xac00<=c && c<=0xd7a3) { 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Korean Hangul (cast to int32_t to avoid wraparound on 16-bit compilers) */ 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ((int32_t)0xd7a3+(int32_t)0xac00)/2; 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* mostly small scripts */ 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (c&~0x7f)+BOCU1_ASCII_PREV; 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and return a packed integer with them. 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The encoding favors small absolut differences with short encodings 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to compress runs of same-script characters. 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param diff difference value -0x10ffff..0x10ffff 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0x010000zz for 1-byte sequence zz 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0x0200yyzz for 2-byte sequence yy zz 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0x03xxyyzz for 3-byte sequence xx yy zz 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03) 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerupackDiff(int32_t diff) { 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result, m, lead, count, shift; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff>=BOCU1_REACH_NEG_1) { 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* mostly positive differences, and single-byte 
negative ones */ 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff<=BOCU1_REACH_POS_1) { 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* single byte */ 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x01000000|(BOCU1_MIDDLE+diff); 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(diff<=BOCU1_REACH_POS_2) { 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two bytes */ 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_POS_1+1; 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_POS_2; 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=1; 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(diff<=BOCU1_REACH_POS_3) { 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* three bytes */ 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_POS_2+1; 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_POS_3; 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=2; 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* four bytes */ 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_POS_3+1; 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_POS_4; 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=3; 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two- and four-byte negative differences */ 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff>=BOCU1_REACH_NEG_2) { 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two bytes */ 
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_1; 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_NEG_2; 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=1; 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(diff>=BOCU1_REACH_NEG_3) { 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* three bytes */ 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_2; 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_NEG_3; 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=2; 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* four bytes */ 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_3; 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_NEG_4; 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=3; 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* encode the length of the packed result */ 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<3) { 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result=(count+1)<<24; 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* count==3, MSB used for the lead byte */ { 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result=0; 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* calculate trail bytes like digits in itoa() */ 
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru shift=0; 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m)<<shift; 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru shift+=8; 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(--count>0); 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* add lead byte */ 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result|=(lead+diff)<<shift; 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 encoder function. 
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pPrev pointer to the integer that holds 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the "previous code point" state; 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the initial value should be 0 which 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * encodeBocu1 will set to the actual BOCU-1 initial state value 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c the code point to encode 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the packed 1/2/3/4-byte encoding, see packDiff(), 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * or 0 if an error occurs 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see packDiff 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruencodeBocu1(int32_t *pPrev, int32_t c) { 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev; 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pPrev==NULL || c<0 || c>0x10ffff) { 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal argument */ 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=*pPrev; 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prev==0) { 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* lenient handling of initial value 0 */ 
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=*pPrev=BOCU1_ASCII_PREV; 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<=0x20) { 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ISO C0 control & space: 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode directly for MIME compatibility, 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and reset state except for space, to not disrupt compression. 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c!=0x20) { 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pPrev=BOCU1_ASCII_PREV; 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x01000000|c; 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * all other Unicode code points c==U+0021..U+10ffff 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are encoded with the difference c-prev 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a new prev is computed from c, 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * placed in the middle of a 0x80-block (for most small scripts) or 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in the middle of the Unihan and Hangul blocks 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to statistically minimize the following difference 
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pPrev=bocu1Prev(c); 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return packDiff(c-prev); 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte lead bytes. 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b lead byte; 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<=BOCU1_MAX_LEAD 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return -1 (state change only) 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see decodeBocu1 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1LeadByte(Bocu1Rx *pRx, uint8_t b) { 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c, count; 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b>=BOCU1_START_NEG_2) { 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* positive difference */ 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b<BOCU1_START_POS_3) { 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two bytes */ 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru c=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=1; 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(b<BOCU1_START_POS_4) { 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* three bytes */ 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1; 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=2; 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* four bytes */ 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=BOCU1_REACH_POS_3+1; 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=3; 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* negative difference */ 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b>=BOCU1_START_NEG_3) { 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two bytes */ 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=1; 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(b>BOCU1_MIN) { 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* three bytes */ 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2; 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=2; 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* four 
bytes */ 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3; 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=3; 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set the state for decoding the trail byte(s) */ 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->diff=c; 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=count; 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte trail bytes. 
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b trail byte 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return result value, same as decodeBocu1 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see decodeBocu1 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1TrailByte(Bocu1Rx *pRx, uint8_t b) { 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t t, c, count; 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b<=0x20) { 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* skip some C0 controls and make the trail byte range contiguous */ 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t=bocu1ByteToTrail[b]; 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t<0) { 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal trail byte value */ 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=BOCU1_ASCII_PREV; 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=0; 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -99; 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if BOCU1_MAX_TRAIL<0xff 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(b>BOCU1_MAX_TRAIL) { 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -99; 
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t=(int32_t)b-BOCU1_TRAIL_BYTE_OFFSET; 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* add trail byte into difference and decrement count */ 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=pRx->diff; 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=pRx->count; 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count==1) { 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* final trail byte, deliver a code point */ 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=pRx->prev+c+t; 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0<=c && c<=0x10ffff) { 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* valid code point result */ 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=bocu1Prev(c); 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=0; 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal code point result */ 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=BOCU1_ASCII_PREV; 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=0; 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -99; 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* intermediate trail byte */ 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count==2) { 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->diff=c+t*BOCU1_TRAIL_COUNT; 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* count==3 */ { 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->diff=c+t*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT; 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=count-1; 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 decoder function. 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure; 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the initial values should be 0 which 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * decodeBocu1 will set to actual initial state values 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b an input byte 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0..0x10ffff for a result code point 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * -1 if only the state changed without code point output 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <-1 if an error occurs 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1(Bocu1Rx *pRx, uint8_t b) { 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev, c, count; 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pRx==NULL) { 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal argument */ 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -99; 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=pRx->prev; 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prev==0) { 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* lenient handling of initial 0 values */ 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=pRx->prev=BOCU1_ASCII_PREV; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=pRx->count=0; 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=pRx->count; 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count==0) { 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* byte in lead position */ 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b<=0x20) { 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Direct-encoded C0 control code or space. 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Reset prev for C0 control codes but not for space. 
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b!=0x20) { 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=BOCU1_ASCII_PREV; 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return b; 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * b is a difference lead byte. 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return a code point directly from a single-byte difference. 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For multi-byte difference lead bytes, set the decoder state 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with the partial difference value from the lead byte and 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with the number of trail bytes. 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For four-byte differences, the signedness also affects the 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * first trail byte, which has special handling farther below. 
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b>=BOCU1_START_NEG_2 && b<BOCU1_START_POS_2) { 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* single-byte difference */ 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=prev+((int32_t)b-BOCU1_MIDDLE); 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=bocu1Prev(c); 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(b==BOCU1_RESET) { 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* only reset the state, no code point */ 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=BOCU1_ASCII_PREV; 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return decodeBocu1LeadByte(pRx, b); 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in any position */ 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return decodeBocu1TrailByte(pRx, b); 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1tst.c ------------------------------- */ 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* test code ---------------------------------------------------------------- */ 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
/* test code options */

/* ignore comma when processing name lists in testText() */
#define TEST_IGNORE_COMMA 1

/**
 * Serialize a packed BOCU-1 byte sequence into a byte array,
 * without overflow check.
 * The length is taken from BOCU1_LENGTH_FROM_PACKED(packed); for a
 * length of 1..4 that many low-order bytes of packed are written,
 * most significant of them first. Any other length writes nothing.
 * Test function.
 *
 * @param packed packed BOCU-1 byte sequence, see packDiff()
 * @param p      pointer to byte array; must have room for up to 4 bytes
 * @return number of bytes, as reported by BOCU1_LENGTH_FROM_PACKED()
 *
 * @see packDiff
 */
static int32_t
writePacked(int32_t packed, uint8_t *p) {
    int32_t length=BOCU1_LENGTH_FROM_PACKED(packed);
    int32_t shift;

    if(1<=length && length<=4) {
        /* walk from the highest used byte down to the lowest */
        for(shift=8*(length-1); shift>=0; shift-=8) {
            *p++=(uint8_t)(packed>>shift);
        }
    }

    return length;
}
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param initialPrev bogus "previous code point" value to make sure that 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the resulting code point is in the range 0..0x10ffff 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param packed packed BOCU-1 byte sequence 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the difference to initialPrev 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see packDiff 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see writeDiff 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruunpackDiff(int32_t initialPrev, int32_t packed) { 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Bocu1Rx rx={ 0, 0, 0 }; 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count; 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rx.prev=initialPrev; 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=BOCU1_LENGTH_FROM_PACKED(packed); 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(count) { 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decodeBocu1(&rx, (uint8_t)(packed>>24)); 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decodeBocu1(&rx, (uint8_t)(packed>>16)); 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
decodeBocu1(&rx, (uint8_t)(packed>>8)); 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* subtract initial prev */ 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return decodeBocu1(&rx, (uint8_t)packed)-initialPrev; 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -0x7fffffff; 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes, 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * preserving lexical order. 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Also checks for roundtripping of the difference encoding. 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test function. 
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param diff difference value to test, -0x10ffff..0x10ffff 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to output byte array 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return p advanced by number of bytes output 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see unpackDiff 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint8_t * 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruwriteDiff(int32_t diff, uint8_t *p) { 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* generate the difference as a packed value and serialize it */ 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t packed, initialPrev; 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru packed=packDiff(diff); 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * bogus initial "prev" to work around 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code point range check in decodeBocu1() 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff<=0) { 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initialPrev=0x10ffff; 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initialPrev=-1; 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff!=unpackDiff(initialPrev, packed)) { 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("error: unpackDiff(packDiff(diff=%ld)=0x%08lx)=%ld!=diff\n", 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff, packed, unpackDiff(initialPrev, packed)); 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return p+writePacked(packed, p); 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode a UTF-16 string in BOCU-1. 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Does not check for overflows, but otherwise useful function. 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s input UTF-16 string 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param length number of UChar code units in s 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to output byte array 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return number of bytes output 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruwriteString(const UChar *s, int32_t length, uint8_t *p) { 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *p0; 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c, prev, i; 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=0; 
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p0=p; 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=0; 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(i<length) { 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTF_NEXT_CHAR(s, i, length, c); 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p+=writePacked(encodeBocu1(&prev, c), p); 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(p-p0); 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Decode a BOCU-1 byte sequence to a UTF-16 string. 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Does not check for overflows, but otherwise useful function. 
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to input BOCU-1 bytes 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param length number of input bytes 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s point to output UTF-16 string array 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return number of UChar code units output 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerureadString(const uint8_t *p, int32_t length, UChar *s) { 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Bocu1Rx rx={ 0, 0, 0 }; 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c, i, sLength; 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=sLength=0; 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(i<length) { 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=decodeBocu1(&rx, p[i++]); 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<-1) { 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("error: readString detects encoding error at string index %ld\n", i); 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c>=0) { 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTF_APPEND_CHAR_UNSAFE(s, sLength, c); 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return sLength; 
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE char 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruhexDigit(uint8_t digit) { 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit); 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Pretty-print 0-terminated byte values. 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Helper function for test output. 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param bytes 0-terminated byte array to print 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruprintBytes(uint8_t *bytes, char *out) { 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t b; 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=0; 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((b=*bytes++)!=0) { 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out++=' '; 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out++=hexDigit((uint8_t)(b>>4)); 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out++=hexDigit((uint8_t)(b&0xf)); 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++i; 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=3*(5-i); 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(i>0) { 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out++=' '; 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --i; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out=0; 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Basic BOCU-1 test function, called when there are no command line arguments. 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Prints some of the #define values and performs round-trip tests of the 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * difference encoding and decoding. 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTestBOCU1RefDiff(void) { 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buf1[80], buf2[80]; 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t prev[5], level[5]; 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, cmp, countErrors; 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("reach of single bytes: %ld\n", 1+BOCU1_REACH_POS_1-BOCU1_REACH_NEG_1); 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("reach of 2 bytes : %ld\n", 1+BOCU1_REACH_POS_2-BOCU1_REACH_NEG_2); 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("reach of 3 bytes : %ld\n\n", 1+BOCU1_REACH_POS_3-BOCU1_REACH_NEG_3); 
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_REACH_NEG_1 %8ld BOCU1_REACH_POS_1 %8ld\n", BOCU1_REACH_NEG_1, BOCU1_REACH_POS_1); 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_REACH_NEG_2 %8ld BOCU1_REACH_POS_2 %8ld\n", BOCU1_REACH_NEG_2, BOCU1_REACH_POS_2); 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_REACH_NEG_3 %8ld BOCU1_REACH_POS_3 %8ld\n\n", BOCU1_REACH_NEG_3, BOCU1_REACH_POS_3); 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_MIDDLE 0x%02x\n", BOCU1_MIDDLE); 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_START_NEG_2 0x%02x BOCU1_START_POS_2 0x%02x\n", BOCU1_START_NEG_2, BOCU1_START_POS_2); 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_START_NEG_3 0x%02x BOCU1_START_POS_3 0x%02x\n\n", BOCU1_START_NEG_3, BOCU1_START_POS_3); 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test packDiff() & unpackDiff() with some specific values */ 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(0, level); 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(1, level); 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(65, level); 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(130, level); 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(30000, level); 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(1000000, level); 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(-65, level); 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(-130, level); 
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(-30000, level); 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(-1000000, level); 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test that each value is smaller than any following one */ 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru countErrors=0; 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=-0x10ffff; 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *writeDiff(i, prev)=0; 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* show first number and bytes */ 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(prev, buf1); 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" wD(%8ld) %s\n", i, buf1); 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(++i; i<=0x10ffff; ++i) { 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *writeDiff(i, level)=0; 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cmp=strcmp((const char *)prev, (const char *)level); 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(BOCU1_LENGTH_FROM_LEAD(level[0])!=(int32_t)strlen((const char *)level)) { 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("BOCU1_LENGTH_FROM_LEAD(0x%02x)=%ld!=%ld=strlen(writeDiff(%ld))\n", 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru level[0], BOCU1_LENGTH_FROM_LEAD(level[0]), strlen((const char *)level), i); 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(cmp<0) { 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i==0 || i==1 || strlen((const char 
*)prev)!=strlen((const char *)level)) { 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the result is good, then print only if the length changed 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to get little but interesting output 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(prev, buf1); 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(level, buf2); 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("ok: strcmp(wD(%8ld), wD(%8ld))=%2d %s%s\n", i-1, i, cmp, buf1, buf2); 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++countErrors; 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(prev, buf1); 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(level, buf2); 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("wrong: strcmp(wD(%8ld), wD(%8ld))=%2d %s%s\n", i-1, i, cmp, buf1, buf2); 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* remember the previous bytes */ 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memcpy(prev, level, 4); 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* show last number and bytes */ 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes((uint8_t *)"", buf1); 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(prev, buf2); 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" 
wD(%8ld) %s%s\n", i-1, buf1, buf2); 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(countErrors==0) { 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("writeDiff(-0x10ffff..0x10ffff) works fine\n"); 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("writeDiff(-0x10ffff..0x10ffff) violates lexical ordering in %d cases\n", countErrors); 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* output signature byte sequence */ 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=0; 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writePacked(encodeBocu1(&i, 0xfeff), level); 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("\nBOCU-1 signature byte sequence: %02x %02x %02x\n", 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru level[0], level[1], level[2]); 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* cintltst code ------------------------------------------------------------ */ 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t DEFAULT_BUFFER_SIZE = 30000; 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* test one string with the ICU and the reference BOCU-1 implementations */ 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruroundtripBOCU1(UConverter *bocu1, int32_t number, const UChar *text, int32_t length) { 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *roundtripRef, *roundtripICU; 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bocu1Ref, *bocu1ICU; 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bocu1RefLength, bocu1ICULength, roundtripRefLength, roundtripICULength; 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode; 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripRef = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripICU = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1Ref = malloc(DEFAULT_BUFFER_SIZE); 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1ICU = malloc(DEFAULT_BUFFER_SIZE); 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode -> BOCU-1 */ 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1RefLength=writeString(text, length, (uint8_t *)bocu1Ref); 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, DEFAULT_BUFFER_SIZE, text, length, &errorCode); 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucnv_fromUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode)); 
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(bocu1RefLength!=bocu1ICULength || 0!=uprv_memcmp(bocu1Ref, bocu1ICU, bocu1RefLength)) { 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Unicode(%d)[%d] -> BOCU-1: reference[%d]!=ICU[%d]\n", number, length, bocu1RefLength, bocu1ICULength); 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* BOCU-1 -> Unicode */ 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripRefLength=readString((uint8_t *)bocu1Ref, bocu1RefLength, roundtripRef); 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(roundtripRefLength<0) { 90950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho free(roundtripICU); 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; /* readString() found an error and reported it */ 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, DEFAULT_BUFFER_SIZE, bocu1ICU, bocu1ICULength, &errorCode); 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucnv_toUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode)); 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length!=roundtripRefLength || 0!=u_memcmp(text, roundtripRef, length)) { 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("BOCU-1 -> Unicode: original(%d)[%d]!=reference[%d]\n", number, length, roundtripRefLength); 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(roundtripRefLength!=roundtripICULength || 0!=u_memcmp(roundtripRef, roundtripICU, roundtripRefLength)) { 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("BOCU-1 -> Unicode: reference(%d)[%d]!=ICU[%d]\n", number, roundtripRefLength, roundtripICULength); 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(roundtripRef); 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(roundtripICU); 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(bocu1Ref); 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(bocu1ICU); 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar feff[]={ 0xfeff }; 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ascii[]={ 0x61, 0x62, 0x20, 0x63, 0x61 }; 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar crlf[]={ 0xd, 0xa, 0x20 }; 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar nul[]={ 0 }; 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar latin[]={ 0xdf, 0xe6 }; 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar devanagari[]={ 
0x930, 0x20, 0x918, 0x909 }; 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar hiragana[]={ 0x3086, 0x304d, 0x20, 0x3053, 0x4000 }; 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar unihan[]={ 0x4e00, 0x7777, 0x20, 0x9fa5, 0x4e00 }; 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar hangul[]={ 0xac00, 0xbcde, 0x20, 0xd7a3 }; 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar surrogates[]={ 0xdc00, 0xd800 }; /* single surrogates, unmatched! */ 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane1[]={ 0xd800, 0xdc00 }; 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane2[]={ 0xd845, 0xdddd }; 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane15[]={ 0xdbbb, 0xddee, 0x20 }; 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane16[]={ 0xdbff, 0xdfff }; 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar c0[]={ 1, 0xe40, 0x20, 9 }; 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const struct { 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} strings[]={ 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { feff, LENGTHOF(feff) }, 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { ascii, LENGTHOF(ascii) }, 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { crlf, LENGTHOF(crlf) }, 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { nul, LENGTHOF(nul) }, 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { latin, LENGTHOF(latin) }, 
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { devanagari, LENGTHOF(devanagari) }, 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { hiragana, LENGTHOF(hiragana) }, 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { unihan, LENGTHOF(unihan) }, 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { hangul, LENGTHOF(hangul) }, 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { surrogates, LENGTHOF(surrogates) }, 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { plane1, LENGTHOF(plane1) }, 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { plane2, LENGTHOF(plane2) }, 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { plane15, LENGTHOF(plane15) }, 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { plane16, LENGTHOF(plane16) }, 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { c0, LENGTHOF(c0) } 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Verify that the ICU BOCU-1 implementation produces the same results as 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the reference implementation from the design folder. 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generate some texts and convert them with both converters, verifying 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * identical results and roundtripping. 
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTestBOCU1(void) { 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *text; 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, length; 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverter *bocu1; 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode; 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1=ucnv_open("BOCU-1", &errorCode); 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("error: unable to open BOCU-1 converter: %s\n", u_errorName(errorCode)); 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* text 1: each of strings[] once */ 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<LENGTHOF(strings); ++i) { 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(text+length, strings[i].s, strings[i].length); 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length+=strings[i].length; 
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripBOCU1(bocu1, 1, text, length); 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* text 2: each of strings[] twice */ 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<LENGTHOF(strings); ++i) { 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(text+length, strings[i].s, strings[i].length); 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length+=strings[i].length; 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(text+length, strings[i].s, strings[i].length); 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length+=strings[i].length; 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripBOCU1(bocu1, 2, text, length); 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* text 3: each of strings[] many times (set step vs. 
|strings| so that all strings are used) */ 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=1; length<5000; i+=7) { 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i>=LENGTHOF(strings)) { 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i-=LENGTHOF(strings); 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(text+length, strings[i].s, strings[i].length); 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length+=strings[i].length; 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripBOCU1(bocu1, 3, text, length); 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(bocu1); 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(text); 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC void addBOCU1Tests(TestNode** root); 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC void 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruaddBOCU1Tests(TestNode** root) { 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, TestBOCU1RefDiff, "tsconv/bocu1tst/TestBOCU1RefDiff"); 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, TestBOCU1, "tsconv/bocu1tst/TestBOCU1"); 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1033