164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 61b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2002-2015, International Business Machines 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: bocu1tst.c 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2002may27 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. 
Scherer 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This is the reference implementation of BOCU-1, 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the MIME-friendly form of the Binary Ordered Compression for Unicode, 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* taken directly from ### http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/bocu1/ 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The files bocu1.h and bocu1.c from the design folder are taken 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* verbatim (minus copyright and #include) and copied together into this file. 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* The reference code and some of the reference bocu1tst.c 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* is modified to run as part of the ICU cintltst 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* test framework (minus main(), log_ln() etc. instead of printf()). 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This reference implementation is used here to verify 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the ICU BOCU-1 implementation, which is 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* adapted for ICU conversion APIs and optimized. 
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* ### links in design doc to here and to ucnvbocu.c 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h" 3683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h" 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cintltst.h" 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1.h ---------------------------------- */ 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* BOCU-1 constants and macros ---------------------------------------------- */ 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 encodes the code points of a Unicode string as 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a sequence of byte-encoded differences (slope detection), 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * preserving lexical order. 
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Optimize the difference-taking for runs of Unicode text within 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * small scripts: 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Most small scripts are allocated within aligned 128-blocks of Unicode 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code points. Lexical order is preserved if the "previous code point" state 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is always moved into the middle of such a block. 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Additionally, "prev" is moved from anywhere in the Unihan and Hangul 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * areas into the middle of those areas. 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * C0 control codes and space are encoded with their US-ASCII bytes. 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "prev" is reset for C0 controls but not for space. 
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* initial value for "prev": middle of the ASCII range */ 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_ASCII_PREV 0x40 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* bounding byte values for differences */ 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_MIN 0x21 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_MIDDLE 0x90 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_MAX_LEAD 0xfe 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* add the L suffix to make computations with BOCU1_MAX_TRAIL work on 16-bit compilers */ 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_MAX_TRAIL 0xffL 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_RESET 0xff 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* number of lead bytes */ 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1) 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* adjust trail byte counts for the use of some C0 control byte values */ 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_TRAIL_CONTROLS_COUNT 20 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT) 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* number of trail bytes */ 
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT) 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of positive and negative single-byte codes 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (counting 0==BOCU1_MIDDLE among the positive ones) 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_SINGLE 64 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* number of lead bytes for positive and negative 2/3/4-byte sequences */ 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LEAD_2 43 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LEAD_3 3 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LEAD_4 1 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The difference value range for single-byters. */ 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1) 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE) 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The difference value range for double-byters. 
*/ 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The difference value range for 3-byters. */ 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_POS_3 \ 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The lead byte start values. 
*/ 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1) 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2) 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3) 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* ==BOCU1_MAX_LEAD */ 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1) 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2) 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3) 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* ==BOCU1_MIN+1 */ 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */ 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LENGTH_FROM_LEAD(lead) \ 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \ 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \ 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4) 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* The length of a byte sequence, according to its packed form. 
*/ 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_LENGTH_FROM_PACKED(packed) \ 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4) 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 12 commonly used C0 control codes (and space) are only used to encode 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * themselves directly, 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * which makes BOCU-1 MIME-usable and reasonably safe for 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII-oriented software. 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * These controls are 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0 NUL 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 7 BEL 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 8 BS 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 9 TAB 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a LF 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * b VT 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c FF 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * d CR 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * e SO 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * f SI 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1a SUB 
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1b ESC 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The other 20 C0 controls are also encoded directly (to preserve order) 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * but are also used as trail bytes in difference encoding 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (for better compression). 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t]) 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte value map for control codes, 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from external byte values 0x00..0x20 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to trail byte values 0..19 (0..0x13) as used in the difference calculation. 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * External byte values that are illegal as trail bytes are mapped to -1. 
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int8_t 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1ByteToTrail[BOCU1_MIN]={ 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 */ 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1, 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 8 9 a b c d e f */ 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru -1, -1, -1, -1, -1, -1, -1, -1, 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 10 11 12 13 14 15 16 17 */ 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 18 19 1a 1b 1c 1d 1e 1f */ 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13, 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 20 */ 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru -1 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte value map for control codes, 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from trail byte values 0..19 (0..0x13) as used in the difference calculation 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to external byte values 0x00..0x20. 
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int8_t 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={ 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 */ 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11, 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 8 9 a b c d e f */ 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 10 11 12 13 */ 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x1c, 0x1d, 0x1e, 0x1f 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Integer division and modulo with negative numerators 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * yields negative modulo results and quotients that are one more than 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * what we need here. 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This macro adjust the results so that the modulo-value m is always >=0. 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For positive n, the if() condition is always FALSE. 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param n Number to be split into quotient and rest. 
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Will be modified to contain the quotient. 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param d Divisor. 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param m Output variable for the rest (modulo result). 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define NEGDIVMOD(n, d, m) { \ 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (m)=(n)%(d); \ 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (n)/=(d); \ 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((m)<0) { \ 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --(n); \ 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (m)+=(d); \ 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } \ 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* State for BOCU-1 decoder function. 
*/ 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct Bocu1Rx { 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev, count, diff; 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct Bocu1Rx Bocu1Rx; 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Function prototypes ------------------------------------------------------ */ 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* see bocu1.c */ 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerupackDiff(int32_t diff); 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruencodeBocu1(int32_t *pPrev, int32_t c); 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1(Bocu1Rx *pRx, uint8_t b); 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1.c ---------------------------------- */ 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* BOCU-1 implementation functions ------------------------------------------ */ 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 
Compute the next "previous" value for differencing 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from the current code point. 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c current code point, 0..0x10ffff 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return "previous code point" state value 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 25483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic int32_t 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querubocu1Prev(int32_t c) { 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* compute new prev */ 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0x3040<=c && c<=0x309f) { 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Hiragana is not 128-aligned */ 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x3070; 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(0x4e00<=c && c<=0x9fa5) { 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* CJK Unihan */ 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x4e00-BOCU1_REACH_NEG_2; 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(0xac00<=c && c<=0xd7a3) { 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Korean Hangul (cast to int32_t to avoid wraparound on 16-bit compilers) */ 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ((int32_t)0xd7a3+(int32_t)0xac00)/2; 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* mostly small scripts */ 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (c&~0x7f)+BOCU1_ASCII_PREV; 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and return a packed integer with them. 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The encoding favors small absolut differences with short encodings 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to compress runs of same-script characters. 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param diff difference value -0x10ffff..0x10ffff 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0x010000zz for 1-byte sequence zz 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0x0200yyzz for 2-byte sequence yy zz 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0x03xxyyzz for 3-byte sequence xx yy zz 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03) 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerupackDiff(int32_t diff) { 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result, m, lead, count, shift; 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff>=BOCU1_REACH_NEG_1) { 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* mostly positive differences, and single-byte 
negative ones */ 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff<=BOCU1_REACH_POS_1) { 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* single byte */ 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x01000000|(BOCU1_MIDDLE+diff); 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(diff<=BOCU1_REACH_POS_2) { 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two bytes */ 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_POS_1+1; 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_POS_2; 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=1; 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(diff<=BOCU1_REACH_POS_3) { 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* three bytes */ 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_POS_2+1; 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_POS_3; 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=2; 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* four bytes */ 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_POS_3+1; 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_POS_4; 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=3; 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two- and four-byte negative differences */ 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff>=BOCU1_REACH_NEG_2) { 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two bytes */ 
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_1; 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_NEG_2; 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=1; 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(diff>=BOCU1_REACH_NEG_3) { 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* three bytes */ 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_2; 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_NEG_3; 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=2; 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* four bytes */ 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff-=BOCU1_REACH_NEG_3; 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lead=BOCU1_START_NEG_4; 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=3; 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* encode the length of the packed result */ 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<3) { 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result=(count+1)<<24; 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* count==3, MSB used for the lead byte */ { 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result=0; 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* calculate trail bytes like digits in itoa() */ 
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru shift=0; 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result|=BOCU1_TRAIL_TO_BYTE(m)<<shift; 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru shift+=8; 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(--count>0); 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* add lead byte */ 34764339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert result |= (uint32_t)(lead+diff)<<shift; 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 encoder function. 
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pPrev pointer to the integer that holds 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the "previous code point" state; 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the initial value should be 0 which 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * encodeBocu1 will set to the actual BOCU-1 initial state value 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c the code point to encode 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the packed 1/2/3/4-byte encoding, see packDiff(), 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * or 0 if an error occurs 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see packDiff 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruencodeBocu1(int32_t *pPrev, int32_t c) { 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev; 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pPrev==NULL || c<0 || c>0x10ffff) { 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal argument */ 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=*pPrev; 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prev==0) { 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* lenient handling of initial value 0 */ 
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=*pPrev=BOCU1_ASCII_PREV; 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<=0x20) { 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ISO C0 control & space: 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode directly for MIME compatibility, 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and reset state except for space, to not disrupt compression. 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c!=0x20) { 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pPrev=BOCU1_ASCII_PREV; 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x01000000|c; 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * all other Unicode code points c==U+0021..U+10ffff 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are encoded with the difference c-prev 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a new prev is computed from c, 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * placed in the middle of a 0x80-block (for most small scripts) or 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in the middle of the Unihan and Hangul blocks 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to statistically minimize the following difference 
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pPrev=bocu1Prev(c); 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return packDiff(c-prev); 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte lead bytes. 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b lead byte; 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<=BOCU1_MAX_LEAD 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return -1 (state change only) 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see decodeBocu1 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1LeadByte(Bocu1Rx *pRx, uint8_t b) { 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c, count; 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b>=BOCU1_START_NEG_2) { 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* positive difference */ 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b<BOCU1_START_POS_3) { 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two bytes */ 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru c=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=1; 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(b<BOCU1_START_POS_4) { 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* three bytes */ 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1; 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=2; 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* four bytes */ 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=BOCU1_REACH_POS_3+1; 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=3; 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* negative difference */ 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b>=BOCU1_START_NEG_3) { 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* two bytes */ 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=1; 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(b>BOCU1_MIN) { 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* three bytes */ 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2; 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=2; 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* four 
bytes */ 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3; 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=3; 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set the state for decoding the trail byte(s) */ 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->diff=c; 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=count; 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Function for BOCU-1 decoder; handles multi-byte trail bytes. 
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b trail byte 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return result value, same as decodeBocu1 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see decodeBocu1 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1TrailByte(Bocu1Rx *pRx, uint8_t b) { 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t t, c, count; 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b<=0x20) { 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* skip some C0 controls and make the trail byte range contiguous */ 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t=bocu1ByteToTrail[b]; 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t<0) { 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal trail byte value */ 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=BOCU1_ASCII_PREV; 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=0; 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -99; 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if BOCU1_MAX_TRAIL<0xff 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(b>BOCU1_MAX_TRAIL) { 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -99; 
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t=(int32_t)b-BOCU1_TRAIL_BYTE_OFFSET; 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* add trail byte into difference and decrement count */ 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=pRx->diff; 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=pRx->count; 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count==1) { 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* final trail byte, deliver a code point */ 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=pRx->prev+c+t; 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0<=c && c<=0x10ffff) { 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* valid code point result */ 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=bocu1Prev(c); 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=0; 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal code point result */ 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=BOCU1_ASCII_PREV; 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=0; 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -99; 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* intermediate trail byte */ 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count==2) { 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->diff=c+t*BOCU1_TRAIL_COUNT; 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* count==3 */ { 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->diff=c+t*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT; 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->count=count-1; 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * BOCU-1 decoder function. 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pRx pointer to the decoder state structure; 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the initial values should be 0 which 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * decodeBocu1 will set to actual initial state values 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param b an input byte 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0..0x10ffff for a result code point 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * -1 if only the state changed without code point output 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <-1 if an error occurs 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerudecodeBocu1(Bocu1Rx *pRx, uint8_t b) { 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev, c, count; 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pRx==NULL) { 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal argument */ 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -99; 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=pRx->prev; 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prev==0) { 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* lenient handling of initial 0 values */ 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=pRx->prev=BOCU1_ASCII_PREV; 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=pRx->count=0; 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=pRx->count; 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count==0) { 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* byte in lead position */ 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b<=0x20) { 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Direct-encoded C0 control code or space. 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Reset prev for C0 control codes but not for space. 
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b!=0x20) { 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=BOCU1_ASCII_PREV; 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return b; 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * b is a difference lead byte. 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return a code point directly from a single-byte difference. 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For multi-byte difference lead bytes, set the decoder state 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with the partial difference value from the lead byte and 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with the number of trail bytes. 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For four-byte differences, the signedness also affects the 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * first trail byte, which has special handling farther below. 
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b>=BOCU1_START_NEG_2 && b<BOCU1_START_POS_2) { 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* single-byte difference */ 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=prev+((int32_t)b-BOCU1_MIDDLE); 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=bocu1Prev(c); 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(b==BOCU1_RESET) { 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* only reset the state, no code point */ 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pRx->prev=BOCU1_ASCII_PREV; 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return decodeBocu1LeadByte(pRx, b); 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in any position */ 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return decodeBocu1TrailByte(pRx, b); 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* icuhtml/design/conversion/bocu1/bocu1tst.c ------------------------------- */ 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* test code ---------------------------------------------------------------- */ 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
/* test code options */

/* ignore comma when processing name lists in testText() */
#define TEST_IGNORE_COMMA 1

/**
 * Write a packed BOCU-1 byte sequence into a byte array,
 * without overflow check.
 * Test function.
 *
 * The bytes are stored most significant first; nothing is written
 * when the length taken from the packed value is not 1..4.
 *
 * @param packed packed BOCU-1 byte sequence, see packDiff()
 * @param p pointer to byte array
 * @return number of bytes
 *
 * @see packDiff
 */
static int32_t
writePacked(int32_t packed, uint8_t *p) {
    const int32_t length=BOCU1_LENGTH_FROM_PACKED(packed);
    int32_t shift;

    if(1<=length && length<=4) {
        /* emit from the highest used byte down to the lowest */
        for(shift=8*(length-1); shift>=0; shift-=8) {
            *p++=(uint8_t)(packed>>shift);
        }
    }

    return length;
}
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param initialPrev bogus "previous code point" value to make sure that 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the resulting code point is in the range 0..0x10ffff 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param packed packed BOCU-1 byte sequence 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the difference to initialPrev 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see packDiff 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see writeDiff 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruunpackDiff(int32_t initialPrev, int32_t packed) { 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Bocu1Rx rx={ 0, 0, 0 }; 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count; 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rx.prev=initialPrev; 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=BOCU1_LENGTH_FROM_PACKED(packed); 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(count) { 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decodeBocu1(&rx, (uint8_t)(packed>>24)); 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decodeBocu1(&rx, (uint8_t)(packed>>16)); 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
decodeBocu1(&rx, (uint8_t)(packed>>8)); 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* subtract initial prev */ 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return decodeBocu1(&rx, (uint8_t)packed)-initialPrev; 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -0x7fffffff; 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes, 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * preserving lexical order. 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Also checks for roundtripping of the difference encoding. 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test function. 
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param diff difference value to test, -0x10ffff..0x10ffff 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to output byte array 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return p advanced by number of bytes output 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see unpackDiff 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint8_t * 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruwriteDiff(int32_t diff, uint8_t *p) { 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* generate the difference as a packed value and serialize it */ 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t packed, initialPrev; 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru packed=packDiff(diff); 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * bogus initial "prev" to work around 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code point range check in decodeBocu1() 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff<=0) { 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initialPrev=0x10ffff; 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initialPrev=-1; 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(diff!=unpackDiff(initialPrev, packed)) { 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("error: unpackDiff(packDiff(diff=%ld)=0x%08lx)=%ld!=diff\n", 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru diff, packed, unpackDiff(initialPrev, packed)); 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return p+writePacked(packed, p); 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Encode a UTF-16 string in BOCU-1. 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Does not check for overflows, but otherwise useful function. 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s input UTF-16 string 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param length number of UChar code units in s 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to output byte array 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return number of bytes output 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruwriteString(const UChar *s, int32_t length, uint8_t *p) { 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *p0; 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c, prev, i; 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=0; 
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p0=p; 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=0; 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(i<length) { 71983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U16_NEXT(s, i, length, c); 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p+=writePacked(encodeBocu1(&prev, c), p); 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(p-p0); 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Decode a BOCU-1 byte sequence to a UTF-16 string. 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Does not check for overflows, but otherwise useful function. 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param p pointer to input BOCU-1 bytes 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param length number of input bytes 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s point to output UTF-16 string array 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return number of UChar code units output 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerureadString(const uint8_t *p, int32_t length, UChar *s) { 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Bocu1Rx rx={ 0, 0, 0 }; 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c, i, sLength; 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=sLength=0; 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(i<length) { 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=decodeBocu1(&rx, p[i++]); 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<-1) { 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("error: readString detects encoding error at string index %ld\n", i); 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c>=0) { 74783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U16_APPEND_UNSAFE(s, sLength, c); 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return sLength; 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 75383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic char 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruhexDigit(uint8_t digit) { 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit); 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Pretty-print 0-terminated byte values. 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Helper function for test output. 
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param bytes 0-terminated byte array to print 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruprintBytes(uint8_t *bytes, char *out) { 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t b; 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=0; 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((b=*bytes++)!=0) { 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out++=' '; 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out++=hexDigit((uint8_t)(b>>4)); 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out++=hexDigit((uint8_t)(b&0xf)); 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++i; 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=3*(5-i); 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(i>0) { 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out++=' '; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --i; 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *out=0; 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Basic BOCU-1 test function, called when there are no command line arguments. 
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Prints some of the #define values and performs round-trip tests of the 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * difference encoding and decoding. 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTestBOCU1RefDiff(void) { 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buf1[80], buf2[80]; 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t prev[5], level[5]; 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, cmp, countErrors; 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("reach of single bytes: %ld\n", 1+BOCU1_REACH_POS_1-BOCU1_REACH_NEG_1); 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("reach of 2 bytes : %ld\n", 1+BOCU1_REACH_POS_2-BOCU1_REACH_NEG_2); 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("reach of 3 bytes : %ld\n\n", 1+BOCU1_REACH_POS_3-BOCU1_REACH_NEG_3); 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_REACH_NEG_1 %8ld BOCU1_REACH_POS_1 %8ld\n", BOCU1_REACH_NEG_1, BOCU1_REACH_POS_1); 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_REACH_NEG_2 %8ld BOCU1_REACH_POS_2 %8ld\n", BOCU1_REACH_NEG_2, BOCU1_REACH_POS_2); 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_REACH_NEG_3 %8ld BOCU1_REACH_POS_3 %8ld\n\n", BOCU1_REACH_NEG_3, BOCU1_REACH_POS_3); 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_MIDDLE 0x%02x\n", BOCU1_MIDDLE); 
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_START_NEG_2 0x%02x BOCU1_START_POS_2 0x%02x\n", BOCU1_START_NEG_2, BOCU1_START_POS_2); 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" BOCU1_START_NEG_3 0x%02x BOCU1_START_POS_3 0x%02x\n\n", BOCU1_START_NEG_3, BOCU1_START_POS_3); 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test packDiff() & unpackDiff() with some specific values */ 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(0, level); 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(1, level); 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(65, level); 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(130, level); 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(30000, level); 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(1000000, level); 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(-65, level); 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(-130, level); 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(-30000, level); 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writeDiff(-1000000, level); 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test that each value is smaller than any following one */ 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru countErrors=0; 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=-0x10ffff; 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *writeDiff(i, prev)=0; 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* show first number and bytes */ 
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(prev, buf1); 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" wD(%8ld) %s\n", i, buf1); 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(++i; i<=0x10ffff; ++i) { 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *writeDiff(i, level)=0; 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cmp=strcmp((const char *)prev, (const char *)level); 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(BOCU1_LENGTH_FROM_LEAD(level[0])!=(int32_t)strlen((const char *)level)) { 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("BOCU1_LENGTH_FROM_LEAD(0x%02x)=%ld!=%ld=strlen(writeDiff(%ld))\n", 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru level[0], BOCU1_LENGTH_FROM_LEAD(level[0]), strlen((const char *)level), i); 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(cmp<0) { 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i==0 || i==1 || strlen((const char *)prev)!=strlen((const char *)level)) { 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the result is good, then print only if the length changed 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to get little but interesting output 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(prev, buf1); 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(level, buf2); 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("ok: strcmp(wD(%8ld), wD(%8ld))=%2d %s%s\n", i-1, i, cmp, buf1, buf2); 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++countErrors; 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(prev, buf1); 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(level, buf2); 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("wrong: strcmp(wD(%8ld), wD(%8ld))=%2d %s%s\n", i-1, i, cmp, buf1, buf2); 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* remember the previous bytes */ 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memcpy(prev, level, 4); 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* show last number and bytes */ 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes((uint8_t *)"", buf1); 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printBytes(prev, buf2); 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose(" wD(%8ld) %s%s\n", i-1, buf1, buf2); 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(countErrors==0) { 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("writeDiff(-0x10ffff..0x10ffff) works fine\n"); 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("writeDiff(-0x10ffff..0x10ffff) violates lexical ordering in %d cases\n", countErrors); 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* output signature byte sequence */ 
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=0; 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru writePacked(encodeBocu1(&i, 0xfeff), level); 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_verbose("\nBOCU-1 signature byte sequence: %02x %02x %02x\n", 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru level[0], level[1], level[2]); 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* cintltst code ------------------------------------------------------------ */ 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t DEFAULT_BUFFER_SIZE = 30000; 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* test one string with the ICU and the reference BOCU-1 implementations */ 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruroundtripBOCU1(UConverter *bocu1, int32_t number, const UChar *text, int32_t length) { 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *roundtripRef, *roundtripICU; 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bocu1Ref, *bocu1ICU; 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bocu1RefLength, bocu1ICULength, roundtripRefLength, roundtripICULength; 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode; 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripRef = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); 
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripICU = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1Ref = malloc(DEFAULT_BUFFER_SIZE); 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1ICU = malloc(DEFAULT_BUFFER_SIZE); 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode -> BOCU-1 */ 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1RefLength=writeString(text, length, (uint8_t *)bocu1Ref); 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, DEFAULT_BUFFER_SIZE, text, length, &errorCode); 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucnv_fromUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode)); 899b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho goto cleanup; 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(bocu1RefLength!=bocu1ICULength || 0!=uprv_memcmp(bocu1Ref, bocu1ICU, bocu1RefLength)) { 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("Unicode(%d)[%d] -> BOCU-1: reference[%d]!=ICU[%d]\n", number, length, bocu1RefLength, bocu1ICULength); 904b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho goto cleanup; 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* BOCU-1 -> Unicode */ 
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripRefLength=readString((uint8_t *)bocu1Ref, bocu1RefLength, roundtripRef); 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(roundtripRefLength<0) { 910b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho goto cleanup; /* readString() found an error and reported it */ 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, DEFAULT_BUFFER_SIZE, bocu1ICU, bocu1ICULength, &errorCode); 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("ucnv_toUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode)); 916b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho goto cleanup; 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length!=roundtripRefLength || 0!=u_memcmp(text, roundtripRef, length)) { 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("BOCU-1 -> Unicode: original(%d)[%d]!=reference[%d]\n", number, length, roundtripRefLength); 921b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho goto cleanup; 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(roundtripRefLength!=roundtripICULength || 0!=u_memcmp(roundtripRef, roundtripICU, roundtripRefLength)) { 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru log_err("BOCU-1 -> Unicode: reference(%d)[%d]!=ICU[%d]\n", number, roundtripRefLength, roundtripICULength); 925b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho goto cleanup; 
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 927b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehocleanup: 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(roundtripRef); 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(roundtripICU); 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(bocu1Ref); 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(bocu1ICU); 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar feff[]={ 0xfeff }; 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ascii[]={ 0x61, 0x62, 0x20, 0x63, 0x61 }; 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar crlf[]={ 0xd, 0xa, 0x20 }; 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar nul[]={ 0 }; 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar latin[]={ 0xdf, 0xe6 }; 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar devanagari[]={ 0x930, 0x20, 0x918, 0x909 }; 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar hiragana[]={ 0x3086, 0x304d, 0x20, 0x3053, 0x4000 }; 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar unihan[]={ 0x4e00, 0x7777, 0x20, 0x9fa5, 0x4e00 }; 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar hangul[]={ 0xac00, 0xbcde, 0x20, 0xd7a3 }; 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar surrogates[]={ 0xdc00, 0xd800 }; /* single surrogates, unmatched! 
*/ 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane1[]={ 0xd800, 0xdc00 }; 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane2[]={ 0xd845, 0xdddd }; 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane15[]={ 0xdbbb, 0xddee, 0x20 }; 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar plane16[]={ 0xdbff, 0xdfff }; 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar c0[]={ 1, 0xe40, 0x20, 9 }; 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const struct { 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} strings[]={ 954f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { feff, UPRV_LENGTHOF(feff) }, 955f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { ascii, UPRV_LENGTHOF(ascii) }, 956f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { crlf, UPRV_LENGTHOF(crlf) }, 957f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { nul, UPRV_LENGTHOF(nul) }, 958f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { latin, UPRV_LENGTHOF(latin) }, 959f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { devanagari, UPRV_LENGTHOF(devanagari) }, 960f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { hiragana, UPRV_LENGTHOF(hiragana) }, 961f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { unihan, UPRV_LENGTHOF(unihan) }, 962f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { hangul, UPRV_LENGTHOF(hangul) }, 963f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { surrogates, UPRV_LENGTHOF(surrogates) }, 964f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { plane1, UPRV_LENGTHOF(plane1) }, 965f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { plane2, 
UPRV_LENGTHOF(plane2) }, 966f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { plane15, UPRV_LENGTHOF(plane15) }, 967f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { plane16, UPRV_LENGTHOF(plane16) }, 968f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { c0, UPRV_LENGTHOF(c0) } 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Verify that the ICU BOCU-1 implementation produces the same results as 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the reference implementation from the design folder. 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generate some texts and convert them with both converters, verifying 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * identical results and roundtripping. 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTestBOCU1(void) { 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *text; 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, length; 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverter *bocu1; 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode; 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bocu1=ucnv_open("BOCU-1", &errorCode); 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 9881b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert log_data_err("error: 
unable to open BOCU-1 converter: %s\n", u_errorName(errorCode)); 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar)); 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* text 1: each of strings[] once */ 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 996f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0; i<UPRV_LENGTHOF(strings); ++i) { 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(text+length, strings[i].s, strings[i].length); 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length+=strings[i].length; 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripBOCU1(bocu1, 1, text, length); 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* text 2: each of strings[] twice */ 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 1004f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0; i<UPRV_LENGTHOF(strings); ++i) { 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(text+length, strings[i].s, strings[i].length); 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length+=strings[i].length; 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(text+length, strings[i].s, strings[i].length); 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length+=strings[i].length; 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripBOCU1(bocu1, 
2, text, length); 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* text 3: each of strings[] many times (set step vs. |strings| so that all strings are used) */ 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=1; length<5000; i+=7) { 1015f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(i>=UPRV_LENGTHOF(strings)) { 1016f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius i-=UPRV_LENGTHOF(strings); 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(text+length, strings[i].s, strings[i].length); 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length+=strings[i].length; 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru roundtripBOCU1(bocu1, 3, text, length); 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(bocu1); 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru free(text); 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC void addBOCU1Tests(TestNode** root); 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC void 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruaddBOCU1Tests(TestNode** root) { 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, TestBOCU1RefDiff, "tsconv/bocu1tst/TestBOCU1RefDiff"); 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addTest(root, TestBOCU1, "tsconv/bocu1tst/TestBOCU1"); 
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1034