16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Copyright (C) 2002-2011, International Business Machines
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Corporation and others.  All Rights Reserved.
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   file name:  bocu1tst.c
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   encoding:   US-ASCII
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   tab size:   8 (not used)
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   indentation:4
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   created on: 2002may27
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   created by: Markus W. Scherer
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   This is the reference implementation of BOCU-1,
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   the MIME-friendly form of the Binary Ordered Compression for Unicode,
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   taken directly from ### http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/bocu1/
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   The files bocu1.h and bocu1.c from the design folder are taken
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   verbatim (minus copyright and #include) and copied together into this file.
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   The reference code and some of the reference bocu1tst.c
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   is modified to run as part of the ICU cintltst
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   test framework (minus main(), log_ln() etc. instead of printf()).
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   This reference implementation is used here to verify
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   the ICU BOCU-1 implementation, which is
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   adapted for ICU conversion APIs and optimized.
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   ### links in design doc to here and to ucnvbocu.c
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h"
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ucnv.h"
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h"
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cintltst.h"
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/* Number of elements in an actual array (not valid for pointers). */
#define LENGTHOF(array) (sizeof(array)/sizeof(*(array)))
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* icuhtml/design/conversion/bocu1/bocu1.h ---------------------------------- */
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* BOCU-1 constants and macros ---------------------------------------------- */
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * BOCU-1 encodes each code point of a Unicode string as a byte-encoded
 * difference from a "previous code point" state value ("prev"),
 * in a way that preserves lexical order.
 *
 * To keep the differences small within runs of text from one small script,
 * "prev" is adjusted after each code point:
 *
 * - Most small scripts are allocated within aligned blocks of 128 code
 *   points, so "prev" is always moved into the middle of such a block;
 *   this still preserves lexical order.
 * - From anywhere inside the Unihan and Hangul areas, "prev" is moved
 *   into the middle of the respective area.
 *
 * C0 control codes and space are encoded with their US-ASCII byte values.
 * A C0 control resets "prev"; space does not.
 */

/* Initial "prev" value: the middle of the ASCII range. */
#define BOCU1_ASCII_PREV        0x40

/* Bounding byte values for encoded differences. */
#define BOCU1_MIN               0x21
#define BOCU1_MIDDLE            0x90
#define BOCU1_MAX_LEAD          0xfe

/*
 * The L suffix makes arithmetic that involves BOCU1_MAX_TRAIL
 * work on 16-bit compilers.
 */
#define BOCU1_MAX_TRAIL         0xffL

/* Not a lead byte; BOCU1_LENGTH_FROM_LEAD() must not be applied to it. */
#define BOCU1_RESET             0xff

/* How many lead byte values there are. */
#define BOCU1_COUNT             (BOCU1_MAX_LEAD-BOCU1_MIN+1)

/*
 * Some C0 control byte values are used as trail bytes too;
 * the trail byte counts are adjusted for them.
 */
#define BOCU1_TRAIL_CONTROLS_COUNT  20
#define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)

/* How many trail byte values there are. */
#define BOCU1_TRAIL_COUNT       ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)

/*
 * How many single-byte codes there are on the positive and on the negative
 * side each; the difference 0 (byte BOCU1_MIDDLE) counts as positive.
 */
#define BOCU1_SINGLE            64

/* How many lead bytes start 2/3/4-byte sequences, per sign. */
#define BOCU1_LEAD_2            43
#define BOCU1_LEAD_3            3
#define BOCU1_LEAD_4            1

/* Largest positive differences that fit into 1, 2 and 3 bytes. */
#define BOCU1_REACH_POS_1   (BOCU1_SINGLE-1)
#define BOCU1_REACH_POS_2   (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_POS_3   (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

/* Smallest negative differences that fit into 1, 2 and 3 bytes. */
#define BOCU1_REACH_NEG_1   (-BOCU1_SINGLE)
#define BOCU1_REACH_NEG_2   (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_3   \
    (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

/* Lead byte start values for positive differences. */
#define BOCU1_START_POS_2   (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
#define BOCU1_START_POS_3   (BOCU1_START_POS_2+BOCU1_LEAD_2)
#define BOCU1_START_POS_4   (BOCU1_START_POS_3+BOCU1_LEAD_3)
     /* BOCU1_START_POS_4==BOCU1_MAX_LEAD */

/* Lead byte start values for negative differences. */
#define BOCU1_START_NEG_2   (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
#define BOCU1_START_NEG_3   (BOCU1_START_NEG_2-BOCU1_LEAD_2)
#define BOCU1_START_NEG_4   (BOCU1_START_NEG_3-BOCU1_LEAD_3)
     /* BOCU1_START_NEG_4==BOCU1_MIN+1 */

/*
 * Length (1..4) of the byte sequence that starts with the given lead byte.
 * The lead byte must not be BOCU1_RESET.
 * The argument is evaluated more than once.
 */
#define BOCU1_LENGTH_FROM_LEAD(lead) \
    (((lead)>=BOCU1_START_NEG_2 && (lead)<BOCU1_START_POS_2) ? 1 : \
     ((lead)>=BOCU1_START_NEG_3 && (lead)<BOCU1_START_POS_3) ? 2 : \
     ((lead)>=BOCU1_START_NEG_4 && (lead)<BOCU1_START_POS_4) ? 3 : 4)

/*
 * Length (1..4) of a byte sequence in packed form:
 * values below 0x04000000 carry the length in their top byte,
 * everything else is a 4-byte sequence.
 * The argument is evaluated more than once.
 */
#define BOCU1_LENGTH_FROM_PACKED(packed) \
    ((uint32_t)(packed)>=0x04000000 ? 4 : (packed)>>24)
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * Twelve commonly used C0 control codes, and space, only ever encode
 * themselves. This is what makes BOCU-1 MIME-usable and reasonably safe
 * for ASCII-oriented software.
 *
 * The twelve controls are
 *  0   NUL
 *
 *  7   BEL
 *  8   BS
 *
 *  9   TAB
 *  a   LF
 *  b   VT
 *  c   FF
 *  d   CR
 *
 *  e   SO
 *  f   SI
 *
 * 1a   SUB
 * 1b   ESC
 *
 * The remaining 20 C0 controls are encoded directly as well (to preserve
 * order), but their byte values double as trail bytes in the difference
 * encoding (for better compression).
 *
 * BOCU1_TRAIL_TO_BYTE maps a trail byte value t from the difference
 * calculation to its external byte value: the first
 * BOCU1_TRAIL_CONTROLS_COUNT values go through the bocu1TrailToByte[] table,
 * all others are shifted by BOCU1_TRAIL_BYTE_OFFSET.
 * The argument is evaluated more than once.
 */
#define BOCU1_TRAIL_TO_BYTE(t) \
    ((t)<BOCU1_TRAIL_CONTROLS_COUNT ? bocu1TrailToByte[t] : (t)+BOCU1_TRAIL_BYTE_OFFSET)
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Byte value map for control codes,
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from external byte values 0x00..0x20
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * External byte values that are illegal as trail bytes are mapped to -1.
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const int8_t
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgbocu1ByteToTrail[BOCU1_MIN]={
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*  0     1     2     3     4     5     6     7    */
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    -1,   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*  8     9     a     b     c     d     e     f    */
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*  10    11    12    13    14    15    16    17   */
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*  18    19    1a    1b    1c    1d    1e    1f   */
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0x0e, 0x0f, -1,   -1,   0x10, 0x11, 0x12, 0x13,
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*  20   */
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    -1
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Byte value map for control codes,
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from trail byte values 0..19 (0..0x13) as used in the difference calculation
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to external byte values 0x00..0x20.
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const int8_t
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgbocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*  0     1     2     3     4     5     6     7    */
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*  8     9     a     b     c     d     e     f    */
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*  10    11    12    13   */
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0x1c, 0x1d, 0x1e, 0x1f
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/**
 * Integer division and modulo that also work for negative numerators.
 *
 * C integer division truncates toward zero, so a negative numerator
 * yields a negative modulo result and a quotient that is one more than
 * what is needed here.
 * This macro adjusts the results so that, for a positive divisor d,
 * the modulo value m is always >=0.
 *
 * For positive n, the if() condition is always FALSE.
 *
 * The body is wrapped in do { } while(0) so that an invocation followed by
 * a semicolon forms exactly one statement and is safe in unbraced
 * if/else branches. Invocations must therefore end with a semicolon.
 * All arguments are evaluated more than once; pass expressions without
 * side effects.
 *
 * @param n Number to be split into quotient and rest.
 *          Will be modified to contain the quotient.
 * @param d Divisor.
 * @param m Output variable for the rest (modulo result).
 */
#define NEGDIVMOD(n, d, m) do { \
    (m)=(n)%(d); \
    (n)/=(d); \
    if((m)<0) { \
        --(n); \
        (m)+=(d); \
    } \
} while(0)
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * State for the BOCU-1 decoder function (passed to decodeBocu1()).
 * NOTE(review): the decoder is defined outside this view; judging by the
 * field names only, these are presumably the "previous code point" state,
 * a byte counter, and the partially assembled difference. Confirm against
 * decodeBocu1().
 */
typedef struct Bocu1Rx {
    int32_t prev;
    int32_t count;
    int32_t diff;
} Bocu1Rx;
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Function prototypes ------------------------------------------------------ */
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* see bocu1.c */
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpackDiff(int32_t diff);
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgencodeBocu1(int32_t *pPrev, int32_t c);
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgdecodeBocu1(Bocu1Rx *pRx, uint8_t b);
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* icuhtml/design/conversion/bocu1/bocu1.c ---------------------------------- */
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* BOCU-1 implementation functions ------------------------------------------ */
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Compute the next "previous" value for differencing
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from the current code point.
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param c current code point, 0..0x10ffff
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return "previous code point" state value
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgbocu1Prev(int32_t c) {
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* compute new prev */
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(0x3040<=c && c<=0x309f) {
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* Hiragana is not 128-aligned */
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0x3070;
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(0x4e00<=c && c<=0x9fa5) {
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* CJK Unihan */
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0x4e00-BOCU1_REACH_NEG_2;
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(0xac00<=c && c<=0xd7a3) {
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* Korean Hangul (cast to int32_t to avoid wraparound on 16-bit compilers) */
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return ((int32_t)0xd7a3+(int32_t)0xac00)/2;
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* mostly small scripts */
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (c&~0x7f)+BOCU1_ASCII_PREV;
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and return a packed integer with them.
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The encoding favors small absolut differences with short encodings
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to compress runs of same-script characters.
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param diff difference value -0x10ffff..0x10ffff
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      0x010000zz for 1-byte sequence zz
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      0x0200yyzz for 2-byte sequence yy zz
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      0x03xxyyzz for 3-byte sequence xx yy zz
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpackDiff(int32_t diff) {
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t result, m, lead, count, shift;
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(diff>=BOCU1_REACH_NEG_1) {
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* mostly positive differences, and single-byte negative ones */
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(diff<=BOCU1_REACH_POS_1) {
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* single byte */
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0x01000000|(BOCU1_MIDDLE+diff);
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(diff<=BOCU1_REACH_POS_2) {
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* two bytes */
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            diff-=BOCU1_REACH_POS_1+1;
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lead=BOCU1_START_POS_2;
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=1;
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(diff<=BOCU1_REACH_POS_3) {
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* three bytes */
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            diff-=BOCU1_REACH_POS_2+1;
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lead=BOCU1_START_POS_3;
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=2;
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* four bytes */
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            diff-=BOCU1_REACH_POS_3+1;
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lead=BOCU1_START_POS_4;
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=3;
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* two- and four-byte negative differences */
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(diff>=BOCU1_REACH_NEG_2) {
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* two bytes */
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            diff-=BOCU1_REACH_NEG_1;
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lead=BOCU1_START_NEG_2;
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=1;
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(diff>=BOCU1_REACH_NEG_3) {
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* three bytes */
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            diff-=BOCU1_REACH_NEG_2;
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lead=BOCU1_START_NEG_3;
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=2;
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* four bytes */
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            diff-=BOCU1_REACH_NEG_3;
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lead=BOCU1_START_NEG_4;
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=3;
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* encode the length of the packed result */
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count<3) {
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result=(count+1)<<24;
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else /* count==3, MSB used for the lead byte */ {
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result=0;
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* calculate trail bytes like digits in itoa() */
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    shift=0;
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    do {
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result|=BOCU1_TRAIL_TO_BYTE(m)<<shift;
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        shift+=8;
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } while(--count>0);
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* add lead byte */
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result|=(lead+diff)<<shift;
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return result;
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * BOCU-1 encoder function.
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param pPrev pointer to the integer that holds
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *        the "previous code point" state;
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *        the initial value should be 0 which
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *        encodeBocu1 will set to the actual BOCU-1 initial state value
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param c the code point to encode
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return the packed 1/2/3/4-byte encoding, see packDiff(),
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *         or 0 if an error occurs
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @see packDiff
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgencodeBocu1(int32_t *pPrev, int32_t c) {
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t prev;
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(pPrev==NULL || c<0 || c>0x10ffff) {
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* illegal argument */
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    prev=*pPrev;
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(prev==0) {
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* lenient handling of initial value 0 */
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        prev=*pPrev=BOCU1_ASCII_PREV;
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(c<=0x20) {
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * ISO C0 control & space:
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Encode directly for MIME compatibility,
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * and reset state except for space, to not disrupt compression.
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(c!=0x20) {
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *pPrev=BOCU1_ASCII_PREV;
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0x01000000|c;
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * all other Unicode code points c==U+0021..U+10ffff
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * are encoded with the difference c-prev
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * a new prev is computed from c,
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * placed in the middle of a 0x80-block (for most small scripts) or
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * in the middle of the Unihan and Hangul blocks
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * to statistically minimize the following difference
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    *pPrev=bocu1Prev(c);
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return packDiff(c-prev);
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Function for BOCU-1 decoder; handles multi-byte lead bytes.
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param pRx pointer to the decoder state structure
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param b lead byte;
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *          BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<=BOCU1_MAX_LEAD
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return -1 (state change only)
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @see decodeBocu1
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgdecodeBocu1LeadByte(Bocu1Rx *pRx, uint8_t b) {
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t c, count;
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(b>=BOCU1_START_NEG_2) {
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* positive difference */
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(b<BOCU1_START_POS_3) {
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* two bytes */
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=1;
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(b<BOCU1_START_POS_4) {
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* three bytes */
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=2;
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* four bytes */
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=BOCU1_REACH_POS_3+1;
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=3;
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* negative difference */
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(b>=BOCU1_START_NEG_3) {
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* two bytes */
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=1;
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(b>BOCU1_MIN) {
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* three bytes */
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=2;
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* four bytes */
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=3;
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* set the state for decoding the trail byte(s) */
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pRx->diff=c;
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pRx->count=count;
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return -1;
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Function for BOCU-1 decoder; handles multi-byte trail bytes.
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param pRx pointer to the decoder state structure
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param b trail byte
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return result value, same as decodeBocu1
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @see decodeBocu1
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgdecodeBocu1TrailByte(Bocu1Rx *pRx, uint8_t b) {
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t t, c, count;
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(b<=0x20) {
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* skip some C0 controls and make the trail byte range contiguous */
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        t=bocu1ByteToTrail[b];
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(t<0) {
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* illegal trail byte value */
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pRx->prev=BOCU1_ASCII_PREV;
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pRx->count=0;
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return -99;
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if BOCU1_MAX_TRAIL<0xff
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(b>BOCU1_MAX_TRAIL) {
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -99;
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        t=(int32_t)b-BOCU1_TRAIL_BYTE_OFFSET;
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* add trail byte into difference and decrement count */
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    c=pRx->diff;
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    count=pRx->count;
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count==1) {
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* final trail byte, deliver a code point */
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c=pRx->prev+c+t;
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(0<=c && c<=0x10ffff) {
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* valid code point result */
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pRx->prev=bocu1Prev(c);
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pRx->count=0;
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return c;
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* illegal code point result */
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pRx->prev=BOCU1_ASCII_PREV;
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pRx->count=0;
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return -99;
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* intermediate trail byte */
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count==2) {
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pRx->diff=c+t*BOCU1_TRAIL_COUNT;
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else /* count==3 */ {
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pRx->diff=c+t*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT;
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pRx->count=count-1;
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return -1;
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * BOCU-1 decoder function.
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param pRx pointer to the decoder state structure;
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *        the initial values should be 0 which
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *        decodeBocu1 will set to actual initial state values
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param b an input byte
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      0..0x10ffff for a result code point
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      -1 if only the state changed without code point output
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *     <-1 if an error occurs
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgdecodeBocu1(Bocu1Rx *pRx, uint8_t b) {
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t prev, c, count;
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(pRx==NULL) {
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* illegal argument */
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -99;
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    prev=pRx->prev;
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(prev==0) {
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* lenient handling of initial 0 values */
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        prev=pRx->prev=BOCU1_ASCII_PREV;
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        count=pRx->count=0;
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        count=pRx->count;
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count==0) {
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* byte in lead position */
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(b<=0x20) {
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /*
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * Direct-encoded C0 control code or space.
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * Reset prev for C0 control codes but not for space.
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             */
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(b!=0x20) {
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                pRx->prev=BOCU1_ASCII_PREV;
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return b;
5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * b is a difference lead byte.
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Return a code point directly from a single-byte difference.
5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *
5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * For multi-byte difference lead bytes, set the decoder state
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * with the partial difference value from the lead byte and
5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * with the number of trail bytes.
5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *
5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * For four-byte differences, the signedness also affects the
5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * first trail byte, which has special handling farther below.
5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(b>=BOCU1_START_NEG_2 && b<BOCU1_START_POS_2) {
5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* single-byte difference */
5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=prev+((int32_t)b-BOCU1_MIDDLE);
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pRx->prev=bocu1Prev(c);
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return c;
5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(b==BOCU1_RESET) {
5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* only reset the state, no code point */
5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pRx->prev=BOCU1_ASCII_PREV;
5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return -1;
5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return decodeBocu1LeadByte(pRx, b);
5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* trail byte in any position */
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return decodeBocu1TrailByte(pRx, b);
5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* icuhtml/design/conversion/bocu1/bocu1tst.c ------------------------------- */
5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* test code ---------------------------------------------------------------- */
5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* test code options */
5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* ignore comma when processing name lists in testText() */
5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define TEST_IGNORE_COMMA       1
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/**
 * Serialize a packed BOCU-1 byte sequence into a byte array,
 * most significant used byte first.
 * There is no overflow check on the output array.
 * Test function.
 *
 * @param packed packed BOCU-1 byte sequence, see packDiff()
 * @param p pointer to byte array
 * @return number of bytes, as reported by BOCU1_LENGTH_FROM_PACKED();
 *         nothing is written unless that length is 1..4
 *
 * @see packDiff
 */
static int32_t
writePacked(int32_t packed, uint8_t *p) {
    int32_t count=BOCU1_LENGTH_FROM_PACKED(packed);
    int32_t shift;

    if(1<=count && count<=4) {
        /* emit the low `count` bytes of packed, highest one first */
        for(shift=8*(count-1); shift>=0; shift-=8) {
            *p++=(uint8_t)(packed>>shift);
        }
    }

    return count;
}
6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Unpack a packed BOCU-1 non-C0/space byte sequence and get
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the difference to initialPrev.
6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Used only for round-trip testing of the difference encoding and decoding.
6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Test function.
6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param initialPrev bogus "previous code point" value to make sure that
6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                    the resulting code point is in the range 0..0x10ffff
6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param packed packed BOCU-1 byte sequence
6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return the difference to initialPrev
6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @see packDiff
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @see writeDiff
6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunpackDiff(int32_t initialPrev, int32_t packed) {
6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Bocu1Rx rx={ 0, 0, 0 };
6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t count;
6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    rx.prev=initialPrev;
6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    count=BOCU1_LENGTH_FROM_PACKED(packed);
6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch(count) {
6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 4:
6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        decodeBocu1(&rx, (uint8_t)(packed>>24));
6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 3:
6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        decodeBocu1(&rx, (uint8_t)(packed>>16));
6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 2:
6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        decodeBocu1(&rx, (uint8_t)(packed>>8));
6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 1:
6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* subtract initial prev */
6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return decodeBocu1(&rx, (uint8_t)packed)-initialPrev;
6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    default:
6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -0x7fffffff;
6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/**
 * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
 * preserving lexical order.
 * Also checks for roundtripping of the difference encoding and
 * reports a mismatch via log_err().
 * Test function.
 *
 * @param diff difference value to test, -0x10ffff..0x10ffff
 * @param p pointer to output byte array
 * @return p advanced by number of bytes output
 *
 * @see unpackDiff
 */
static uint8_t *
writeDiff(int32_t diff, uint8_t *p) {
    /* generate the difference as a packed value and serialize it */
    int32_t packed, initialPrev, unpacked;

    packed=packDiff(diff);

    /*
     * bogus initial "prev" to work around
     * code point range check in decodeBocu1()
     */
    if(diff<=0) {
        initialPrev=0x10ffff;
    } else {
        initialPrev=-1;
    }

    unpacked=unpackDiff(initialPrev, packed);
    if(diff!=unpacked) {
        /*
         * The format uses %ld/%lx, so pass long-sized arguments:
         * int32_t need not have the same width as long.
         * packed goes through uint32_t so that a value with the top bit
         * set still prints as 8 hex digits.
         */
        log_err("error: unpackDiff(packDiff(diff=%ld)=0x%08lx)=%ld!=diff\n",
                (long)diff, (unsigned long)(uint32_t)packed, (long)unpacked);
    }
    return p+writePacked(packed, p);
}
7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Encode a UTF-16 string in BOCU-1.
7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Does not check for overflows, but otherwise useful function.
7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param s input UTF-16 string
7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param length number of UChar code units in s
7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param p pointer to output byte array
7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return number of bytes output
7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t
7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgwriteString(const UChar *s, int32_t length, uint8_t *p) {
7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t *p0;
7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t c, prev, i;
7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    prev=0;
7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    p0=p;
7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    i=0;
7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(i<length) {
7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U16_NEXT(s, i, length, c);
7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        p+=writePacked(encodeBocu1(&prev, c), p);
7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)(p-p0);
7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Decode a BOCU-1 byte sequence to a UTF-16 string.
7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Does not check for overflows, but otherwise useful function.
7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param p pointer to input BOCU-1 bytes
7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param length number of input bytes
7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param s point to output UTF-16 string array
7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return number of UChar code units output
7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t
7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgreadString(const uint8_t *p, int32_t length, UChar *s) {
7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Bocu1Rx rx={ 0, 0, 0 };
7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t c, i, sLength;
7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    i=sLength=0;
7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(i<length) {
7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c=decodeBocu1(&rx, p[i++]);
7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(c<-1) {
7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            log_err("error: readString detects encoding error at string index %ld\n", i);
7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return -1;
7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(c>=0) {
7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_APPEND_UNSAFE(s, sLength, c);
7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return sLength;
7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/**
 * Map a nibble value to its lowercase hexadecimal digit character.
 * No range check is performed; callers pass values 0..15.
 *
 * @param digit value to convert
 * @return '0'..'9' for 0..9, 'a'.. for 10 and above
 */
static char
hexDigit(uint8_t digit) {
    if(digit<10) {
        return (char)('0'+digit);
    }
    return (char)('a'+digit-10);
}
7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/**
 * Pretty-print 0-terminated byte values.
 * Helper function for test output.
 *
 * Each byte is written as a space followed by two lowercase hex digits.
 * If fewer than 5 bytes were printed, the result is padded with spaces
 * to the width of 5 bytes (15 characters). The output is NUL-terminated.
 *
 * @param bytes 0-terminated byte array to print
 * @param out output char array; must have room for 3 characters per byte
 *            (at least 15) plus the terminating NUL
 */
static void
printBytes(uint8_t *bytes, char *out) {
    int count, pad;

    for(count=0; *bytes!=0; ++bytes, ++count) {
        const uint8_t value=*bytes;

        *out++=' ';
        *out++=hexDigit((uint8_t)(value>>4));
        *out++=hexDigit((uint8_t)(value&0xf));
    }
    /* pad up to the column width of 5 printed bytes; no padding if count>=5 */
    for(pad=3*(5-count); pad>0; --pad) {
        *out++=' ';
    }
    *out=0;
}
7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Basic BOCU-1 test function, called when there are no command line arguments.
7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Prints some of the #define values and performs round-trip tests of the
7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * difference encoding and decoding.
7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void
7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgTestBOCU1RefDiff(void) {
7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char buf1[80], buf2[80];
7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t prev[5], level[5];
7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i, cmp, countErrors;
7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("reach of single bytes: %ld\n", 1+BOCU1_REACH_POS_1-BOCU1_REACH_NEG_1);
7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("reach of 2 bytes     : %ld\n", 1+BOCU1_REACH_POS_2-BOCU1_REACH_NEG_2);
7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("reach of 3 bytes     : %ld\n\n", 1+BOCU1_REACH_POS_3-BOCU1_REACH_NEG_3);
7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("    BOCU1_REACH_NEG_1 %8ld    BOCU1_REACH_POS_1 %8ld\n", BOCU1_REACH_NEG_1, BOCU1_REACH_POS_1);
8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("    BOCU1_REACH_NEG_2 %8ld    BOCU1_REACH_POS_2 %8ld\n", BOCU1_REACH_NEG_2, BOCU1_REACH_POS_2);
8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("    BOCU1_REACH_NEG_3 %8ld    BOCU1_REACH_POS_3 %8ld\n\n", BOCU1_REACH_NEG_3, BOCU1_REACH_POS_3);
8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("    BOCU1_MIDDLE      0x%02x\n", BOCU1_MIDDLE);
8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("    BOCU1_START_NEG_2 0x%02x    BOCU1_START_POS_2 0x%02x\n", BOCU1_START_NEG_2, BOCU1_START_POS_2);
8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("    BOCU1_START_NEG_3 0x%02x    BOCU1_START_POS_3 0x%02x\n\n", BOCU1_START_NEG_3, BOCU1_START_POS_3);
8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* test packDiff() & unpackDiff() with some specific values */
8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(0, level);
8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(1, level);
8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(65, level);
8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(130, level);
8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(30000, level);
8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(1000000, level);
8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(-65, level);
8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(-130, level);
8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(-30000, level);
8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeDiff(-1000000, level);
8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* test that each value is smaller than any following one */
8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    countErrors=0;
8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    i=-0x10ffff;
8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    *writeDiff(i, prev)=0;
8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* show first number and bytes */
8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    printBytes(prev, buf1);
8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("              wD(%8ld)                    %s\n", i, buf1);
8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(++i; i<=0x10ffff; ++i) {
8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *writeDiff(i, level)=0;
8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cmp=strcmp((const char *)prev, (const char *)level);
8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(BOCU1_LENGTH_FROM_LEAD(level[0])!=(int32_t)strlen((const char *)level)) {
8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            log_verbose("BOCU1_LENGTH_FROM_LEAD(0x%02x)=%ld!=%ld=strlen(writeDiff(%ld))\n",
8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   level[0], BOCU1_LENGTH_FROM_LEAD(level[0]), strlen((const char *)level), i);
8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(cmp<0) {
8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(i==0 || i==1 || strlen((const char *)prev)!=strlen((const char *)level)) {
8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /*
8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 * if the result is good, then print only if the length changed
8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 * to get little but interesting output
8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 */
8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                printBytes(prev, buf1);
8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                printBytes(level, buf2);
8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                log_verbose("ok:    strcmp(wD(%8ld), wD(%8ld))=%2d  %s%s\n", i-1, i, cmp, buf1, buf2);
8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++countErrors;
8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            printBytes(prev, buf1);
8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            printBytes(level, buf2);
8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            log_verbose("wrong: strcmp(wD(%8ld), wD(%8ld))=%2d  %s%s\n", i-1, i, cmp, buf1, buf2);
8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* remember the previous bytes */
8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        memcpy(prev, level, 4);
8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* show last number and bytes */
8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    printBytes((uint8_t *)"", buf1);
8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    printBytes(prev, buf2);
8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("                            wD(%8ld)      %s%s\n", i-1, buf1, buf2);
8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(countErrors==0) {
8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        log_verbose("writeDiff(-0x10ffff..0x10ffff) works fine\n");
8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        log_err("writeDiff(-0x10ffff..0x10ffff) violates lexical ordering in %d cases\n", countErrors);
8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* output signature byte sequence */
8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    i=0;
8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writePacked(encodeBocu1(&i, 0xfeff), level);
8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    log_verbose("\nBOCU-1 signature byte sequence: %02x %02x %02x\n",
8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            level[0], level[1], level[2]);
8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* cintltst code ------------------------------------------------------------ */
8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * Element count used for every heap buffer allocated by the tests below
 * (UChar units for text buffers, bytes for BOCU-1 buffers).
 */
static const int32_t DEFAULT_BUFFER_SIZE=30000;
8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* test one string with the ICU and the reference BOCU-1 implementations */
8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void
8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgroundtripBOCU1(UConverter *bocu1, int32_t number, const UChar *text, int32_t length) {
8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *roundtripRef, *roundtripICU;
8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char *bocu1Ref, *bocu1ICU;
8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t bocu1RefLength, bocu1ICULength, roundtripRefLength, roundtripICULength;
8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode errorCode;
8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    roundtripRef = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    roundtripICU = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bocu1Ref = malloc(DEFAULT_BUFFER_SIZE);
8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bocu1ICU = malloc(DEFAULT_BUFFER_SIZE);
8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Unicode -> BOCU-1 */
8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bocu1RefLength=writeString(text, length, (uint8_t *)bocu1Ref);
8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    errorCode=U_ZERO_ERROR;
8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, DEFAULT_BUFFER_SIZE, text, length, &errorCode);
8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        log_err("ucnv_fromUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanup;
9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(bocu1RefLength!=bocu1ICULength || 0!=uprv_memcmp(bocu1Ref, bocu1ICU, bocu1RefLength)) {
9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        log_err("Unicode(%d)[%d] -> BOCU-1: reference[%d]!=ICU[%d]\n", number, length, bocu1RefLength, bocu1ICULength);
9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanup;
9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* BOCU-1 -> Unicode */
9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    roundtripRefLength=readString((uint8_t *)bocu1Ref, bocu1RefLength, roundtripRef);
9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(roundtripRefLength<0) {
9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanup; /* readString() found an error and reported it */
9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, DEFAULT_BUFFER_SIZE, bocu1ICU, bocu1ICULength, &errorCode);
9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        log_err("ucnv_toUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanup;
9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length!=roundtripRefLength || 0!=u_memcmp(text, roundtripRef, length)) {
9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        log_err("BOCU-1 -> Unicode: original(%d)[%d]!=reference[%d]\n", number, length, roundtripRefLength);
9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanup;
9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(roundtripRefLength!=roundtripICULength || 0!=u_memcmp(roundtripRef, roundtripICU, roundtripRefLength)) {
9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        log_err("BOCU-1 -> Unicode: reference(%d)[%d]!=ICU[%d]\n", number, roundtripRefLength, roundtripICULength);
9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto cleanup;
9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcleanup:
9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    free(roundtripRef);
9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    free(roundtripICU);
9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    free(bocu1Ref);
9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    free(bocu1ICU);
9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar feff[]={ 0xfeff };
9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar ascii[]={ 0x61, 0x62, 0x20, 0x63, 0x61 };
9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar crlf[]={ 0xd, 0xa, 0x20 };
9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar nul[]={ 0 };
9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar latin[]={ 0xdf, 0xe6 };
9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar devanagari[]={ 0x930, 0x20, 0x918, 0x909 };
9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar hiragana[]={ 0x3086, 0x304d, 0x20, 0x3053, 0x4000 };
9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar unihan[]={ 0x4e00, 0x7777, 0x20, 0x9fa5, 0x4e00 };
9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar hangul[]={ 0xac00, 0xbcde, 0x20, 0xd7a3 };
9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar surrogates[]={ 0xdc00, 0xd800 }; /* single surrogates, unmatched! */
9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar plane1[]={ 0xd800, 0xdc00 };
9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar plane2[]={ 0xd845, 0xdddd };
9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar plane15[]={ 0xdbbb, 0xddee, 0x20 };
9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar plane16[]={ 0xdbff, 0xdfff };
9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar c0[]={ 1, 0xe40, 0x20, 9 };
9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const struct {
9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *s;
9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t length;
9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} strings[]={
9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { feff,         LENGTHOF(feff) },
9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { ascii,        LENGTHOF(ascii) },
9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { crlf,         LENGTHOF(crlf) },
9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { nul,          LENGTHOF(nul) },
9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { latin,        LENGTHOF(latin) },
9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { devanagari,   LENGTHOF(devanagari) },
9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { hiragana,     LENGTHOF(hiragana) },
9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { unihan,       LENGTHOF(unihan) },
9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { hangul,       LENGTHOF(hangul) },
9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { surrogates,   LENGTHOF(surrogates) },
9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { plane1,       LENGTHOF(plane1) },
9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { plane2,       LENGTHOF(plane2) },
9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { plane15,      LENGTHOF(plane15) },
9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { plane16,      LENGTHOF(plane16) },
9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { c0,           LENGTHOF(c0) }
9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Verify that the ICU BOCU-1 implementation produces the same results as
9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the reference implementation from the design folder.
9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Generate some texts and convert them with both converters, verifying
9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * identical results and roundtripping.
9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void
9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgTestBOCU1(void) {
9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *text;
9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i, length;
9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UConverter *bocu1;
9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode errorCode;
9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    errorCode=U_ZERO_ERROR;
9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bocu1=ucnv_open("BOCU-1", &errorCode);
9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        log_err("error: unable to open BOCU-1 converter: %s\n", u_errorName(errorCode));
9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* text 1: each of strings[] once */
9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    length=0;
9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i=0; i<LENGTHOF(strings); ++i) {
9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_memcpy(text+length, strings[i].s, strings[i].length);
9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        length+=strings[i].length;
9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    roundtripBOCU1(bocu1, 1, text, length);
10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* text 2: each of strings[] twice */
10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    length=0;
10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i=0; i<LENGTHOF(strings); ++i) {
10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_memcpy(text+length, strings[i].s, strings[i].length);
10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        length+=strings[i].length;
10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_memcpy(text+length, strings[i].s, strings[i].length);
10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        length+=strings[i].length;
10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    roundtripBOCU1(bocu1, 2, text, length);
10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* text 3: each of strings[] many times (set step vs. |strings| so that all strings are used) */
10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    length=0;
10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i=1; length<5000; i+=7) {
10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(i>=LENGTHOF(strings)) {
10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            i-=LENGTHOF(strings);
10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        u_memcpy(text+length, strings[i].s, strings[i].length);
10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        length+=strings[i].length;
10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    roundtripBOCU1(bocu1, 3, text, length);
10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_close(bocu1);
10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    free(text);
10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void addBOCU1Tests(TestNode** root);
10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void
10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgaddBOCU1Tests(TestNode** root) {
10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    addTest(root, TestBOCU1RefDiff, "tsconv/bocu1tst/TestBOCU1RefDiff");
10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    addTest(root, TestBOCU1, "tsconv/bocu1tst/TestBOCU1");
10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1034