1/*
2*******************************************************************************
3*
4*   Copyright (C) 2000-2003, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*
9* File writejava.c
10*
11* Modification History:
12*
13*   Date        Name        Description
14*   01/11/02    Ram        Creation.
15*******************************************************************************
16*/
17#include "rle.h"
18/**
19 * The ESCAPE character is used during run-length encoding.  It signals
20 * a run of identical chars.
21 */
22static const uint16_t ESCAPE = 0xA5A5;
23
24/**
25 * The ESCAPE_BYTE character is used during run-length encoding.  It signals
26 * a run of identical bytes.
27 */
28static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
29
30/**
31 * Append a byte to the given StringBuffer, packing two bytes into each
32 * character.  The state parameter maintains intermediary data between
33 * calls.
34 * @param state A two-element array, with state[0] == 0 if this is the
35 * first byte of a pair, or state[0] != 0 if this is the second byte
36 * of a pair, in which case state[1] is the first byte.
37 */
38static uint16_t*
39appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
40    if(!status || U_FAILURE(*status)){
41        return NULL;
42    }
43    if (state[0] != 0) {
44        uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
45        if(buffer < buffLimit){
46            *buffer++ = c;
47        }else{
48            *status = U_BUFFER_OVERFLOW_ERROR;
49        }
50        state[0] = 0;
51        return buffer;
52    }
53    else {
54        state[0] = 1;
55        state[1] = value;
56        return buffer;
57    }
58}
59/**
60 * Encode a run, possibly a degenerate run (of < 4 values).
61 * @param length The length of the run; must be > 0 && <= 0xFF.
62 */
63static uint16_t*
64encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
65    if(!status || U_FAILURE(*status)){
66        return NULL;
67    }
68    if (length < 4) {
69        int32_t j=0;
70        for (; j<length; ++j) {
71            if (value == ESCAPE_BYTE) {
72                buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
73            }
74            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
75        }
76    }
77    else {
78        if (length == ESCAPE_BYTE) {
79            if (value == ESCAPE_BYTE){
80               buffer =  appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
81            }
82            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
83            --length;
84        }
85        buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
86        buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
87        buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
88    }
89    return buffer;
90}
91
92#define APPEND( buffer, bufLimit, value, num, status){  \
93    if(buffer<bufLimit){                    \
94        *buffer++=(value);                  \
95    }else{                                  \
96        *status = U_BUFFER_OVERFLOW_ERROR;  \
97    }                                       \
98    num++;                                  \
99}
100
101/**
102 * Encode a run, possibly a degenerate run (of < 4 values).
103 * @param length The length of the run; must be > 0 && <= 0xFFFF.
104 */
105static uint16_t*
106encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
107    int32_t num=0;
108    if (length < 4) {
109        int j=0;
110        for (; j<length; ++j) {
111            if (value == (int32_t) ESCAPE){
112                APPEND(buffer,bufLimit,ESCAPE, num, status);
113
114            }
115            APPEND(buffer,bufLimit,value,num, status);
116        }
117    }
118    else {
119        if (length == (int32_t) ESCAPE) {
120            if (value == (int32_t) ESCAPE){
121                APPEND(buffer,bufLimit,ESCAPE,num,status);
122
123            }
124            APPEND(buffer,bufLimit,value,num,status);
125            --length;
126        }
127        APPEND(buffer,bufLimit,ESCAPE,num,status);
128        APPEND(buffer,bufLimit,(uint16_t) length, num,status);
129        APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
130    }
131    return buffer;
132}
133
134/**
135 * Construct a string representing a char array.  Use run-length encoding.
136 * A character represents itself, unless it is the ESCAPE character.  Then
137 * the following notations are possible:
138 *   ESCAPE ESCAPE   ESCAPE literal
139 *   ESCAPE n c      n instances of character c
140 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
141 * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
142 * If we encounter a run where n == ESCAPE, we represent this as:
143 *   c ESCAPE n-1 c
144 * The ESCAPE value is chosen so as not to collide with commonly
145 * seen values.
146 */
147int32_t
148usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
149    uint16_t* bufLimit =  buffer+bufLen;
150    uint16_t* saveBuffer = buffer;
151    if(buffer < bufLimit){
152        *buffer++ =  (uint16_t)(srcLen>>16);
153        if(buffer<bufLimit){
154            uint16_t runValue = src[0];
155            int32_t runLength = 1;
156            int i=1;
157            *buffer++ = (uint16_t) srcLen;
158
159            for (; i<srcLen; ++i) {
160                uint16_t s = src[i];
161                if (s == runValue && runLength < 0xFFFF){
162                    ++runLength;
163                }else {
164                    buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
165                    runValue = s;
166                    runLength = 1;
167                }
168            }
169            buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
170        }else{
171            *status = U_BUFFER_OVERFLOW_ERROR;
172        }
173    }else{
174        *status = U_BUFFER_OVERFLOW_ERROR;
175    }
176    return (int32_t)(buffer - saveBuffer);
177}
178
179/**
180 * Construct a string representing a byte array.  Use run-length encoding.
181 * Two bytes are packed into a single char, with a single extra zero byte at
182 * the end if needed.  A byte represents itself, unless it is the
183 * ESCAPE_BYTE.  Then the following notations are possible:
184 *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
185 *   ESCAPE_BYTE n b           n instances of byte b
186 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
187 * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
188 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
189 *   b ESCAPE_BYTE n-1 b
190 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
191 * seen values.
192 */
193int32_t
194byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
195    const uint16_t* saveBuf = buffer;
196    uint16_t* bufLimit =  buffer+bufLen;
197    if(buffer < bufLimit){
198        *buffer++ = ((uint16_t) (srcLen >> 16));
199
200        if(buffer<bufLimit){
201            uint8_t runValue = src[0];
202            int runLength = 1;
203            uint8_t state[2]= {0};
204            int i=1;
205            *buffer++=((uint16_t) srcLen);
206            for (; i<srcLen; ++i) {
207                uint8_t b = src[i];
208                if (b == runValue && runLength < 0xFF){
209                    ++runLength;
210                }
211                else {
212                    buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
213                    runValue = b;
214                    runLength = 1;
215                }
216            }
217            buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
218
219            /* We must save the final byte, if there is one, by padding
220             * an extra zero.
221             */
222            if (state[0] != 0) {
223                buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
224            }
225        }else{
226            *status = U_BUFFER_OVERFLOW_ERROR;
227        }
228    }else{
229        *status = U_BUFFER_OVERFLOW_ERROR;
230    }
231    return (int32_t) (buffer - saveBuf);
232}
233
234
235/**
236 * Construct an array of shorts from a run-length encoded string.
237 */
238int32_t
239rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
240    int32_t length = 0;
241    int32_t ai = 0;
242    int i=2;
243
244    if(!status || U_FAILURE(*status)){
245        return 0;
246    }
247    /* the source is null terminated */
248    if(srcLen == -1){
249        srcLen = u_strlen(src);
250    }
251    if(srcLen <= 2){
252        return 2;
253    }
254    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
255
256    if(target == NULL){
257        return length;
258    }
259    if(tgtLen < length){
260        *status = U_BUFFER_OVERFLOW_ERROR;
261        return length;
262    }
263
264    for (; i<srcLen; ++i) {
265        uint16_t c = src[i];
266        if (c == ESCAPE) {
267            c = src[++i];
268            if (c == ESCAPE) {
269                target[ai++] = c;
270            } else {
271                int32_t runLength = (int32_t) c;
272                uint16_t runValue = src[++i];
273                int j=0;
274                for (; j<runLength; ++j) {
275                    target[ai++] = runValue;
276                }
277            }
278        }
279        else {
280            target[ai++] = c;
281        }
282    }
283
284    if (ai != length){
285        *status = U_INTERNAL_PROGRAM_ERROR;
286    }
287
288    return length;
289}
290
291/**
292 * Construct an array of bytes from a run-length encoded string.
293 */
294int32_t
295rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
296
297    int32_t length = 0;
298    UBool nextChar = TRUE;
299    uint16_t c = 0;
300    int32_t node = 0;
301    int32_t runLength = 0;
302    int32_t i = 2;
303    int32_t ai=0;
304
305    if(!status || U_FAILURE(*status)){
306        return 0;
307    }
308    /* the source is null terminated */
309    if(srcLen == -1){
310        srcLen = u_strlen(src);
311    }
312    if(srcLen <= 2){
313        return 2;
314    }
315    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
316
317    if(target == NULL){
318        return length;
319    }
320    if(tgtLen < length){
321        *status = U_BUFFER_OVERFLOW_ERROR;
322        return length;
323    }
324
325    for (; ai<tgtLen; ) {
326       /* This part of the loop places the next byte into the local
327        * variable 'b' each time through the loop.  It keeps the
328        * current character in 'c' and uses the boolean 'nextChar'
329        * to see if we've taken both bytes out of 'c' yet.
330        */
331        uint8_t b;
332        if (nextChar) {
333            c = src[i++];
334            b = (uint8_t) (c >> 8);
335            nextChar = FALSE;
336        }
337        else {
338            b = (uint8_t) (c & 0xFF);
339            nextChar = TRUE;
340        }
341
342       /* This part of the loop is a tiny state machine which handles
343        * the parsing of the run-length encoding.  This would be simpler
344        * if we could look ahead, but we can't, so we use 'node' to
345        * move between three nodes in the state machine.
346        */
347        switch (node) {
348        case 0:
349            /* Normal idle node */
350            if (b == ESCAPE_BYTE) {
351                node = 1;
352            }
353            else {
354                target[ai++] = b;
355            }
356            break;
357        case 1:
358           /* We have seen one ESCAPE_BYTE; we expect either a second
359            * one, or a run length and value.
360            */
361            if (b == ESCAPE_BYTE) {
362                target[ai++] = ESCAPE_BYTE;
363                node = 0;
364            }
365            else {
366                runLength = b;
367                node = 2;
368            }
369            break;
370        case 2:
371            {
372                int j=0;
373               /* We have seen an ESCAPE_BYTE and length byte.  We interpret
374                * the next byte as the value to be repeated.
375                */
376                for (; j<runLength; ++j){
377                    if(ai<tgtLen){
378                        target[ai++] = b;
379                    }else{
380                        *status = U_BUFFER_OVERFLOW_ERROR;
381                        return ai;
382                    }
383                }
384                node = 0;
385                break;
386            }
387        }
388    }
389
390    if (node != 0){
391        *status = U_INTERNAL_PROGRAM_ERROR;
392        /*("Bad run-length encoded byte array")*/
393        return 0;
394    }
395
396
397    if (i != srcLen){
398        /*("Excess data in RLE byte array string");*/
399        *status = U_INTERNAL_PROGRAM_ERROR;
400        return ai;
401    }
402
403    return ai;
404}
405
406