1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6*   Copyright (C) 2000-2003, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*
11* File writejava.c
12*
13* Modification History:
14*
15*   Date        Name        Description
16*   01/11/02    Ram        Creation.
17*******************************************************************************
18*/
19#include "rle.h"
20/**
21 * The ESCAPE character is used during run-length encoding.  It signals
22 * a run of identical chars.
23 */
24static const uint16_t ESCAPE = 0xA5A5;
25
26/**
27 * The ESCAPE_BYTE character is used during run-length encoding.  It signals
28 * a run of identical bytes.
29 */
30static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
31
32/**
33 * Append a byte to the given StringBuffer, packing two bytes into each
34 * character.  The state parameter maintains intermediary data between
35 * calls.
36 * @param state A two-element array, with state[0] == 0 if this is the
37 * first byte of a pair, or state[0] != 0 if this is the second byte
38 * of a pair, in which case state[1] is the first byte.
39 */
40static uint16_t*
41appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
42    if(!status || U_FAILURE(*status)){
43        return NULL;
44    }
45    if (state[0] != 0) {
46        uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
47        if(buffer < buffLimit){
48            *buffer++ = c;
49        }else{
50            *status = U_BUFFER_OVERFLOW_ERROR;
51        }
52        state[0] = 0;
53        return buffer;
54    }
55    else {
56        state[0] = 1;
57        state[1] = value;
58        return buffer;
59    }
60}
61/**
62 * Encode a run, possibly a degenerate run (of < 4 values).
63 * @param length The length of the run; must be > 0 && <= 0xFF.
64 */
65static uint16_t*
66encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
67    if(!status || U_FAILURE(*status)){
68        return NULL;
69    }
70    if (length < 4) {
71        int32_t j=0;
72        for (; j<length; ++j) {
73            if (value == ESCAPE_BYTE) {
74                buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
75            }
76            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
77        }
78    }
79    else {
80        if (length == ESCAPE_BYTE) {
81            if (value == ESCAPE_BYTE){
82               buffer =  appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
83            }
84            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
85            --length;
86        }
87        buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
88        buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
89        buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
90    }
91    return buffer;
92}
93
94#define APPEND( buffer, bufLimit, value, num, status){  \
95    if(buffer<bufLimit){                    \
96        *buffer++=(value);                  \
97    }else{                                  \
98        *status = U_BUFFER_OVERFLOW_ERROR;  \
99    }                                       \
100    num++;                                  \
101}
102
103/**
104 * Encode a run, possibly a degenerate run (of < 4 values).
105 * @param length The length of the run; must be > 0 && <= 0xFFFF.
106 */
107static uint16_t*
108encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
109    int32_t num=0;
110    if (length < 4) {
111        int j=0;
112        for (; j<length; ++j) {
113            if (value == (int32_t) ESCAPE){
114                APPEND(buffer,bufLimit,ESCAPE, num, status);
115
116            }
117            APPEND(buffer,bufLimit,value,num, status);
118        }
119    }
120    else {
121        if (length == (int32_t) ESCAPE) {
122            if (value == (int32_t) ESCAPE){
123                APPEND(buffer,bufLimit,ESCAPE,num,status);
124
125            }
126            APPEND(buffer,bufLimit,value,num,status);
127            --length;
128        }
129        APPEND(buffer,bufLimit,ESCAPE,num,status);
130        APPEND(buffer,bufLimit,(uint16_t) length, num,status);
131        APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
132    }
133    return buffer;
134}
135
136/**
137 * Construct a string representing a char array.  Use run-length encoding.
138 * A character represents itself, unless it is the ESCAPE character.  Then
139 * the following notations are possible:
140 *   ESCAPE ESCAPE   ESCAPE literal
141 *   ESCAPE n c      n instances of character c
142 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
143 * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
144 * If we encounter a run where n == ESCAPE, we represent this as:
145 *   c ESCAPE n-1 c
146 * The ESCAPE value is chosen so as not to collide with commonly
147 * seen values.
148 */
149int32_t
150usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
151    uint16_t* bufLimit =  buffer+bufLen;
152    uint16_t* saveBuffer = buffer;
153    if(buffer < bufLimit){
154        *buffer++ =  (uint16_t)(srcLen>>16);
155        if(buffer<bufLimit){
156            uint16_t runValue = src[0];
157            int32_t runLength = 1;
158            int i=1;
159            *buffer++ = (uint16_t) srcLen;
160
161            for (; i<srcLen; ++i) {
162                uint16_t s = src[i];
163                if (s == runValue && runLength < 0xFFFF){
164                    ++runLength;
165                }else {
166                    buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
167                    runValue = s;
168                    runLength = 1;
169                }
170            }
171            buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
172        }else{
173            *status = U_BUFFER_OVERFLOW_ERROR;
174        }
175    }else{
176        *status = U_BUFFER_OVERFLOW_ERROR;
177    }
178    return (int32_t)(buffer - saveBuffer);
179}
180
181/**
182 * Construct a string representing a byte array.  Use run-length encoding.
183 * Two bytes are packed into a single char, with a single extra zero byte at
184 * the end if needed.  A byte represents itself, unless it is the
185 * ESCAPE_BYTE.  Then the following notations are possible:
186 *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
187 *   ESCAPE_BYTE n b           n instances of byte b
188 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
189 * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
190 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
191 *   b ESCAPE_BYTE n-1 b
192 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
193 * seen values.
194 */
195int32_t
196byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
197    const uint16_t* saveBuf = buffer;
198    uint16_t* bufLimit =  buffer+bufLen;
199    if(buffer < bufLimit){
200        *buffer++ = ((uint16_t) (srcLen >> 16));
201
202        if(buffer<bufLimit){
203            uint8_t runValue = src[0];
204            int runLength = 1;
205            uint8_t state[2]= {0};
206            int i=1;
207            *buffer++=((uint16_t) srcLen);
208            for (; i<srcLen; ++i) {
209                uint8_t b = src[i];
210                if (b == runValue && runLength < 0xFF){
211                    ++runLength;
212                }
213                else {
214                    buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
215                    runValue = b;
216                    runLength = 1;
217                }
218            }
219            buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
220
221            /* We must save the final byte, if there is one, by padding
222             * an extra zero.
223             */
224            if (state[0] != 0) {
225                buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
226            }
227        }else{
228            *status = U_BUFFER_OVERFLOW_ERROR;
229        }
230    }else{
231        *status = U_BUFFER_OVERFLOW_ERROR;
232    }
233    return (int32_t) (buffer - saveBuf);
234}
235
236
237/**
238 * Construct an array of shorts from a run-length encoded string.
239 */
240int32_t
241rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
242    int32_t length = 0;
243    int32_t ai = 0;
244    int i=2;
245
246    if(!status || U_FAILURE(*status)){
247        return 0;
248    }
249    /* the source is null terminated */
250    if(srcLen == -1){
251        srcLen = u_strlen(src);
252    }
253    if(srcLen <= 2){
254        return 2;
255    }
256    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
257
258    if(target == NULL){
259        return length;
260    }
261    if(tgtLen < length){
262        *status = U_BUFFER_OVERFLOW_ERROR;
263        return length;
264    }
265
266    for (; i<srcLen; ++i) {
267        uint16_t c = src[i];
268        if (c == ESCAPE) {
269            c = src[++i];
270            if (c == ESCAPE) {
271                target[ai++] = c;
272            } else {
273                int32_t runLength = (int32_t) c;
274                uint16_t runValue = src[++i];
275                int j=0;
276                for (; j<runLength; ++j) {
277                    target[ai++] = runValue;
278                }
279            }
280        }
281        else {
282            target[ai++] = c;
283        }
284    }
285
286    if (ai != length){
287        *status = U_INTERNAL_PROGRAM_ERROR;
288    }
289
290    return length;
291}
292
293/**
294 * Construct an array of bytes from a run-length encoded string.
295 */
296int32_t
297rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
298
299    int32_t length = 0;
300    UBool nextChar = TRUE;
301    uint16_t c = 0;
302    int32_t node = 0;
303    int32_t runLength = 0;
304    int32_t i = 2;
305    int32_t ai=0;
306
307    if(!status || U_FAILURE(*status)){
308        return 0;
309    }
310    /* the source is null terminated */
311    if(srcLen == -1){
312        srcLen = u_strlen(src);
313    }
314    if(srcLen <= 2){
315        return 2;
316    }
317    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
318
319    if(target == NULL){
320        return length;
321    }
322    if(tgtLen < length){
323        *status = U_BUFFER_OVERFLOW_ERROR;
324        return length;
325    }
326
327    for (; ai<tgtLen; ) {
328       /* This part of the loop places the next byte into the local
329        * variable 'b' each time through the loop.  It keeps the
330        * current character in 'c' and uses the boolean 'nextChar'
331        * to see if we've taken both bytes out of 'c' yet.
332        */
333        uint8_t b;
334        if (nextChar) {
335            c = src[i++];
336            b = (uint8_t) (c >> 8);
337            nextChar = FALSE;
338        }
339        else {
340            b = (uint8_t) (c & 0xFF);
341            nextChar = TRUE;
342        }
343
344       /* This part of the loop is a tiny state machine which handles
345        * the parsing of the run-length encoding.  This would be simpler
346        * if we could look ahead, but we can't, so we use 'node' to
347        * move between three nodes in the state machine.
348        */
349        switch (node) {
350        case 0:
351            /* Normal idle node */
352            if (b == ESCAPE_BYTE) {
353                node = 1;
354            }
355            else {
356                target[ai++] = b;
357            }
358            break;
359        case 1:
360           /* We have seen one ESCAPE_BYTE; we expect either a second
361            * one, or a run length and value.
362            */
363            if (b == ESCAPE_BYTE) {
364                target[ai++] = ESCAPE_BYTE;
365                node = 0;
366            }
367            else {
368                runLength = b;
369                node = 2;
370            }
371            break;
372        case 2:
373            {
374                int j=0;
375               /* We have seen an ESCAPE_BYTE and length byte.  We interpret
376                * the next byte as the value to be repeated.
377                */
378                for (; j<runLength; ++j){
379                    if(ai<tgtLen){
380                        target[ai++] = b;
381                    }else{
382                        *status = U_BUFFER_OVERFLOW_ERROR;
383                        return ai;
384                    }
385                }
386                node = 0;
387                break;
388            }
389        }
390    }
391
392    if (node != 0){
393        *status = U_INTERNAL_PROGRAM_ERROR;
394        /*("Bad run-length encoded byte array")*/
395        return 0;
396    }
397
398
399    if (i != srcLen){
400        /*("Excess data in RLE byte array string");*/
401        *status = U_INTERNAL_PROGRAM_ERROR;
402        return ai;
403    }
404
405    return ai;
406}
407
408