1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* 6* Copyright (C) 2000-2003, International Business Machines 7* Corporation and others. All Rights Reserved. 8* 9******************************************************************************* 10* 11* File writejava.c 12* 13* Modification History: 14* 15* Date Name Description 16* 01/11/02 Ram Creation. 17******************************************************************************* 18*/ 19#include "rle.h" 20/** 21 * The ESCAPE character is used during run-length encoding. It signals 22 * a run of identical chars. 23 */ 24static const uint16_t ESCAPE = 0xA5A5; 25 26/** 27 * The ESCAPE_BYTE character is used during run-length encoding. It signals 28 * a run of identical bytes. 29 */ 30static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5; 31 32/** 33 * Append a byte to the given StringBuffer, packing two bytes into each 34 * character. The state parameter maintains intermediary data between 35 * calls. 36 * @param state A two-element array, with state[0] == 0 if this is the 37 * first byte of a pair, or state[0] != 0 if this is the second byte 38 * of a pair, in which case state[1] is the first byte. 39 */ 40static uint16_t* 41appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) { 42 if(!status || U_FAILURE(*status)){ 43 return NULL; 44 } 45 if (state[0] != 0) { 46 uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF)); 47 if(buffer < buffLimit){ 48 *buffer++ = c; 49 }else{ 50 *status = U_BUFFER_OVERFLOW_ERROR; 51 } 52 state[0] = 0; 53 return buffer; 54 } 55 else { 56 state[0] = 1; 57 state[1] = value; 58 return buffer; 59 } 60} 61/** 62 * Encode a run, possibly a degenerate run (of < 4 values). 63 * @param length The length of the run; must be > 0 && <= 0xFF. 64 */ 65static uint16_t* 66encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) { 67 if(!status || U_FAILURE(*status)){ 68 return NULL; 69 } 70 if (length < 4) { 71 int32_t j=0; 72 for (; j<length; ++j) { 73 if (value == ESCAPE_BYTE) { 74 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); 75 } 76 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); 77 } 78 } 79 else { 80 if (length == ESCAPE_BYTE) { 81 if (value == ESCAPE_BYTE){ 82 buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status); 83 } 84 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); 85 --length; 86 } 87 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); 88 buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status); 89 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/ 90 } 91 return buffer; 92} 93 94#define APPEND( buffer, bufLimit, value, num, status){ \ 95 if(buffer<bufLimit){ \ 96 *buffer++=(value); \ 97 }else{ \ 98 *status = U_BUFFER_OVERFLOW_ERROR; \ 99 } \ 100 num++; \ 101} 102 103/** 104 * Encode a run, possibly a degenerate run (of < 4 values). 105 * @param length The length of the run; must be > 0 && <= 0xFFFF. 106 */ 107static uint16_t* 108encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) { 109 int32_t num=0; 110 if (length < 4) { 111 int j=0; 112 for (; j<length; ++j) { 113 if (value == (int32_t) ESCAPE){ 114 APPEND(buffer,bufLimit,ESCAPE, num, status); 115 116 } 117 APPEND(buffer,bufLimit,value,num, status); 118 } 119 } 120 else { 121 if (length == (int32_t) ESCAPE) { 122 if (value == (int32_t) ESCAPE){ 123 APPEND(buffer,bufLimit,ESCAPE,num,status); 124 125 } 126 APPEND(buffer,bufLimit,value,num,status); 127 --length; 128 } 129 APPEND(buffer,bufLimit,ESCAPE,num,status); 130 APPEND(buffer,bufLimit,(uint16_t) length, num,status); 131 APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */ 132 } 133 return buffer; 134} 135 136/** 137 * Construct a string representing a char array. Use run-length encoding. 138 * A character represents itself, unless it is the ESCAPE character. Then 139 * the following notations are possible: 140 * ESCAPE ESCAPE ESCAPE literal 141 * ESCAPE n c n instances of character c 142 * Since an encoded run occupies 3 characters, we only encode runs of 4 or 143 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 144 * If we encounter a run where n == ESCAPE, we represent this as: 145 * c ESCAPE n-1 c 146 * The ESCAPE value is chosen so as not to collide with commonly 147 * seen values. 148 */ 149int32_t 150usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) { 151 uint16_t* bufLimit = buffer+bufLen; 152 uint16_t* saveBuffer = buffer; 153 if(buffer < bufLimit){ 154 *buffer++ = (uint16_t)(srcLen>>16); 155 if(buffer<bufLimit){ 156 uint16_t runValue = src[0]; 157 int32_t runLength = 1; 158 int i=1; 159 *buffer++ = (uint16_t) srcLen; 160 161 for (; i<srcLen; ++i) { 162 uint16_t s = src[i]; 163 if (s == runValue && runLength < 0xFFFF){ 164 ++runLength; 165 }else { 166 buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status); 167 runValue = s; 168 runLength = 1; 169 } 170 } 171 buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status); 172 }else{ 173 *status = U_BUFFER_OVERFLOW_ERROR; 174 } 175 }else{ 176 *status = U_BUFFER_OVERFLOW_ERROR; 177 } 178 return (int32_t)(buffer - saveBuffer); 179} 180 181/** 182 * Construct a string representing a byte array. Use run-length encoding. 183 * Two bytes are packed into a single char, with a single extra zero byte at 184 * the end if needed. A byte represents itself, unless it is the 185 * ESCAPE_BYTE. Then the following notations are possible: 186 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal 187 * ESCAPE_BYTE n b n instances of byte b 188 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or 189 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. 190 * If we encounter a run where n == ESCAPE_BYTE, we represent this as: 191 * b ESCAPE_BYTE n-1 b 192 * The ESCAPE_BYTE value is chosen so as not to collide with commonly 193 * seen values. 194 */ 195int32_t 196byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) { 197 const uint16_t* saveBuf = buffer; 198 uint16_t* bufLimit = buffer+bufLen; 199 if(buffer < bufLimit){ 200 *buffer++ = ((uint16_t) (srcLen >> 16)); 201 202 if(buffer<bufLimit){ 203 uint8_t runValue = src[0]; 204 int runLength = 1; 205 uint8_t state[2]= {0}; 206 int i=1; 207 *buffer++=((uint16_t) srcLen); 208 for (; i<srcLen; ++i) { 209 uint8_t b = src[i]; 210 if (b == runValue && runLength < 0xFF){ 211 ++runLength; 212 } 213 else { 214 buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status); 215 runValue = b; 216 runLength = 1; 217 } 218 } 219 buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status); 220 221 /* We must save the final byte, if there is one, by padding 222 * an extra zero. 223 */ 224 if (state[0] != 0) { 225 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status); 226 } 227 }else{ 228 *status = U_BUFFER_OVERFLOW_ERROR; 229 } 230 }else{ 231 *status = U_BUFFER_OVERFLOW_ERROR; 232 } 233 return (int32_t) (buffer - saveBuf); 234} 235 236 237/** 238 * Construct an array of shorts from a run-length encoded string. 239 */ 240int32_t 241rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) { 242 int32_t length = 0; 243 int32_t ai = 0; 244 int i=2; 245 246 if(!status || U_FAILURE(*status)){ 247 return 0; 248 } 249 /* the source is null terminated */ 250 if(srcLen == -1){ 251 srcLen = u_strlen(src); 252 } 253 if(srcLen <= 2){ 254 return 2; 255 } 256 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); 257 258 if(target == NULL){ 259 return length; 260 } 261 if(tgtLen < length){ 262 *status = U_BUFFER_OVERFLOW_ERROR; 263 return length; 264 } 265 266 for (; i<srcLen; ++i) { 267 uint16_t c = src[i]; 268 if (c == ESCAPE) { 269 c = src[++i]; 270 if (c == ESCAPE) { 271 target[ai++] = c; 272 } else { 273 int32_t runLength = (int32_t) c; 274 uint16_t runValue = src[++i]; 275 int j=0; 276 for (; j<runLength; ++j) { 277 target[ai++] = runValue; 278 } 279 } 280 } 281 else { 282 target[ai++] = c; 283 } 284 } 285 286 if (ai != length){ 287 *status = U_INTERNAL_PROGRAM_ERROR; 288 } 289 290 return length; 291} 292 293/** 294 * Construct an array of bytes from a run-length encoded string. 295 */ 296int32_t 297rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) { 298 299 int32_t length = 0; 300 UBool nextChar = TRUE; 301 uint16_t c = 0; 302 int32_t node = 0; 303 int32_t runLength = 0; 304 int32_t i = 2; 305 int32_t ai=0; 306 307 if(!status || U_FAILURE(*status)){ 308 return 0; 309 } 310 /* the source is null terminated */ 311 if(srcLen == -1){ 312 srcLen = u_strlen(src); 313 } 314 if(srcLen <= 2){ 315 return 2; 316 } 317 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); 318 319 if(target == NULL){ 320 return length; 321 } 322 if(tgtLen < length){ 323 *status = U_BUFFER_OVERFLOW_ERROR; 324 return length; 325 } 326 327 for (; ai<tgtLen; ) { 328 /* This part of the loop places the next byte into the local 329 * variable 'b' each time through the loop. It keeps the 330 * current character in 'c' and uses the boolean 'nextChar' 331 * to see if we've taken both bytes out of 'c' yet. 332 */ 333 uint8_t b; 334 if (nextChar) { 335 c = src[i++]; 336 b = (uint8_t) (c >> 8); 337 nextChar = FALSE; 338 } 339 else { 340 b = (uint8_t) (c & 0xFF); 341 nextChar = TRUE; 342 } 343 344 /* This part of the loop is a tiny state machine which handles 345 * the parsing of the run-length encoding. This would be simpler 346 * if we could look ahead, but we can't, so we use 'node' to 347 * move between three nodes in the state machine. 348 */ 349 switch (node) { 350 case 0: 351 /* Normal idle node */ 352 if (b == ESCAPE_BYTE) { 353 node = 1; 354 } 355 else { 356 target[ai++] = b; 357 } 358 break; 359 case 1: 360 /* We have seen one ESCAPE_BYTE; we expect either a second 361 * one, or a run length and value. 362 */ 363 if (b == ESCAPE_BYTE) { 364 target[ai++] = ESCAPE_BYTE; 365 node = 0; 366 } 367 else { 368 runLength = b; 369 node = 2; 370 } 371 break; 372 case 2: 373 { 374 int j=0; 375 /* We have seen an ESCAPE_BYTE and length byte. We interpret 376 * the next byte as the value to be repeated. 377 */ 378 for (; j<runLength; ++j){ 379 if(ai<tgtLen){ 380 target[ai++] = b; 381 }else{ 382 *status = U_BUFFER_OVERFLOW_ERROR; 383 return ai; 384 } 385 } 386 node = 0; 387 break; 388 } 389 } 390 } 391 392 if (node != 0){ 393 *status = U_INTERNAL_PROGRAM_ERROR; 394 /*("Bad run-length encoded byte array")*/ 395 return 0; 396 } 397 398 399 if (i != srcLen){ 400 /*("Excess data in RLE byte array string");*/ 401 *status = U_INTERNAL_PROGRAM_ERROR; 402 return ai; 403 } 404 405 return ai; 406} 407 408