rle.c (12561B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2000-2003, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * 11 * File writejava.c 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 01/11/02 Ram Creation. 17 ******************************************************************************* 18 */ 19 #include <stdbool.h> 20 #include "rle.h" 21 /** 22 * The ESCAPE character is used during run-length encoding. It signals 23 * a run of identical chars. 24 */ 25 static const uint16_t ESCAPE = 0xA5A5; 26 27 /** 28 * The ESCAPE_BYTE character is used during run-length encoding. It signals 29 * a run of identical bytes. 30 */ 31 static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5; 32 33 /** 34 * Append a byte to the given StringBuffer, packing two bytes into each 35 * character. The state parameter maintains intermediary data between 36 * calls. 37 * @param state A two-element array, with state[0] == 0 if this is the 38 * first byte of a pair, or state[0] != 0 if this is the second byte 39 * of a pair, in which case state[1] is the first byte. 40 */ 41 static uint16_t* 42 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) { 43 if(!status || U_FAILURE(*status)){ 44 return NULL; 45 } 46 if (state[0] != 0) { 47 uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF)); 48 if(buffer < buffLimit){ 49 *buffer++ = c; 50 }else{ 51 *status = U_BUFFER_OVERFLOW_ERROR; 52 } 53 state[0] = 0; 54 return buffer; 55 } 56 else { 57 state[0] = 1; 58 state[1] = value; 59 return buffer; 60 } 61 } 62 /** 63 * Encode a run, possibly a degenerate run (of < 4 values). 64 * @param length The length of the run; must be > 0 && <= 0xFF. 65 */ 66 static uint16_t* 67 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) { 68 if(!status || U_FAILURE(*status)){ 69 return NULL; 70 } 71 if (length < 4) { 72 int32_t j=0; 73 for (; j<length; ++j) { 74 if (value == ESCAPE_BYTE) { 75 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); 76 } 77 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); 78 } 79 } 80 else { 81 if (length == ESCAPE_BYTE) { 82 if (value == ESCAPE_BYTE){ 83 buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status); 84 } 85 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); 86 --length; 87 } 88 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); 89 buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status); 90 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/ 91 } 92 return buffer; 93 } 94 95 #define APPEND( buffer, bufLimit, value, status) UPRV_BLOCK_MACRO_BEGIN { \ 96 if(buffer<bufLimit){ \ 97 *buffer++=(value); \ 98 }else{ \ 99 *status = U_BUFFER_OVERFLOW_ERROR; \ 100 } \ 101 } UPRV_BLOCK_MACRO_END 102 103 /** 104 * Encode a run, possibly a degenerate run (of < 4 values). 105 * @param length The length of the run; must be > 0 && <= 0xFFFF. 106 */ 107 static uint16_t* 108 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) { 109 if (length < 4) { 110 int j=0; 111 for (; j<length; ++j) { 112 if (value == (int32_t) ESCAPE){ 113 APPEND(buffer,bufLimit,ESCAPE, status); 114 115 } 116 APPEND(buffer,bufLimit,value,status); 117 } 118 } 119 else { 120 if (length == (int32_t) ESCAPE) { 121 if (value == (int32_t) ESCAPE){ 122 APPEND(buffer,bufLimit,ESCAPE,status); 123 124 } 125 APPEND(buffer,bufLimit,value,status); 126 --length; 127 } 128 APPEND(buffer,bufLimit,ESCAPE,status); 129 APPEND(buffer,bufLimit,(uint16_t) length,status); 130 APPEND(buffer,bufLimit,(uint16_t)value, status); /* Don't need to escape this value */ 131 } 132 return buffer; 133 } 134 135 /** 136 * Construct a string representing a char array. Use run-length encoding. 137 * A character represents itself, unless it is the ESCAPE character. Then 138 * the following notations are possible: 139 * ESCAPE ESCAPE ESCAPE literal 140 * ESCAPE n c n instances of character c 141 * Since an encoded run occupies 3 characters, we only encode runs of 4 or 142 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 143 * If we encounter a run where n == ESCAPE, we represent this as: 144 * c ESCAPE n-1 c 145 * The ESCAPE value is chosen so as not to collide with commonly 146 * seen values. 147 */ 148 int32_t 149 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) { 150 uint16_t* bufLimit = buffer+bufLen; 151 uint16_t* saveBuffer = buffer; 152 if(buffer < bufLimit){ 153 *buffer++ = (uint16_t)(srcLen>>16); 154 if(buffer<bufLimit){ 155 uint16_t runValue = src[0]; 156 int32_t runLength = 1; 157 int i=1; 158 *buffer++ = (uint16_t) srcLen; 159 160 for (; i<srcLen; ++i) { 161 uint16_t s = src[i]; 162 if (s == runValue && runLength < 0xFFFF){ 163 ++runLength; 164 }else { 165 buffer = encodeRunShort(buffer, bufLimit, runValue, runLength, status); 166 runValue = s; 167 runLength = 1; 168 } 169 } 170 buffer = encodeRunShort(buffer, bufLimit, runValue, runLength, status); 171 }else{ 172 *status = U_BUFFER_OVERFLOW_ERROR; 173 } 174 }else{ 175 *status = U_BUFFER_OVERFLOW_ERROR; 176 } 177 return (int32_t)(buffer - saveBuffer); 178 } 179 180 /** 181 * Construct a string representing a byte array. Use run-length encoding. 182 * Two bytes are packed into a single char, with a single extra zero byte at 183 * the end if needed. A byte represents itself, unless it is the 184 * ESCAPE_BYTE. Then the following notations are possible: 185 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal 186 * ESCAPE_BYTE n b n instances of byte b 187 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or 188 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. 189 * If we encounter a run where n == ESCAPE_BYTE, we represent this as: 190 * b ESCAPE_BYTE n-1 b 191 * The ESCAPE_BYTE value is chosen so as not to collide with commonly 192 * seen values. 193 */ 194 int32_t 195 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) { 196 const uint16_t* saveBuf = buffer; 197 uint16_t* bufLimit = buffer+bufLen; 198 if(buffer < bufLimit){ 199 *buffer++ = ((uint16_t) (srcLen >> 16)); 200 201 if(buffer<bufLimit){ 202 uint8_t runValue = src[0]; 203 int runLength = 1; 204 uint8_t state[2]= {0}; 205 int i=1; 206 *buffer++=((uint16_t) srcLen); 207 for (; i<srcLen; ++i) { 208 uint8_t b = src[i]; 209 if (b == runValue && runLength < 0xFF){ 210 ++runLength; 211 } 212 else { 213 buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status); 214 runValue = b; 215 runLength = 1; 216 } 217 } 218 buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status); 219 220 /* We must save the final byte, if there is one, by padding 221 * an extra zero. 222 */ 223 if (state[0] != 0) { 224 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status); 225 } 226 }else{ 227 *status = U_BUFFER_OVERFLOW_ERROR; 228 } 229 }else{ 230 *status = U_BUFFER_OVERFLOW_ERROR; 231 } 232 return (int32_t) (buffer - saveBuf); 233 } 234 235 236 /** 237 * Construct an array of shorts from a run-length encoded string. 238 */ 239 int32_t 240 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) { 241 int32_t length = 0; 242 int32_t ai = 0; 243 int i=2; 244 245 if(!status || U_FAILURE(*status)){ 246 return 0; 247 } 248 /* the source is null terminated */ 249 if(srcLen == -1){ 250 srcLen = u_strlen(src); 251 } 252 if(srcLen <= 2){ 253 return 2; 254 } 255 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); 256 257 if(target == NULL){ 258 return length; 259 } 260 if(tgtLen < length){ 261 *status = U_BUFFER_OVERFLOW_ERROR; 262 return length; 263 } 264 265 for (; i<srcLen; ++i) { 266 uint16_t c = src[i]; 267 if (c == ESCAPE) { 268 c = src[++i]; 269 if (c == ESCAPE) { 270 target[ai++] = c; 271 } else { 272 int32_t runLength = (int32_t) c; 273 uint16_t runValue = src[++i]; 274 int j=0; 275 for (; j<runLength; ++j) { 276 target[ai++] = runValue; 277 } 278 } 279 } 280 else { 281 target[ai++] = c; 282 } 283 } 284 285 if (ai != length){ 286 *status = U_INTERNAL_PROGRAM_ERROR; 287 } 288 289 return length; 290 } 291 292 /** 293 * Construct an array of bytes from a run-length encoded string. 294 */ 295 int32_t 296 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) { 297 298 int32_t length = 0; 299 UBool nextChar = true; 300 uint16_t c = 0; 301 int32_t node = 0; 302 int32_t runLength = 0; 303 int32_t i = 2; 304 int32_t ai=0; 305 306 if(!status || U_FAILURE(*status)){ 307 return 0; 308 } 309 /* the source is null terminated */ 310 if(srcLen == -1){ 311 srcLen = u_strlen(src); 312 } 313 if(srcLen <= 2){ 314 return 2; 315 } 316 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); 317 318 if(target == NULL){ 319 return length; 320 } 321 if(tgtLen < length){ 322 *status = U_BUFFER_OVERFLOW_ERROR; 323 return length; 324 } 325 326 for (; ai<tgtLen; ) { 327 /* This part of the loop places the next byte into the local 328 * variable 'b' each time through the loop. It keeps the 329 * current character in 'c' and uses the boolean 'nextChar' 330 * to see if we've taken both bytes out of 'c' yet. 331 */ 332 uint8_t b; 333 if (nextChar) { 334 c = src[i++]; 335 b = (uint8_t) (c >> 8); 336 nextChar = false; 337 } 338 else { 339 b = (uint8_t) (c & 0xFF); 340 nextChar = true; 341 } 342 343 /* This part of the loop is a tiny state machine which handles 344 * the parsing of the run-length encoding. This would be simpler 345 * if we could look ahead, but we can't, so we use 'node' to 346 * move between three nodes in the state machine. 347 */ 348 switch (node) { 349 case 0: 350 /* Normal idle node */ 351 if (b == ESCAPE_BYTE) { 352 node = 1; 353 } 354 else { 355 target[ai++] = b; 356 } 357 break; 358 case 1: 359 /* We have seen one ESCAPE_BYTE; we expect either a second 360 * one, or a run length and value. 361 */ 362 if (b == ESCAPE_BYTE) { 363 target[ai++] = ESCAPE_BYTE; 364 node = 0; 365 } 366 else { 367 runLength = b; 368 node = 2; 369 } 370 break; 371 case 2: 372 { 373 int j=0; 374 /* We have seen an ESCAPE_BYTE and length byte. We interpret 375 * the next byte as the value to be repeated. 376 */ 377 for (; j<runLength; ++j){ 378 if(ai<tgtLen){ 379 target[ai++] = b; 380 }else{ 381 *status = U_BUFFER_OVERFLOW_ERROR; 382 return ai; 383 } 384 } 385 node = 0; 386 break; 387 } 388 } 389 } 390 391 if (node != 0){ 392 *status = U_INTERNAL_PROGRAM_ERROR; 393 /*("Bad run-length encoded byte array")*/ 394 return 0; 395 } 396 397 398 if (i != srcLen){ 399 /*("Excess data in RLE byte array string");*/ 400 *status = U_INTERNAL_PROGRAM_ERROR; 401 return ai; 402 } 403 404 return ai; 405 }