tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

xz_dec_bcj.c (13905B)


      1 /*
      2 * Branch/Call/Jump (BCJ) filter decoders
      3 *
      4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
      5 *          Igor Pavlov <http://7-zip.org/>
      6 *
      7 * This file has been put into the public domain.
      8 * You can do whatever you want with this file.
      9 */
     10 
     11 #include "xz_private.h"
     12 
     13 /*
     14 * The rest of the file is inside this ifdef. It makes things a little more
     15 * convenient when building without support for any BCJ filters.
     16 */
     17 #ifdef XZ_DEC_BCJ
     18 
/*
 * State of one BCJ (Branch/Call/Jump) filter decoder. The state is
 * initialized by xz_dec_bcj_reset() and updated by xz_dec_bcj_run().
 */
struct xz_dec_bcj {
	/*
	 * Type of the BCJ filter being used. The numeric values equal the
	 * filter IDs accepted by xz_dec_bcj_reset().
	 */
	enum {
		BCJ_X86 = 4,        /* x86 or x86-64 */
		BCJ_POWERPC = 5,    /* Big endian only */
		BCJ_IA64 = 6,       /* Big or little endian */
		BCJ_ARM = 7,        /* Little endian only */
		BCJ_ARMTHUMB = 8,   /* Little endian only */
		BCJ_SPARC = 9       /* Big or little endian */
	} type;

	/*
	 * Return value of the next filter in the chain. We need to preserve
	 * this information across calls, because we must not call the next
	 * filter anymore once it has returned XZ_STREAM_END.
	 */
	enum xz_ret ret;

	/* True if we are operating in single-call mode. */
	bool single_call;

	/*
	 * Absolute position relative to the beginning of the uncompressed
	 * data (in a single .xz Block). We care only about the lowest 32
	 * bits so this doesn't need to be uint64_t even with big files.
	 */
	uint32_t pos;

	/* x86 filter state: history mask carried across calls (see bcj_x86()) */
	uint32_t x86_prev_mask;

	/*
	 * Temporary space to hold the variables from struct xz_buf while
	 * xz_dec_bcj_run() redirects the output into temp.buf below.
	 */
	uint8_t *out;
	size_t out_pos;
	size_t out_size;

	struct {
		/* Amount of already filtered data in the beginning of buf */
		size_t filtered;

		/* Total amount of data currently stored in buf */
		size_t size;

		/*
		 * Buffer to hold a mix of filtered and unfiltered data. This
		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
		 *
		 * Type         Alignment   Look-ahead
		 * x86              1           4
		 * PowerPC          4           0
		 * IA-64           16           0
		 * ARM              4           0
		 * ARM-Thumb        2           2
		 * SPARC            4           0
		 */
		uint8_t buf[16];
	} temp;
};
     77 
     78 #ifdef XZ_DEC_X86
     79 /*
     80 * This is used to test the most significant byte of a memory address
     81 * in an x86 instruction.
     82 */
     83 static inline int bcj_x86_test_msbyte(uint8_t b)
     84 {
     85 return b == 0x00 || b == 0xFF;
     86 }
     87 
/*
 * x86/x86-64 BCJ filter. Scans buf for CALL/JMP opcodes (0xE8/0xE9)
 * followed by a 32-bit little endian address whose most significant
 * byte is 0x00 or 0xFF, and converts that address by subtracting the
 * current position in the uncompressed stream. The last four bytes of
 * buf are only used as look-ahead and are never modified.
 *
 * Returns the number of bytes fully processed. The skipped-opcode
 * history mask is preserved across calls in s->x86_prev_mask.
 */
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * Whether a conversion attempt is allowed for each possible value
	 * of the 3-bit history mask of recently skipped opcodes.
	 */
	static const bool mask_to_allowed_status[8]
		= { true, true, true, false, true, false, false, false };

	/* For each mask value, which address byte the history points at */
	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

	size_t i;
	/* Position of the previous 0xE8/0xE9 byte; (size_t)-1 = none yet */
	size_t prev_pos = (size_t)-1;
	uint32_t prev_mask = s->x86_prev_mask;
	uint32_t src;
	uint32_t dest;
	uint32_t j;
	uint8_t b;

	/* Need the opcode byte plus four address bytes of look-ahead. */
	if (size <= 4)
		return 0;

	size -= 4;
	for (i = 0; i < size; ++i) {
		/* 0xE8 and 0xE9 differ only in the lowest bit. */
		if ((buf[i] & 0xFE) != 0xE8)
			continue;

		/* Gap since the previous opcode byte */
		prev_pos = i - prev_pos;
		if (prev_pos > 3) {
			/* Too far back to matter; forget the history. */
			prev_mask = 0;
		} else {
			/* Age the history mask by the size of the gap. */
			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
			if (prev_mask != 0) {
				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
				if (!mask_to_allowed_status[prev_mask]
						|| bcj_x86_test_msbyte(b)) {
					/* Skip this opcode but remember it. */
					prev_pos = i;
					prev_mask = (prev_mask << 1) | 1;
					continue;
				}
			}
		}

		prev_pos = i;

		if (bcj_x86_test_msbyte(buf[i + 4])) {
			src = get_unaligned_le32(buf + i + 1);
			while (true) {
				/* Absolute -> relative: subtract position. */
				dest = src - (s->pos + (uint32_t)i + 5);
				if (prev_mask == 0)
					break;

				/*
				 * NOTE(review): presumably compensates for a
				 * nearby opcode whose address bytes overlap
				 * this one — the candidate is recomputed
				 * until its marked byte is no longer a sign
				 * extension. Verify against XZ Utils x86.c.
				 */
				j = mask_to_bit_num[prev_mask] * 8;
				b = (uint8_t)(dest >> (24 - j));
				if (!bcj_x86_test_msbyte(b))
					break;

				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
			}

			/* Sign-extend the result from 25 bits and store it. */
			dest &= 0x01FFFFFF;
			dest |= (uint32_t)0 - (dest & 0x01000000);
			put_unaligned_le32(dest, buf + i + 1);
			i += 4;
		} else {
			prev_mask = (prev_mask << 1) | 1;
		}
	}

	/* Save the history mask relative to the end of the processed data. */
	prev_pos = i - prev_pos;
	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
	return i;
}
    157 #endif
    158 
    159 #ifdef XZ_DEC_POWERPC
    160 static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    161 {
    162 size_t i;
    163 uint32_t instr;
    164 
    165 for (i = 0; i + 4 <= size; i += 4) {
    166 	instr = get_unaligned_be32(buf + i);
    167 	if ((instr & 0xFC000003) == 0x48000001) {
    168 		instr &= 0x03FFFFFC;
    169 		instr -= s->pos + (uint32_t)i;
    170 		instr &= 0x03FFFFFC;
    171 		instr |= 0x48000001;
    172 		put_unaligned_be32(instr, buf + i);
    173 	}
    174 }
    175 
    176 return i;
    177 }
    178 #endif
    179 
    180 #ifdef XZ_DEC_IA64
/*
 * IA-64 BCJ filter. The data is processed in 16-byte instruction
 * bundles: the low five bits of the first byte select an entry in
 * branch_table telling which of the three 41-bit instruction slots
 * may hold a convertible branch. The 21-bit address field of each
 * matching slot is converted by subtracting the current position in
 * the uncompressed stream. Returns the number of bytes processed
 * (a multiple of 16).
 */
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/* Slot bitmask per 5-bit bundle template (bit n = check slot n). */
	static const uint8_t branch_table[32] = {
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		4, 4, 6, 6, 0, 0, 7, 7,
		4, 4, 0, 0, 4, 4, 0, 0
	};

	/*
	 * The local variables take a little bit stack space, but it's less
	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
	 * stack usage here without doing that for the LZMA2 decoder too.
	 */

	/* Loop counters */
	size_t i;
	size_t j;

	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
	uint32_t slot;

	/* Bitwise offset of the instruction indicated by slot */
	uint32_t bit_pos;

	/* bit_pos split into byte and bit parts */
	uint32_t byte_pos;
	uint32_t bit_res;

	/* Address part of an instruction */
	uint32_t addr;

	/* Mask used to detect which instructions to convert */
	uint32_t mask;

	/* 41-bit instruction stored somewhere in the lowest 48 bits */
	uint64_t instr;

	/* Instruction normalized with bit_res for easier manipulation */
	uint64_t norm;

	for (i = 0; i + 16 <= size; i += 16) {
		mask = branch_table[buf[i] & 0x1F];
		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
			if (((mask >> slot) & 1) == 0)
				continue;

			/* Read the six bytes covering the slot, little endian. */
			byte_pos = bit_pos >> 3;
			bit_res = bit_pos & 7;
			instr = 0;
			for (j = 0; j < 6; ++j)
				instr |= (uint64_t)(buf[i + j + byte_pos])
						<< (8 * j);

			norm = instr >> bit_res;

			/*
			 * Convert only when these opcode/qualifier bits
			 * match (presumably an IP-relative branch — confirm
			 * against the IA-64 instruction encoding).
			 */
			if (((norm >> 37) & 0x0F) == 0x05
					&& ((norm >> 9) & 0x07) == 0) {
				/* Extract the 20+1 bit address field. */
				addr = (norm >> 13) & 0x0FFFFF;
				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
				/* Address counts 16-byte bundles; convert. */
				addr <<= 4;
				addr -= s->pos + (uint32_t)i;
				addr >>= 4;

				/* Put the converted address back into norm. */
				norm &= ~((uint64_t)0x8FFFFF << 13);
				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
				norm |= (uint64_t)(addr & 0x100000)
						<< (36 - 20);

				/* Merge norm back at the original bit offset. */
				instr &= (1 << bit_res) - 1;
				instr |= norm << bit_res;

				for (j = 0; j < 6; j++)
					buf[i + j + byte_pos]
						= (uint8_t)(instr >> (8 * j));
			}
		}
	}

	return i;
}
    262 #endif
    263 
    264 #ifdef XZ_DEC_ARM
    265 static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    266 {
    267 size_t i;
    268 uint32_t addr;
    269 
    270 for (i = 0; i + 4 <= size; i += 4) {
    271 	if (buf[i + 3] == 0xEB) {
    272 		addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
    273 				| ((uint32_t)buf[i + 2] << 16);
    274 		addr <<= 2;
    275 		addr -= s->pos + (uint32_t)i + 8;
    276 		addr >>= 2;
    277 		buf[i] = (uint8_t)addr;
    278 		buf[i + 1] = (uint8_t)(addr >> 8);
    279 		buf[i + 2] = (uint8_t)(addr >> 16);
    280 	}
    281 }
    282 
    283 return i;
    284 }
    285 #endif
    286 
    287 #ifdef XZ_DEC_ARMTHUMB
    288 static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    289 {
    290 size_t i;
    291 uint32_t addr;
    292 
    293 for (i = 0; i + 4 <= size; i += 2) {
    294 	if ((buf[i + 1] & 0xF8) == 0xF0
    295 			&& (buf[i + 3] & 0xF8) == 0xF8) {
    296 		addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
    297 				| ((uint32_t)buf[i] << 11)
    298 				| (((uint32_t)buf[i + 3] & 0x07) << 8)
    299 				| (uint32_t)buf[i + 2];
    300 		addr <<= 1;
    301 		addr -= s->pos + (uint32_t)i + 4;
    302 		addr >>= 1;
    303 		buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
    304 		buf[i] = (uint8_t)(addr >> 11);
    305 		buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
    306 		buf[i + 2] = (uint8_t)addr;
    307 		i += 2;
    308 	}
    309 }
    310 
    311 return i;
    312 }
    313 #endif
    314 
    315 #ifdef XZ_DEC_SPARC
    316 static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    317 {
    318 size_t i;
    319 uint32_t instr;
    320 
    321 for (i = 0; i + 4 <= size; i += 4) {
    322 	instr = get_unaligned_be32(buf + i);
    323 	if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
    324 		instr <<= 2;
    325 		instr -= s->pos + (uint32_t)i;
    326 		instr >>= 2;
    327 		instr = ((uint32_t)0x40000000 - (instr & 0x400000))
    328 				| 0x40000000 | (instr & 0x3FFFFF);
    329 		put_unaligned_be32(instr, buf + i);
    330 	}
    331 }
    332 
    333 return i;
    334 }
    335 #endif
    336 
    337 /*
    338 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
    339 * of data that got filtered.
    340 *
    341 * NOTE: This is implemented as a switch statement to avoid using function
    342 * pointers, which could be problematic in the kernel boot code, which must
    343 * avoid pointers to static data (at least on x86).
    344 */
/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which could be problematic in the kernel boot code, which must
 * avoid pointers to static data (at least on x86).
 */
static void bcj_apply(struct xz_dec_bcj *s,
		      uint8_t *buf, size_t *pos, size_t size)
{
	size_t filtered;

	/* Filter only the not-yet-filtered tail of the buffer. */
	buf += *pos;
	size -= *pos;

	switch (s->type) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
		filtered = bcj_x86(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
		filtered = bcj_powerpc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
		filtered = bcj_ia64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
		filtered = bcj_arm(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
		filtered = bcj_armthumb(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
		filtered = bcj_sparc(s, buf, size);
		break;
#endif
	default:
		/* Never reached but silence compiler warnings. */
		filtered = 0;
		break;
	}

	/* Advance both the caller's position and the stream position. */
	*pos += filtered;
	s->pos += filtered;
}
    393 
    394 /*
    395 * Flush pending filtered data from temp to the output buffer.
    396 * Move the remaining mixture of possibly filtered and unfiltered
    397 * data to the beginning of temp.
    398 */
    399 static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
    400 {
    401 size_t copy_size;
    402 
    403 copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
    404 memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
    405 b->out_pos += copy_size;
    406 
    407 s->temp.filtered -= copy_size;
    408 s->temp.size -= copy_size;
    409 memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
    410 }
    411 
    412 /*
    413 * The BCJ filter functions are primitive in sense that they process the
    414 * data in chunks of 1-16 bytes. To hide this issue, this function does
    415 * some buffering.
    416 */
/*
 * The BCJ filter functions are primitive in sense that they process the
 * data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 *
 * Runs the next filter in the chain (the LZMA2 decoder) and applies the
 * BCJ filter to its output, using s->temp to carry partially filtered
 * bytes across calls. Returns the (possibly preserved) return value of
 * the next filter, or XZ_OK while more output is pending.
 */
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
				     struct xz_dec_lzma2 *lzma2,
				     struct xz_buf *b)
{
	size_t out_start;

	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned XZ_STREAM_END.
	 */
	if (s->temp.filtered > 0) {
		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;

		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;
	}

	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 *
	 * This needs to be always run when temp.size == 0 to handle a special
	 * case where the output buffer is full and the next filter has no
	 * more output coming but hasn't returned XZ_STREAM_END yet.
	 */
	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
		out_start = b->out_pos;
		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
		b->out_pos += s->temp.size;

		s->ret = xz_dec_lzma2_run(lzma2, b);
		if (s->ret != XZ_STREAM_END
				&& (s->ret != XZ_OK || s->single_call))
			return s->ret;

		bcj_apply(s, b->out, &out_start, b->out_pos);

		/*
		 * As an exception, if the next filter returned XZ_STREAM_END,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;

		/* Stash the unfiltered tail into temp for the next call. */
		s->temp.size = b->out_pos - out_start;
		b->out_pos -= s->temp.size;
		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);

		/*
		 * If there wasn't enough input to the next filter to fill
		 * the output buffer with unfiltered data, there's no point
		 * to try decoding more data to temp.
		 */
		if (b->out_pos + s->temp.size < b->out_size)
			return XZ_OK;
	}

	/*
	 * We have unfiltered data in temp. If the output buffer isn't full
	 * yet, try to fill the temp buffer by decoding more data from the
	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
	 * fill the actual output buffer by copying filtered data from temp.
	 * A mix of filtered and unfiltered data may be left in temp; it will
	 * be taken care on the next call to this function.
	 */
	if (b->out_pos < b->out_size) {
		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
		s->out = b->out;
		s->out_pos = b->out_pos;
		s->out_size = b->out_size;
		b->out = s->temp.buf;
		b->out_pos = s->temp.size;
		b->out_size = sizeof(s->temp.buf);

		s->ret = xz_dec_lzma2_run(lzma2, b);

		/* Restore the caller's buffer; temp now holds the output. */
		s->temp.size = b->out_pos;
		b->out = s->out;
		b->out_pos = s->out_pos;
		b->out_size = s->out_size;

		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
			return s->ret;

		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

		/*
		 * If the next filter returned XZ_STREAM_END, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if (s->ret == XZ_STREAM_END)
			s->temp.filtered = s->temp.size;

		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;
	}

	return s->ret;
}
    526 
    527 XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
    528 {
    529 struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
    530 if (s != NULL)
    531 	s->single_call = single_call;
    532 
    533 return s;
    534 }
    535 
/*
 * Validate the filter ID and reset the BCJ decoder state for a new
 * Block. Returns XZ_OK if id names a BCJ filter whose decoder was
 * compiled in, XZ_OPTIONS_ERROR otherwise.
 */
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
{
	/*
	 * Accept only compiled-in filter IDs; the case labels all fall
	 * through to the single break below.
	 */
	switch (id) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
#endif
		break;

	default:
		/* Unsupported Filter ID */
		return XZ_OPTIONS_ERROR;
	}

	/* Reset all per-Block state; single_call is preserved. */
	s->type = id;
	s->ret = XZ_OK;
	s->pos = 0;
	s->x86_prev_mask = 0;
	s->temp.filtered = 0;
	s->temp.size = 0;

	return XZ_OK;
}
    573 
    574 #endif