crc32.c (31747B)
1 /* crc32.c -- compute the CRC-32 of a data stream 2 * Copyright (C) 1995-2022 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 * 5 * This interleaved implementation of a CRC makes use of pipelined multiple 6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to 7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 8 */ 9 10 /* @(#) $Id$ */ 11 12 /* 13 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore 14 protection on the static variables used to control the first-use generation 15 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should 16 first call get_crc_table() to initialize the tables before allowing more than 17 one thread to use crc32(). 18 19 MAKECRCH can be #defined to write out crc32.h. A main() routine is also 20 produced, so that this one source file can be compiled to an executable. 21 */ 22 23 #ifdef MAKECRCH 24 # include <stdio.h> 25 # ifndef DYNAMIC_CRC_TABLE 26 # define DYNAMIC_CRC_TABLE 27 # endif /* !DYNAMIC_CRC_TABLE */ 28 #endif /* MAKECRCH */ 29 30 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ 31 32 /* 33 A CRC of a message is computed on N braids of words in the message, where 34 each word consists of W bytes (4 or 8). If N is 3, for example, then three 35 running sparse CRCs are calculated respectively on each braid, at these 36 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... 37 This is done starting at a word boundary, and continues until as many blocks 38 of N * W bytes as are available have been processed. The results are combined 39 into a single CRC at the end. For this code, N must be in the range 1..6 and 40 W must be 4 or 8. The upper limit on N can be increased if desired by adding 41 more #if blocks, extending the patterns apparent in the code. In addition, 42 crc32.h would need to be regenerated, if the maximum N value is increased. 43 44 N and W are chosen empirically by benchmarking the execution time on a given 45 processor. The choices for N and W below were based on testing on Intel Kaby 46 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 47 Octeon II processors. The Intel, AMD, and ARM processors were all fastest 48 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 49 They were all tested with either gcc or clang, all using the -O3 optimization 50 level. Your mileage may vary. 51 */ 52 53 /* Define N */ 54 #ifdef Z_TESTN 55 # define N Z_TESTN 56 #else 57 # define N 5 58 #endif 59 #if N < 1 || N > 6 60 # error N must be in 1..6 61 #endif 62 63 /* 64 z_crc_t must be at least 32 bits. z_word_t must be at least as long as 65 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and 66 that bytes are eight bits. 67 */ 68 69 /* 70 Define W and the associated z_word_t type. If W is not defined, then a 71 braided calculation is not used, and the associated tables and code are not 72 compiled. 73 */ 74 #ifdef Z_TESTW 75 # if Z_TESTW-1 != -1 76 # define W Z_TESTW 77 # endif 78 #else 79 # ifdef MAKECRCH 80 # define W 8 /* required for MAKECRCH */ 81 # else 82 # if defined(__x86_64__) || defined(__aarch64__) 83 # define W 8 84 # else 85 # define W 4 86 # endif 87 # endif 88 #endif 89 #ifdef W 90 # if W == 8 && defined(Z_U8) 91 typedef Z_U8 z_word_t; 92 # elif defined(Z_U4) 93 # undef W 94 # define W 4 95 typedef Z_U4 z_word_t; 96 # else 97 # undef W 98 # endif 99 #endif 100 101 /* If available, use the ARM processor CRC32 instruction. */ 102 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 103 # define ARMCRC32 104 #endif 105 106 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 107 /* 108 Swap the bytes in a z_word_t to convert between little and big endian. Any 109 self-respecting compiler will optimize this to a single machine byte-swap 110 instruction, if one is available. This assumes that word_t is either 32 bits 111 or 64 bits. 112 */ 113 local z_word_t byte_swap(z_word_t word) { 114 # if W == 8 115 return 116 (word & 0xff00000000000000) >> 56 | 117 (word & 0xff000000000000) >> 40 | 118 (word & 0xff0000000000) >> 24 | 119 (word & 0xff00000000) >> 8 | 120 (word & 0xff000000) << 8 | 121 (word & 0xff0000) << 24 | 122 (word & 0xff00) << 40 | 123 (word & 0xff) << 56; 124 # else /* W == 4 */ 125 return 126 (word & 0xff000000) >> 24 | 127 (word & 0xff0000) >> 8 | 128 (word & 0xff00) << 8 | 129 (word & 0xff) << 24; 130 # endif 131 } 132 #endif 133 134 #ifdef DYNAMIC_CRC_TABLE 135 /* ========================================================================= 136 * Table of powers of x for combining CRC-32s, filled in by make_crc_table() 137 * below. 138 */ 139 local z_crc_t FAR x2n_table[32]; 140 #else 141 /* ========================================================================= 142 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers 143 * of x for combining CRC-32s, all made by make_crc_table(). 144 */ 145 # include "crc32.h" 146 #endif 147 148 /* CRC polynomial. */ 149 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ 150 151 #ifndef Z_FREETYPE 152 153 /* 154 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, 155 reflected. For speed, this requires that a not be zero. 156 */ 157 local z_crc_t multmodp(z_crc_t a, z_crc_t b) { 158 z_crc_t m, p; 159 160 m = (z_crc_t)1 << 31; 161 p = 0; 162 for (;;) { 163 if (a & m) { 164 p ^= b; 165 if ((a & (m - 1)) == 0) 166 break; 167 } 168 m >>= 1; 169 b = b & 1 ? (b >> 1) ^ POLY : b >> 1; 170 } 171 return p; 172 } 173 174 /* 175 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been 176 initialized. 177 */ 178 local z_crc_t x2nmodp(z_off64_t n, unsigned k) { 179 z_crc_t p; 180 181 p = (z_crc_t)1 << 31; /* x^0 == 1 */ 182 while (n) { 183 if (n & 1) 184 p = multmodp(x2n_table[k & 31], p); 185 n >>= 1; 186 k++; 187 } 188 return p; 189 } 190 191 #endif /* !Z_FREETYPE */ 192 193 #ifdef DYNAMIC_CRC_TABLE 194 /* ========================================================================= 195 * Build the tables for byte-wise and braided CRC-32 calculations, and a table 196 * of powers of x for combining CRC-32s. 197 */ 198 local z_crc_t FAR crc_table[256]; 199 #ifdef W 200 local z_word_t FAR crc_big_table[256]; 201 local z_crc_t FAR crc_braid_table[W][256]; 202 local z_word_t FAR crc_braid_big_table[W][256]; 203 local void braid(z_crc_t [][256], z_word_t [][256], int, int); 204 #endif 205 #ifdef MAKECRCH 206 local void write_table(FILE *, const z_crc_t FAR *, int); 207 local void write_table32hi(FILE *, const z_word_t FAR *, int); 208 local void write_table64(FILE *, const z_word_t FAR *, int); 209 #endif /* MAKECRCH */ 210 211 /* 212 Define a once() function depending on the availability of atomics. If this is 213 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in 214 multiple threads, and if atomics are not available, then get_crc_table() must 215 be called to initialize the tables and must return before any threads are 216 allowed to compute or combine CRCs. 217 */ 218 219 /* Definition of once functionality. */ 220 typedef struct once_s once_t; 221 222 /* Check for the availability of atomics. */ 223 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ 224 !defined(__STDC_NO_ATOMICS__) 225 226 #include <stdatomic.h> 227 228 /* Structure for once(), which must be initialized with ONCE_INIT. */ 229 struct once_s { 230 atomic_flag begun; 231 atomic_int done; 232 }; 233 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} 234 235 /* 236 Run the provided init() function exactly once, even if multiple threads 237 invoke once() at the same time. The state must be a once_t initialized with 238 ONCE_INIT. 239 */ 240 local void once(once_t *state, void (*init)(void)) { 241 if (!atomic_load(&state->done)) { 242 if (atomic_flag_test_and_set(&state->begun)) 243 while (!atomic_load(&state->done)) 244 ; 245 else { 246 init(); 247 atomic_store(&state->done, 1); 248 } 249 } 250 } 251 252 #else /* no atomics */ 253 254 /* Structure for once(), which must be initialized with ONCE_INIT. */ 255 struct once_s { 256 volatile int begun; 257 volatile int done; 258 }; 259 #define ONCE_INIT {0, 0} 260 261 /* Test and set. Alas, not atomic, but tries to minimize the period of 262 vulnerability. */ 263 local int test_and_set(int volatile *flag) { 264 int was; 265 266 was = *flag; 267 *flag = 1; 268 return was; 269 } 270 271 /* Run the provided init() function once. This is not thread-safe. */ 272 local void once(once_t *state, void (*init)(void)) { 273 if (!state->done) { 274 if (test_and_set(&state->begun)) 275 while (!state->done) 276 ; 277 else { 278 init(); 279 state->done = 1; 280 } 281 } 282 } 283 284 #endif 285 286 /* State for once(). */ 287 local once_t made = ONCE_INIT; 288 289 /* 290 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: 291 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 292 293 Polynomials over GF(2) are represented in binary, one bit per coefficient, 294 with the lowest powers in the most significant bit. Then adding polynomials 295 is just exclusive-or, and multiplying a polynomial by x is a right shift by 296 one. If we call the above polynomial p, and represent a byte as the 297 polynomial q, also with the lowest power in the most significant bit (so the 298 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, 299 where a mod b means the remainder after dividing a by b. 300 301 This calculation is done using the shift-register method of multiplying and 302 taking the remainder. The register is initialized to zero, and for each 303 incoming bit, x^32 is added mod p to the register if the bit is a one (where 304 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x 305 (which is shifting right by one and adding x^32 mod p if the bit shifted out 306 is a one). We start with the highest power (least significant bit) of q and 307 repeat for all eight bits of q. 308 309 The table is simply the CRC of all possible eight bit values. This is all the 310 information needed to generate CRCs on data a byte at a time for all 311 combinations of CRC register values and incoming bytes. 312 */ 313 314 local void make_crc_table(void) { 315 unsigned i, j, n; 316 z_crc_t p; 317 318 /* initialize the CRC of bytes tables */ 319 for (i = 0; i < 256; i++) { 320 p = i; 321 for (j = 0; j < 8; j++) 322 p = p & 1 ? (p >> 1) ^ POLY : p >> 1; 323 crc_table[i] = p; 324 #ifdef W 325 crc_big_table[i] = byte_swap(p); 326 #endif 327 } 328 329 /* initialize the x^2^n mod p(x) table */ 330 p = (z_crc_t)1 << 30; /* x^1 */ 331 x2n_table[0] = p; 332 for (n = 1; n < 32; n++) 333 x2n_table[n] = p = multmodp(p, p); 334 335 #ifdef W 336 /* initialize the braiding tables -- needs x2n_table[] */ 337 braid(crc_braid_table, crc_braid_big_table, N, W); 338 #endif 339 340 #ifdef MAKECRCH 341 { 342 /* 343 The crc32.h header file contains tables for both 32-bit and 64-bit 344 z_word_t's, and so requires a 64-bit type be available. In that case, 345 z_word_t must be defined to be 64-bits. This code then also generates 346 and writes out the tables for the case that z_word_t is 32 bits. 347 */ 348 #if !defined(W) || W != 8 349 # error Need a 64-bit integer type in order to generate crc32.h. 350 #endif 351 FILE *out; 352 int k, n; 353 z_crc_t ltl[8][256]; 354 z_word_t big[8][256]; 355 356 out = fopen("crc32.h", "w"); 357 if (out == NULL) return; 358 359 /* write out little-endian CRC table to crc32.h */ 360 fprintf(out, 361 "/* crc32.h -- tables for rapid CRC calculation\n" 362 " * Generated automatically by crc32.c\n */\n" 363 "\n" 364 "local const z_crc_t FAR crc_table[] = {\n" 365 " "); 366 write_table(out, crc_table, 256); 367 fprintf(out, 368 "};\n"); 369 370 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ 371 fprintf(out, 372 "\n" 373 "#ifdef W\n" 374 "\n" 375 "#if W == 8\n" 376 "\n" 377 "local const z_word_t FAR crc_big_table[] = {\n" 378 " "); 379 write_table64(out, crc_big_table, 256); 380 fprintf(out, 381 "};\n"); 382 383 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ 384 fprintf(out, 385 "\n" 386 "#else /* W == 4 */\n" 387 "\n" 388 "local const z_word_t FAR crc_big_table[] = {\n" 389 " "); 390 write_table32hi(out, crc_big_table, 256); 391 fprintf(out, 392 "};\n" 393 "\n" 394 "#endif\n"); 395 396 /* write out braid tables for each value of N */ 397 for (n = 1; n <= 6; n++) { 398 fprintf(out, 399 "\n" 400 "#if N == %d\n", n); 401 402 /* compute braid tables for this N and 64-bit word_t */ 403 braid(ltl, big, n, 8); 404 405 /* write out braid tables for 64-bit z_word_t to crc32.h */ 406 fprintf(out, 407 "\n" 408 "#if W == 8\n" 409 "\n" 410 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 411 for (k = 0; k < 8; k++) { 412 fprintf(out, " {"); 413 write_table(out, ltl[k], 256); 414 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 415 } 416 fprintf(out, 417 "};\n" 418 "\n" 419 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 420 for (k = 0; k < 8; k++) { 421 fprintf(out, " {"); 422 write_table64(out, big[k], 256); 423 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 424 } 425 fprintf(out, 426 "};\n"); 427 428 /* compute braid tables for this N and 32-bit word_t */ 429 braid(ltl, big, n, 4); 430 431 /* write out braid tables for 32-bit z_word_t to crc32.h */ 432 fprintf(out, 433 "\n" 434 "#else /* W == 4 */\n" 435 "\n" 436 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 437 for (k = 0; k < 4; k++) { 438 fprintf(out, " {"); 439 write_table(out, ltl[k], 256); 440 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 441 } 442 fprintf(out, 443 "};\n" 444 "\n" 445 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 446 for (k = 0; k < 4; k++) { 447 fprintf(out, " {"); 448 write_table32hi(out, big[k], 256); 449 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 450 } 451 fprintf(out, 452 "};\n" 453 "\n" 454 "#endif\n" 455 "\n" 456 "#endif\n"); 457 } 458 fprintf(out, 459 "\n" 460 "#endif\n"); 461 462 /* write out zeros operator table to crc32.h */ 463 fprintf(out, 464 "\n" 465 "local const z_crc_t FAR x2n_table[] = {\n" 466 " "); 467 write_table(out, x2n_table, 32); 468 fprintf(out, 469 "};\n"); 470 fclose(out); 471 } 472 #endif /* MAKECRCH */ 473 } 474 475 #ifdef MAKECRCH 476 477 /* 478 Write the 32-bit values in table[0..k-1] to out, five per line in 479 hexadecimal separated by commas. 480 */ 481 local void write_table(FILE *out, const z_crc_t FAR *table, int k) { 482 int n; 483 484 for (n = 0; n < k; n++) 485 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 486 (unsigned long)(table[n]), 487 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 488 } 489 490 /* 491 Write the high 32-bits of each value in table[0..k-1] to out, five per line 492 in hexadecimal separated by commas. 493 */ 494 local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) { 495 int n; 496 497 for (n = 0; n < k; n++) 498 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 499 (unsigned long)(table[n] >> 32), 500 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 501 } 502 503 /* 504 Write the 64-bit values in table[0..k-1] to out, three per line in 505 hexadecimal separated by commas. This assumes that if there is a 64-bit 506 type, then there is also a long long integer type, and it is at least 64 507 bits. If not, then the type cast and format string can be adjusted 508 accordingly. 509 */ 510 local void write_table64(FILE *out, const z_word_t FAR *table, int k) { 511 int n; 512 513 for (n = 0; n < k; n++) 514 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", 515 (unsigned long long)(table[n]), 516 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); 517 } 518 519 /* Actually do the deed. */ 520 int main(void) { 521 make_crc_table(); 522 return 0; 523 } 524 525 #endif /* MAKECRCH */ 526 527 #ifdef W 528 /* 529 Generate the little and big-endian braid tables for the given n and z_word_t 530 size w. Each array must have room for w blocks of 256 elements. 531 */ 532 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) { 533 int k; 534 z_crc_t i, p, q; 535 for (k = 0; k < w; k++) { 536 p = x2nmodp((n * w + 3 - k) << 3, 0); 537 ltl[k][0] = 0; 538 big[w - 1 - k][0] = 0; 539 for (i = 1; i < 256; i++) { 540 ltl[k][i] = q = multmodp(i << 24, p); 541 big[w - 1 - k][i] = byte_swap(q); 542 } 543 } 544 } 545 #endif 546 547 #endif /* DYNAMIC_CRC_TABLE */ 548 549 #ifndef Z_FREETYPE 550 551 /* ========================================================================= 552 * This function can be used by asm versions of crc32(), and to force the 553 * generation of the CRC tables in a threaded application. 554 */ 555 const z_crc_t FAR * ZEXPORT get_crc_table(void) { 556 #ifdef DYNAMIC_CRC_TABLE 557 once(&made, make_crc_table); 558 #endif /* DYNAMIC_CRC_TABLE */ 559 return (const z_crc_t FAR *)crc_table; 560 } 561 562 #endif /* !Z_FREETYPE */ 563 564 /* ========================================================================= 565 * Use ARM machine instructions if available. This will compute the CRC about 566 * ten times faster than the braided calculation. This code does not check for 567 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will 568 * only be defined if the compilation specifies an ARM processor architecture 569 * that has the instructions. For example, compiling with -march=armv8.1-a or 570 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 571 * instructions. 572 */ 573 #ifdef ARMCRC32 574 575 /* 576 Constants empirically determined to maximize speed. These values are from 577 measurements on a Cortex-A57. Your mileage may vary. 578 */ 579 #define Z_BATCH 3990 /* number of words in a batch */ 580 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ 581 #define Z_BATCH_MIN 800 /* fewest words in a final batch */ 582 583 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, 584 z_size_t len) { 585 z_crc_t val; 586 z_word_t crc1, crc2; 587 const z_word_t *word; 588 z_word_t val0, val1, val2; 589 z_size_t last, last2, i; 590 z_size_t num; 591 592 /* Return initial CRC, if requested. */ 593 if (buf == Z_NULL) return 0; 594 595 #ifdef DYNAMIC_CRC_TABLE 596 once(&made, make_crc_table); 597 #endif /* DYNAMIC_CRC_TABLE */ 598 599 /* Pre-condition the CRC */ 600 crc = (~crc) & 0xffffffff; 601 602 /* Compute the CRC up to a word boundary. */ 603 while (len && ((z_size_t)buf & 7) != 0) { 604 len--; 605 val = *buf++; 606 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 607 } 608 609 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ 610 word = (z_word_t const *)buf; 611 num = len >> 3; 612 len &= 7; 613 614 /* Do three interleaved CRCs to realize the throughput of one crc32x 615 instruction per cycle. Each CRC is calculated on Z_BATCH words. The 616 three CRCs are combined into a single CRC after each set of batches. */ 617 while (num >= 3 * Z_BATCH) { 618 crc1 = 0; 619 crc2 = 0; 620 for (i = 0; i < Z_BATCH; i++) { 621 val0 = word[i]; 622 val1 = word[i + Z_BATCH]; 623 val2 = word[i + 2 * Z_BATCH]; 624 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 625 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 626 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 627 } 628 word += 3 * Z_BATCH; 629 num -= 3 * Z_BATCH; 630 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; 631 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; 632 } 633 634 /* Do one last smaller batch with the remaining words, if there are enough 635 to pay for the combination of CRCs. */ 636 last = num / 3; 637 if (last >= Z_BATCH_MIN) { 638 last2 = last << 1; 639 crc1 = 0; 640 crc2 = 0; 641 for (i = 0; i < last; i++) { 642 val0 = word[i]; 643 val1 = word[i + last]; 644 val2 = word[i + last2]; 645 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 646 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 647 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 648 } 649 word += 3 * last; 650 num -= 3 * last; 651 val = x2nmodp(last, 6); 652 crc = multmodp(val, crc) ^ crc1; 653 crc = multmodp(val, crc) ^ crc2; 654 } 655 656 /* Compute the CRC on any remaining words. */ 657 for (i = 0; i < num; i++) { 658 val0 = word[i]; 659 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 660 } 661 word += num; 662 663 /* Complete the CRC on any remaining bytes. */ 664 buf = (const unsigned char FAR *)word; 665 while (len) { 666 len--; 667 val = *buf++; 668 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 669 } 670 671 /* Return the CRC, post-conditioned. */ 672 return crc ^ 0xffffffff; 673 } 674 675 #else 676 677 #ifdef W 678 679 /* 680 Return the CRC of the W bytes in the word_t data, taking the 681 least-significant byte of the word as the first byte of data, without any pre 682 or post conditioning. This is used to combine the CRCs of each braid. 683 */ 684 local z_crc_t crc_word(z_word_t data) { 685 int k; 686 for (k = 0; k < W; k++) 687 data = (data >> 8) ^ crc_table[data & 0xff]; 688 return (z_crc_t)data; 689 } 690 691 local z_word_t crc_word_big(z_word_t data) { 692 int k; 693 for (k = 0; k < W; k++) 694 data = (data << 8) ^ 695 crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; 696 return data; 697 } 698 699 #endif 700 701 /* ========================================================================= */ 702 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, 703 z_size_t len) { 704 /* Return initial CRC, if requested. */ 705 if (buf == Z_NULL) return 0; 706 707 #ifdef DYNAMIC_CRC_TABLE 708 once(&made, make_crc_table); 709 #endif /* DYNAMIC_CRC_TABLE */ 710 711 /* Pre-condition the CRC */ 712 crc = (~crc) & 0xffffffff; 713 714 #ifdef W 715 716 /* If provided enough bytes, do a braided CRC calculation. */ 717 if (len >= N * W + W - 1) { 718 z_size_t blks; 719 z_word_t const *words; 720 unsigned endian; 721 int k; 722 723 /* Compute the CRC up to a z_word_t boundary. */ 724 while (len && ((z_size_t)buf & (W - 1)) != 0) { 725 len--; 726 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 727 } 728 729 /* Compute the CRC on as many N z_word_t blocks as are available. */ 730 blks = len / (N * W); 731 len -= blks * N * W; 732 words = (z_word_t const *)buf; 733 734 /* Do endian check at execution time instead of compile time, since ARM 735 processors can change the endianness at execution time. If the 736 compiler knows what the endianness will be, it can optimize out the 737 check and the unused branch. */ 738 endian = 1; 739 if (*(unsigned char *)&endian) { 740 /* Little endian. */ 741 742 z_crc_t crc0; 743 z_word_t word0; 744 #if N > 1 745 z_crc_t crc1; 746 z_word_t word1; 747 #if N > 2 748 z_crc_t crc2; 749 z_word_t word2; 750 #if N > 3 751 z_crc_t crc3; 752 z_word_t word3; 753 #if N > 4 754 z_crc_t crc4; 755 z_word_t word4; 756 #if N > 5 757 z_crc_t crc5; 758 z_word_t word5; 759 #endif 760 #endif 761 #endif 762 #endif 763 #endif 764 765 /* Initialize the CRC for each braid. */ 766 crc0 = crc; 767 #if N > 1 768 crc1 = 0; 769 #if N > 2 770 crc2 = 0; 771 #if N > 3 772 crc3 = 0; 773 #if N > 4 774 crc4 = 0; 775 #if N > 5 776 crc5 = 0; 777 #endif 778 #endif 779 #endif 780 #endif 781 #endif 782 783 /* 784 Process the first blks-1 blocks, computing the CRCs on each braid 785 independently. 786 */ 787 while (--blks) { 788 /* Load the word for each braid into registers. */ 789 word0 = crc0 ^ words[0]; 790 #if N > 1 791 word1 = crc1 ^ words[1]; 792 #if N > 2 793 word2 = crc2 ^ words[2]; 794 #if N > 3 795 word3 = crc3 ^ words[3]; 796 #if N > 4 797 word4 = crc4 ^ words[4]; 798 #if N > 5 799 word5 = crc5 ^ words[5]; 800 #endif 801 #endif 802 #endif 803 #endif 804 #endif 805 words += N; 806 807 /* Compute and update the CRC for each word. The loop should 808 get unrolled. */ 809 crc0 = crc_braid_table[0][word0 & 0xff]; 810 #if N > 1 811 crc1 = crc_braid_table[0][word1 & 0xff]; 812 #if N > 2 813 crc2 = crc_braid_table[0][word2 & 0xff]; 814 #if N > 3 815 crc3 = crc_braid_table[0][word3 & 0xff]; 816 #if N > 4 817 crc4 = crc_braid_table[0][word4 & 0xff]; 818 #if N > 5 819 crc5 = crc_braid_table[0][word5 & 0xff]; 820 #endif 821 #endif 822 #endif 823 #endif 824 #endif 825 for (k = 1; k < W; k++) { 826 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; 827 #if N > 1 828 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; 829 #if N > 2 830 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; 831 #if N > 3 832 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; 833 #if N > 4 834 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; 835 #if N > 5 836 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; 837 #endif 838 #endif 839 #endif 840 #endif 841 #endif 842 } 843 } 844 845 /* 846 Process the last block, combining the CRCs of the N braids at the 847 same time. 848 */ 849 crc = crc_word(crc0 ^ words[0]); 850 #if N > 1 851 crc = crc_word(crc1 ^ words[1] ^ crc); 852 #if N > 2 853 crc = crc_word(crc2 ^ words[2] ^ crc); 854 #if N > 3 855 crc = crc_word(crc3 ^ words[3] ^ crc); 856 #if N > 4 857 crc = crc_word(crc4 ^ words[4] ^ crc); 858 #if N > 5 859 crc = crc_word(crc5 ^ words[5] ^ crc); 860 #endif 861 #endif 862 #endif 863 #endif 864 #endif 865 words += N; 866 } 867 else { 868 /* Big endian. */ 869 870 z_word_t crc0, word0, comb; 871 #if N > 1 872 z_word_t crc1, word1; 873 #if N > 2 874 z_word_t crc2, word2; 875 #if N > 3 876 z_word_t crc3, word3; 877 #if N > 4 878 z_word_t crc4, word4; 879 #if N > 5 880 z_word_t crc5, word5; 881 #endif 882 #endif 883 #endif 884 #endif 885 #endif 886 887 /* Initialize the CRC for each braid. */ 888 crc0 = byte_swap(crc); 889 #if N > 1 890 crc1 = 0; 891 #if N > 2 892 crc2 = 0; 893 #if N > 3 894 crc3 = 0; 895 #if N > 4 896 crc4 = 0; 897 #if N > 5 898 crc5 = 0; 899 #endif 900 #endif 901 #endif 902 #endif 903 #endif 904 905 /* 906 Process the first blks-1 blocks, computing the CRCs on each braid 907 independently. 908 */ 909 while (--blks) { 910 /* Load the word for each braid into registers. */ 911 word0 = crc0 ^ words[0]; 912 #if N > 1 913 word1 = crc1 ^ words[1]; 914 #if N > 2 915 word2 = crc2 ^ words[2]; 916 #if N > 3 917 word3 = crc3 ^ words[3]; 918 #if N > 4 919 word4 = crc4 ^ words[4]; 920 #if N > 5 921 word5 = crc5 ^ words[5]; 922 #endif 923 #endif 924 #endif 925 #endif 926 #endif 927 words += N; 928 929 /* Compute and update the CRC for each word. The loop should 930 get unrolled. */ 931 crc0 = crc_braid_big_table[0][word0 & 0xff]; 932 #if N > 1 933 crc1 = crc_braid_big_table[0][word1 & 0xff]; 934 #if N > 2 935 crc2 = crc_braid_big_table[0][word2 & 0xff]; 936 #if N > 3 937 crc3 = crc_braid_big_table[0][word3 & 0xff]; 938 #if N > 4 939 crc4 = crc_braid_big_table[0][word4 & 0xff]; 940 #if N > 5 941 crc5 = crc_braid_big_table[0][word5 & 0xff]; 942 #endif 943 #endif 944 #endif 945 #endif 946 #endif 947 for (k = 1; k < W; k++) { 948 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; 949 #if N > 1 950 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; 951 #if N > 2 952 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; 953 #if N > 3 954 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; 955 #if N > 4 956 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; 957 #if N > 5 958 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; 959 #endif 960 #endif 961 #endif 962 #endif 963 #endif 964 } 965 } 966 967 /* 968 Process the last block, combining the CRCs of the N braids at the 969 same time. 970 */ 971 comb = crc_word_big(crc0 ^ words[0]); 972 #if N > 1 973 comb = crc_word_big(crc1 ^ words[1] ^ comb); 974 #if N > 2 975 comb = crc_word_big(crc2 ^ words[2] ^ comb); 976 #if N > 3 977 comb = crc_word_big(crc3 ^ words[3] ^ comb); 978 #if N > 4 979 comb = crc_word_big(crc4 ^ words[4] ^ comb); 980 #if N > 5 981 comb = crc_word_big(crc5 ^ words[5] ^ comb); 982 #endif 983 #endif 984 #endif 985 #endif 986 #endif 987 words += N; 988 crc = byte_swap(comb); 989 } 990 991 /* 992 Update the pointer to the remaining bytes to process. 993 */ 994 buf = (unsigned char const *)words; 995 } 996 997 #endif /* W */ 998 999 /* Complete the computation of the CRC on any remaining bytes. */ 1000 while (len >= 8) { 1001 len -= 8; 1002 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1003 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1004 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1005 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1006 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1007 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1008 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1009 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1010 } 1011 while (len) { 1012 len--; 1013 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1014 } 1015 1016 /* Return the CRC, post-conditioned. */ 1017 return crc ^ 0xffffffff; 1018 } 1019 1020 #endif 1021 1022 /* ========================================================================= */ 1023 unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, 1024 uInt len) { 1025 return crc32_z(crc, buf, len); 1026 } 1027 1028 #ifndef Z_FREETYPE 1029 1030 /* ========================================================================= */ 1031 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) { 1032 #ifdef DYNAMIC_CRC_TABLE 1033 once(&made, make_crc_table); 1034 #endif /* DYNAMIC_CRC_TABLE */ 1035 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); 1036 } 1037 1038 /* ========================================================================= */ 1039 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { 1040 return crc32_combine64(crc1, crc2, (z_off64_t)len2); 1041 } 1042 1043 /* ========================================================================= */ 1044 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) { 1045 #ifdef DYNAMIC_CRC_TABLE 1046 once(&made, make_crc_table); 1047 #endif /* DYNAMIC_CRC_TABLE */ 1048 return x2nmodp(len2, 3); 1049 } 1050 1051 /* ========================================================================= */ 1052 uLong ZEXPORT crc32_combine_gen(z_off_t len2) { 1053 return crc32_combine_gen64((z_off64_t)len2); 1054 } 1055 1056 /* ========================================================================= */ 1057 uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) { 1058 return multmodp(op, crc1) ^ (crc2 & 0xffffffff); 1059 } 1060 1061 #endif /* !Z_FREETYPE */