tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

crc.cc (15127B)


      1 // Copyright 2022 The Abseil Authors.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // Implementation of CRCs (aka Rabin Fingerprints).
     16 // Treats the input as a polynomial with coefficients in Z(2),
     17 // and finds the remainder when divided by an irreducible polynomial
     18 // of the appropriate length.
     19 // It handles all CRC sizes from 8 to 128 bits.
     20 // It's somewhat complicated by having separate implementations optimized for
     21 // CRC's <=32 bits, <= 64 bits, and <= 128 bits.
     22 // The input string is prefixed with a "1" bit, and has "degree" "0" bits
     23 // appended to it before the remainder is found.   This ensures that
     24 // short strings are scrambled somewhat and that strings consisting
     25 // of all nulls have a non-zero CRC.
     26 //
     27 // Uses the "interleaved word-by-word" method from
     28 // "Everything we know about CRC but afraid to forget" by Andrew Kadatch
     29 // and Bob Jenkins,
     30 // http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
     31 //
     32 // The idea is to compute kStride CRCs simultaneously, allowing the
     33 // processor to more effectively use multiple execution units. Each of
     34 // the CRCs is calculated on one word of data followed by kStride - 1
     35 // words of zeroes; the CRC starting points are staggered by one word.
     36 // Assuming a stride of 4 with data words "ABCDABCDABCD", the first
     37 // CRC is over A000A000A, the second over 0B000B000B, and so on.
     38 // The CRC of the whole data is then calculated by properly aligning the
     39 // CRCs by appending zeroes until the data lengths agree then XORing
     40 // the CRCs.
     41 
     42 #include "absl/crc/internal/crc.h"
     43 
     44 #include <cstdint>
     45 
     46 #include "absl/base/internal/endian.h"
     47 #include "absl/base/internal/raw_logging.h"
     48 #include "absl/base/prefetch.h"
     49 #include "absl/crc/internal/crc_internal.h"
     50 
     51 namespace absl {
     52 ABSL_NAMESPACE_BEGIN
     53 namespace crc_internal {
     54 
     55 namespace {
     56 
     57 // Constants
     58 #if defined(__i386__) || defined(__x86_64__)
     59 constexpr bool kNeedAlignedLoads = false;
     60 #else
     61 constexpr bool kNeedAlignedLoads = true;
     62 #endif
     63 
     64 // We express the number of zeroes as a number in base ZEROES_BASE. By
     65 // pre-computing the zero extensions for all possible components of such an
     66 // expression (numbers in a form a*ZEROES_BASE**b), we can calculate the
     67 // resulting extension by multiplying the extensions for individual components
     68 // using log_{ZEROES_BASE}(num_zeroes) polynomial multiplications. The tables of
     69 // zero extensions contain (ZEROES_BASE - 1) * (log_{ZEROES_BASE}(64)) entries.
     70 constexpr int ZEROES_BASE_LG = 4;                   // log_2(ZEROES_BASE)
     71 constexpr int ZEROES_BASE = (1 << ZEROES_BASE_LG);  // must be a power of 2
     72 
     73 constexpr uint32_t kCrc32cPoly = 0x82f63b78;
     74 
     75 uint32_t ReverseBits(uint32_t bits) {
     76  bits = (bits & 0xaaaaaaaau) >> 1 | (bits & 0x55555555u) << 1;
     77  bits = (bits & 0xccccccccu) >> 2 | (bits & 0x33333333u) << 2;
     78  bits = (bits & 0xf0f0f0f0u) >> 4 | (bits & 0x0f0f0f0fu) << 4;
     79  return absl::gbswap_32(bits);
     80 }
     81 
     82 // Polynomial long multiplication mod the polynomial of degree 32.
     83 void PolyMultiply(uint32_t* val, uint32_t m, uint32_t poly) {
     84  uint32_t l = *val;
     85  uint32_t result = 0;
     86  auto onebit = uint32_t{0x80000000u};
     87  for (uint32_t one = onebit; one != 0; one >>= 1) {
     88    if ((l & one) != 0) {
     89      result ^= m;
     90    }
     91    if (m & 1) {
     92      m = (m >> 1) ^ poly;
     93    } else {
     94      m >>= 1;
     95    }
     96  }
     97  *val = result;
     98 }
     99 }  // namespace
    100 
    101 void CRCImpl::FillWordTable(uint32_t poly, uint32_t last, int word_size,
    102                            Uint32By256* t) {
    103  for (int j = 0; j != word_size; j++) {  // for each byte of extension....
    104    t[j][0] = 0;                          // a zero has no effect
    105    for (int i = 128; i != 0; i >>= 1) {  // fill in entries for powers of 2
    106      if (j == 0 && i == 128) {
    107        t[j][i] = last;  // top bit in last byte is given
    108      } else {
    109        // each successive power of two is derived from the previous
    110        // one, either in this table, or the last table
    111        uint32_t pred;
    112        if (i == 128) {
    113          pred = t[j - 1][1];
    114        } else {
    115          pred = t[j][i << 1];
    116        }
    117        // Advance the CRC by one bit (multiply by X, and take remainder
    118        // through one step of polynomial long division)
    119        if (pred & 1) {
    120          t[j][i] = (pred >> 1) ^ poly;
    121        } else {
    122          t[j][i] = pred >> 1;
    123        }
    124      }
    125    }
    126    // CRCs have the property that CRC(a xor b) == CRC(a) xor CRC(b)
    127    // so we can make all the tables for non-powers of two by
    128    // xoring previously created entries.
    129    for (int i = 2; i != 256; i <<= 1) {
    130      for (int k = i + 1; k != (i << 1); k++) {
    131        t[j][k] = t[j][i] ^ t[j][k - i];
    132      }
    133    }
    134  }
    135 }
    136 
    137 int CRCImpl::FillZeroesTable(uint32_t poly, Uint32By256* t) {
    138  uint32_t inc = 1;
    139  inc <<= 31;
    140 
    141  // Extend by one zero bit. We know degree > 1 so (inc & 1) == 0.
    142  inc >>= 1;
    143 
    144  // Now extend by 2, 4, and 8 bits, so now `inc` is extended by one zero byte.
    145  for (int i = 0; i < 3; ++i) {
    146    PolyMultiply(&inc, inc, poly);
    147  }
    148 
    149  int j = 0;
    150  for (uint64_t inc_len = 1; inc_len != 0; inc_len <<= ZEROES_BASE_LG) {
    151    // Every entry in the table adds an additional inc_len zeroes.
    152    uint32_t v = inc;
    153    for (int a = 1; a != ZEROES_BASE; a++) {
    154      t[0][j] = v;
    155      PolyMultiply(&v, inc, poly);
    156      j++;
    157    }
    158    inc = v;
    159  }
    160  ABSL_RAW_CHECK(j <= 256, "");
    161  return j;
    162 }
    163 
    164 // Internal version of the "constructor".
    165 CRCImpl* CRCImpl::NewInternal() {
    166  // Find an accelearated implementation first.
    167  CRCImpl* result = TryNewCRC32AcceleratedX86ARMCombined();
    168 
    169  // Fall back to generic implementions if no acceleration is available.
    170  if (result == nullptr) {
    171    result = new CRC32();
    172  }
    173 
    174  result->InitTables();
    175 
    176  return result;
    177 }
    178 
    179 //  The 32-bit implementation
    180 
    181 void CRC32::InitTables() {
    182  // Compute the table for extending a CRC by one byte.
    183  Uint32By256* t = new Uint32By256[4];
    184  FillWordTable(kCrc32cPoly, kCrc32cPoly, 1, t);
    185  for (int i = 0; i != 256; i++) {
    186    this->table0_[i] = t[0][i];
    187  }
    188 
    189  // Construct a table for updating the CRC by 4 bytes data followed by
    190  // 12 bytes of zeroes.
    191  //
    192  // Note: the data word size could be larger than the CRC size; it might
    193  // be slightly faster to use a 64-bit data word, but doing so doubles the
    194  // table size.
    195  uint32_t last = kCrc32cPoly;
    196  const size_t size = 12;
    197  for (size_t i = 0; i < size; ++i) {
    198    last = (last >> 8) ^ this->table0_[last & 0xff];
    199  }
    200  FillWordTable(kCrc32cPoly, last, 4, t);
    201  for (size_t b = 0; b < 4; ++b) {
    202    for (int i = 0; i < 256; ++i) {
    203      this->table_[b][i] = t[b][i];
    204    }
    205  }
    206 
    207  int j = FillZeroesTable(kCrc32cPoly, t);
    208  ABSL_RAW_CHECK(j <= static_cast<int>(ABSL_ARRAYSIZE(this->zeroes_)), "");
    209  for (int i = 0; i < j; i++) {
    210    this->zeroes_[i] = t[0][i];
    211  }
    212 
    213  delete[] t;
    214 
    215  // Build up tables for _reversing_ the operation of doing CRC operations on
    216  // zero bytes.
    217 
    218  // In C++, extending `crc` by a single zero bit is done by the following:
    219  // (A)  bool low_bit_set = (crc & 1);
    220  //      crc >>= 1;
    221  //      if (low_bit_set) crc ^= kCrc32cPoly;
    222  //
    223  // In particular note that the high bit of `crc` after this operation will be
    224  // set if and only if the low bit of `crc` was set before it.  This means that
    225  // no information is lost, and the operation can be reversed, as follows:
    226  // (B)  bool high_bit_set = (crc & 0x80000000u);
    227  //      if (high_bit_set) crc ^= kCrc32cPoly;
    228  //      crc <<= 1;
    229  //      if (high_bit_set) crc ^= 1;
    230  //
    231  // Or, equivalently:
    232  // (C)  bool high_bit_set = (crc & 0x80000000u);
    233  //      crc <<= 1;
    234  //      if (high_bit_set) crc ^= ((kCrc32cPoly << 1) ^ 1);
    235  //
    236  // The last observation is, if we store our checksums in variable `rcrc`,
    237  // with order of the bits reversed, the inverse operation becomes:
    238  // (D)  bool low_bit_set = (rcrc & 1);
    239  //      rcrc >>= 1;
    240  //      if (low_bit_set) rcrc ^= ReverseBits((kCrc32cPoly << 1) ^ 1)
    241  //
    242  // This is the same algorithm (A) that we started with, only with a different
    243  // polynomial bit pattern.  This means that by building up our tables with
    244  // this alternate polynomial, we can apply the CRC algorithms to a
    245  // bit-reversed CRC checksum to perform inverse zero-extension.
    246 
    247  const uint32_t kCrc32cUnextendPoly =
    248      ReverseBits(static_cast<uint32_t>((kCrc32cPoly << 1) ^ 1));
    249  FillWordTable(kCrc32cUnextendPoly, kCrc32cUnextendPoly, 1, &reverse_table0_);
    250 
    251  j = FillZeroesTable(kCrc32cUnextendPoly, &reverse_zeroes_);
    252  ABSL_RAW_CHECK(j <= static_cast<int>(ABSL_ARRAYSIZE(this->reverse_zeroes_)),
    253                 "");
    254 }
    255 
    256 void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const {
    257  const uint8_t* p = static_cast<const uint8_t*>(bytes);
    258  const uint8_t* e = p + length;
    259  uint32_t l = *crc;
    260 
    261  auto step_one_byte = [this, &p, &l]() {
    262    int c = (l & 0xff) ^ *p++;
    263    l = this->table0_[c] ^ (l >> 8);
    264  };
    265 
    266  if (kNeedAlignedLoads) {
    267    // point x at first 4-byte aligned byte in string. this might be past the
    268    // end of the string.
    269    const uint8_t* x = RoundUp<4>(p);
    270    if (x <= e) {
    271      // Process bytes until finished or p is 4-byte aligned
    272      while (p != x) {
    273        step_one_byte();
    274      }
    275    }
    276  }
    277 
    278  const size_t kSwathSize = 16;
    279  if (static_cast<size_t>(e - p) >= kSwathSize) {
    280    // Load one swath of data into the operating buffers.
    281    uint32_t buf0 = absl::little_endian::Load32(p) ^ l;
    282    uint32_t buf1 = absl::little_endian::Load32(p + 4);
    283    uint32_t buf2 = absl::little_endian::Load32(p + 8);
    284    uint32_t buf3 = absl::little_endian::Load32(p + 12);
    285    p += kSwathSize;
    286 
    287    // Increment a CRC value by a "swath"; this combines the four bytes
    288    // starting at `ptr` and twelve zero bytes, so that four CRCs can be
    289    // built incrementally and combined at the end.
    290    const auto step_swath = [this](uint32_t crc_in, const std::uint8_t* ptr) {
    291      return absl::little_endian::Load32(ptr) ^
    292             this->table_[3][crc_in & 0xff] ^
    293             this->table_[2][(crc_in >> 8) & 0xff] ^
    294             this->table_[1][(crc_in >> 16) & 0xff] ^
    295             this->table_[0][crc_in >> 24];
    296    };
    297 
    298    // Run one CRC calculation step over all swaths in one 16-byte stride
    299    const auto step_stride = [&]() {
    300      buf0 = step_swath(buf0, p);
    301      buf1 = step_swath(buf1, p + 4);
    302      buf2 = step_swath(buf2, p + 8);
    303      buf3 = step_swath(buf3, p + 12);
    304      p += 16;
    305    };
    306 
    307    // Process kStride interleaved swaths through the data in parallel.
    308    while ((e - p) > kPrefetchHorizon) {
    309      PrefetchToLocalCacheNta(
    310          reinterpret_cast<const void*>(p + kPrefetchHorizon));
    311      // Process 64 bytes at a time
    312      step_stride();
    313      step_stride();
    314      step_stride();
    315      step_stride();
    316    }
    317    while (static_cast<size_t>(e - p) >= kSwathSize) {
    318      step_stride();
    319    }
    320 
    321    // Now advance one word at a time as far as possible. This isn't worth
    322    // doing if we have word-advance tables.
    323    while (static_cast<size_t>(e - p) >= 4) {
    324      buf0 = step_swath(buf0, p);
    325      uint32_t tmp = buf0;
    326      buf0 = buf1;
    327      buf1 = buf2;
    328      buf2 = buf3;
    329      buf3 = tmp;
    330      p += 4;
    331    }
    332 
    333    // Combine the results from the different swaths. This is just a CRC
    334    // on the data values in the bufX words.
    335    auto combine_one_word = [this](uint32_t crc_in, uint32_t w) {
    336      w ^= crc_in;
    337      for (size_t i = 0; i < 4; ++i) {
    338        w = (w >> 8) ^ this->table0_[w & 0xff];
    339      }
    340      return w;
    341    };
    342 
    343    l = combine_one_word(0, buf0);
    344    l = combine_one_word(l, buf1);
    345    l = combine_one_word(l, buf2);
    346    l = combine_one_word(l, buf3);
    347  }
    348 
    349  // Process the last few bytes
    350  while (p != e) {
    351    step_one_byte();
    352  }
    353 
    354  *crc = l;
    355 }
    356 
    357 void CRC32::ExtendByZeroesImpl(uint32_t* crc, size_t length,
    358                               const uint32_t zeroes_table[256],
    359                               const uint32_t poly_table[256]) {
    360  if (length != 0) {
    361    uint32_t l = *crc;
    362    // For each ZEROES_BASE_LG bits in length
    363    // (after the low-order bits have been removed)
    364    // we lookup the appropriate polynomial in the zeroes_ array
    365    // and do a polynomial long multiplication (mod the CRC polynomial)
    366    // to extend the CRC by the appropriate number of bits.
    367    for (int i = 0; length != 0;
    368         i += ZEROES_BASE - 1, length >>= ZEROES_BASE_LG) {
    369      int c = length & (ZEROES_BASE - 1);  // pick next ZEROES_BASE_LG bits
    370      if (c != 0) {                        // if they are not zero,
    371                                           // multiply by entry in table
    372        // Build a table to aid in multiplying 2 bits at a time.
    373        // It takes too long to build tables for more bits.
    374        uint64_t m = zeroes_table[c + i - 1];
    375        m <<= 1;
    376        uint64_t m2 = m << 1;
    377        uint64_t mtab[4] = {0, m, m2, m2 ^ m};
    378 
    379        // Do the multiply one byte at a time.
    380        uint64_t result = 0;
    381        for (int x = 0; x < 32; x += 8) {
    382          // The carry-less multiply.
    383          result ^= mtab[l & 3] ^ (mtab[(l >> 2) & 3] << 2) ^
    384                    (mtab[(l >> 4) & 3] << 4) ^ (mtab[(l >> 6) & 3] << 6);
    385          l >>= 8;
    386 
    387          // Reduce modulo the polynomial
    388          result = (result >> 8) ^ poly_table[result & 0xff];
    389        }
    390        l = static_cast<uint32_t>(result);
    391      }
    392    }
    393    *crc = l;
    394  }
    395 }
    396 
    397 void CRC32::ExtendByZeroes(uint32_t* crc, size_t length) const {
    398  return CRC32::ExtendByZeroesImpl(crc, length, zeroes_, table0_);
    399 }
    400 
    401 void CRC32::UnextendByZeroes(uint32_t* crc, size_t length) const {
    402  // See the comment in CRC32::InitTables() for an explanation of the algorithm
    403  // below.
    404  *crc = ReverseBits(*crc);
    405  ExtendByZeroesImpl(crc, length, reverse_zeroes_, reverse_table0_);
    406  *crc = ReverseBits(*crc);
    407 }
    408 
    409 void CRC32::Scramble(uint32_t* crc) const {
    410  // Rotate by near half the word size plus 1.  See the scramble comment in
    411  // crc_internal.h for an explanation.
    412  constexpr int scramble_rotate = (32 / 2) + 1;
    413  *crc = RotateRight<uint32_t>(static_cast<unsigned int>(*crc + kScrambleLo),
    414                               32, scramble_rotate) &
    415         MaskOfLength<uint32_t>(32);
    416 }
    417 
    418 void CRC32::Unscramble(uint32_t* crc) const {
    419  constexpr int scramble_rotate = (32 / 2) + 1;
    420  uint64_t rotated = RotateRight<uint32_t>(static_cast<unsigned int>(*crc), 32,
    421                                           32 - scramble_rotate);
    422  *crc = (rotated - kScrambleLo) & MaskOfLength<uint32_t>(32);
    423 }
    424 
    425 // Constructor and destructor for base class CRC.
    426 CRC::~CRC() {}
    427 CRC::CRC() {}
    428 
    429 // The "constructor" for a CRC32C with a standard polynomial.
    430 CRC* CRC::Crc32c() {
    431  static CRC* singleton = CRCImpl::NewInternal();
    432  return singleton;
    433 }
    434 
    435 }  // namespace crc_internal
    436 ABSL_NAMESPACE_END
    437 }  // namespace absl