ans_common.h (6006B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #ifndef LIB_JXL_ANS_COMMON_H_ 7 #define LIB_JXL_ANS_COMMON_H_ 8 9 #include <algorithm> 10 #include <cstddef> 11 #include <cstdint> 12 #include <cstring> 13 #include <hwy/base.h> 14 #include <hwy/cache_control.h> // Prefetch 15 #include <vector> 16 17 #include "lib/jxl/ans_params.h" 18 #include "lib/jxl/base/byte_order.h" 19 #include "lib/jxl/base/compiler_specific.h" 20 #include "lib/jxl/base/status.h" 21 22 namespace jxl { 23 24 // Returns the precision (number of bits) that should be used to store 25 // a histogram count such that Log2Floor(count) == logcount. 26 static JXL_MAYBE_UNUSED JXL_INLINE uint32_t 27 GetPopulationCountPrecision(uint32_t logcount, uint32_t shift) { 28 int32_t r = std::min<int>( 29 logcount, static_cast<int>(shift) - 30 static_cast<int>((ANS_LOG_TAB_SIZE - logcount) >> 1)); 31 if (r < 0) return 0; 32 return r; 33 } 34 35 // Returns a histogram where the counts are positive, differ by at most 1, 36 // and add up to total_count. The bigger counts (if any) are at the beginning 37 // of the histogram. 38 std::vector<int32_t> CreateFlatHistogram(int length, int total_count); 39 40 // An alias table implements a mapping from the [0, ANS_TAB_SIZE) range into 41 // the [0, ANS_MAX_ALPHABET_SIZE) range, satisfying the following conditions: 42 // - each symbol occurs as many times as specified by any valid distribution 43 // of frequencies of the symbols. A valid distribution here is an array of 44 // ANS_MAX_ALPHABET_SIZE that contains numbers in the range [0, ANS_TAB_SIZE], 45 // and whose sum is ANS_TAB_SIZE. 46 // - lookups can be done in constant time, and also return how many smaller 47 // input values map into the same symbol, according to some well-defined order 48 // of input values. 49 // - the space used by the alias table is given by a small constant times the 50 // index of the largest symbol with nonzero probability in the distribution. 51 // Each of the entries in the table covers a range of `entry_size` values in the 52 // [0, ANS_TAB_SIZE) range; consecutive entries represent consecutive 53 // sub-ranges. In the range covered by entry `i`, the first `cutoff` values map 54 // to symbol `i`, while the others map to symbol `right_value`. 55 // 56 // TODO(veluca): consider making the order used for computing offsets easier to 57 // define - it is currently defined by the algorithm to compute the alias table. 58 // Beware of breaking the implicit assumption that symbols that come after the 59 // cutoff value should have an offset at least as big as the cutoff. 60 61 struct AliasTable { 62 struct Symbol { 63 size_t value; 64 size_t offset; 65 size_t freq; 66 }; 67 68 // Working set size matters here (~64 tables x 256 entries). 69 // offsets0 is always zero (beginning of [0] side among the same symbol). 70 // offsets1 is an offset of (pos >= cutoff) side decremented by cutoff. 71 #pragma pack(push, 1) 72 struct Entry { 73 uint8_t cutoff; // < kEntrySizeMinus1 when used by ANS. 74 uint8_t right_value; // < alphabet size. 75 uint16_t freq0; 76 77 // Only used if `greater` (see Lookup) 78 uint16_t offsets1; // <= ANS_TAB_SIZE 79 uint16_t freq1_xor_freq0; // for branchless ternary in Lookup 80 }; 81 #pragma pack(pop) 82 83 // Dividing `value` by `entry_size` determines `i`, the entry which is 84 // responsible for the input. If the remainder is below `cutoff`, then the 85 // mapped symbol is `i`; since `offsets[0]` stores the number of occurrences 86 // of `i` "before" the start of this entry, the offset of the input will be 87 // `offsets[0] + remainder`. If the remainder is above cutoff, the mapped 88 // symbol is `right_value`; since `offsets[1]` stores the number of 89 // occurrences of `right_value` "before" this entry, minus the `cutoff` value, 90 // the input offset is then `remainder + offsets[1]`. 91 static JXL_INLINE Symbol Lookup(const Entry* JXL_RESTRICT table, size_t value, 92 size_t log_entry_size, 93 size_t entry_size_minus_1) { 94 const size_t i = value >> log_entry_size; 95 const size_t pos = value & entry_size_minus_1; 96 97 #if JXL_BYTE_ORDER_LITTLE 98 uint64_t entry; 99 memcpy(&entry, &table[i].cutoff, sizeof(entry)); 100 const size_t cutoff = entry & 0xFF; // = MOVZX 101 const size_t right_value = (entry >> 8) & 0xFF; // = MOVZX 102 const size_t freq0 = (entry >> 16) & 0xFFFF; 103 #else 104 // Generates multiple loads with complex addressing. 105 const size_t cutoff = table[i].cutoff; 106 const size_t right_value = table[i].right_value; 107 const size_t freq0 = table[i].freq0; 108 #endif 109 110 const bool greater = pos >= cutoff; 111 112 #if JXL_BYTE_ORDER_LITTLE 113 const uint64_t conditional = greater ? entry : 0; // = CMOV 114 const size_t offsets1_or_0 = (conditional >> 32) & 0xFFFF; 115 const size_t freq1_xor_freq0_or_0 = conditional >> 48; 116 #else 117 const size_t offsets1_or_0 = greater ? table[i].offsets1 : 0; 118 const size_t freq1_xor_freq0_or_0 = greater ? table[i].freq1_xor_freq0 : 0; 119 #endif 120 121 // WARNING: moving this code may interfere with CMOV heuristics. 122 Symbol s; 123 s.value = greater ? right_value : i; 124 s.offset = offsets1_or_0 + pos; 125 s.freq = freq0 ^ freq1_xor_freq0_or_0; // = greater ? freq1 : freq0 126 // XOR avoids implementation-defined conversion from unsigned to signed. 127 // Alternatives considered: BEXTR is 2 cycles on HSW, SET+shift causes 128 // spills, simple ternary has a long dependency chain. 129 130 return s; 131 } 132 133 static HWY_INLINE void Prefetch(const Entry* JXL_RESTRICT table, size_t value, 134 size_t log_entry_size) { 135 const size_t i = value >> log_entry_size; 136 hwy::Prefetch(table + i); 137 } 138 }; 139 140 // Computes an alias table for a given distribution. 141 Status InitAliasTable(std::vector<int32_t> distribution, uint32_t log_range, 142 size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a); 143 144 } // namespace jxl 145 146 #endif // LIB_JXL_ANS_COMMON_H_