ans_common.cc (6433B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jxl/ans_common.h" 7 8 #include <cstddef> 9 #include <cstdint> 10 #include <numeric> 11 #include <vector> 12 13 #include "lib/jxl/ans_params.h" 14 #include "lib/jxl/base/status.h" 15 16 namespace jxl { 17 18 std::vector<int32_t> CreateFlatHistogram(int length, int total_count) { 19 JXL_DASSERT(length > 0); 20 JXL_DASSERT(length <= total_count); 21 const int count = total_count / length; 22 std::vector<int32_t> result(length, count); 23 const int rem_counts = total_count % length; 24 for (int i = 0; i < rem_counts; ++i) { 25 ++result[i]; 26 } 27 return result; 28 } 29 30 // First, all trailing non-occurring symbols are removed from the distribution; 31 // if this leaves the distribution empty, a placeholder symbol with max weight 32 // is added. This ensures that the resulting distribution sums to total table 33 // size. Then, `entry_size` is chosen to be the largest power of two so that 34 // `table_size` = ANS_TAB_SIZE/`entry_size` is at least as big as the 35 // distribution size. 36 // Note that each entry will only ever contain two different symbols, and 37 // consecutive ranges of offsets, which allows us to use a compact 38 // representation. 39 // Each entry is initialized with only the (symbol=i, offset) pairs; then 40 // positions for which the entry overflows (i.e. distribution[i] > entry_size) 41 // or is not full are computed, and put into a stack in increasing order. 42 // Missing symbols in the distribution are padded with 0 (because `table_size` 43 // >= number of symbols). The `cutoff` value for each entry is initialized to 44 // the number of occupied slots in that entry (i.e. `distributions[i]`). While 45 // the overflowing-symbol stack is not empty (which implies that the 46 // underflowing-symbol stack also is not), the top overfull and underfull 47 // positions are popped from the stack; the empty slots in the underfull entry 48 // are then filled with as many slots as needed from the overfull entry; such 49 // slots are placed after the slots in the overfull entry, and `offsets[1]` is 50 // computed accordingly. The formerly underfull entry is thus now neither 51 // underfull nor overfull, and represents exactly two symbols. The overfull 52 // entry might be either overfull or underfull, and is pushed into the 53 // corresponding stack. 54 Status InitAliasTable(std::vector<int32_t> distribution, uint32_t log_range, 55 size_t log_alpha_size, 56 AliasTable::Entry* JXL_RESTRICT a) { 57 const uint32_t range = 1 << log_range; 58 const size_t table_size = 1 << log_alpha_size; 59 JXL_ENSURE(table_size <= range); 60 while (!distribution.empty() && distribution.back() == 0) { 61 distribution.pop_back(); 62 } 63 // Ensure that a valid table is always returned, even for an empty 64 // alphabet. Otherwise, a specially-crafted stream might crash the 65 // decoder. 66 if (distribution.empty()) { 67 distribution.emplace_back(range); 68 } 69 JXL_ENSURE(distribution.size() <= table_size); 70 const uint32_t entry_size = range >> log_alpha_size; // this is exact 71 int single_symbol = -1; 72 int sum = 0; 73 // Special case for single-symbol distributions, that ensures that the state 74 // does not change when decoding from such a distribution. Note that, since we 75 // hardcode offset0 == 0, it is not straightforward (if at all possible) to 76 // fix the general case to produce this result. 77 for (size_t sym = 0; sym < distribution.size(); sym++) { 78 int32_t v = distribution[sym]; 79 sum += v; 80 if (v == ANS_TAB_SIZE) { 81 JXL_ENSURE(single_symbol == -1); 82 single_symbol = sym; 83 } 84 } 85 JXL_ENSURE(static_cast<uint32_t>(sum) == range); 86 if (single_symbol != -1) { 87 uint8_t sym = single_symbol; 88 JXL_ENSURE(single_symbol == sym); 89 for (size_t i = 0; i < table_size; i++) { 90 a[i].right_value = sym; 91 a[i].cutoff = 0; 92 a[i].offsets1 = entry_size * i; 93 a[i].freq0 = 0; 94 a[i].freq1_xor_freq0 = ANS_TAB_SIZE; 95 } 96 return true; 97 } 98 99 std::vector<uint32_t> underfull_posn; 100 std::vector<uint32_t> overfull_posn; 101 std::vector<uint32_t> cutoffs(1 << log_alpha_size); 102 // Initialize entries. 103 for (size_t i = 0; i < distribution.size(); i++) { 104 cutoffs[i] = distribution[i]; 105 if (cutoffs[i] > entry_size) { 106 overfull_posn.push_back(i); 107 } else if (cutoffs[i] < entry_size) { 108 underfull_posn.push_back(i); 109 } 110 } 111 for (uint32_t i = distribution.size(); i < table_size; i++) { 112 cutoffs[i] = 0; 113 underfull_posn.push_back(i); 114 } 115 // Reassign overflow/underflow values. 116 while (!overfull_posn.empty()) { 117 uint32_t overfull_i = overfull_posn.back(); 118 overfull_posn.pop_back(); 119 JXL_ENSURE(!underfull_posn.empty()); 120 uint32_t underfull_i = underfull_posn.back(); 121 underfull_posn.pop_back(); 122 uint32_t underfull_by = entry_size - cutoffs[underfull_i]; 123 cutoffs[overfull_i] -= underfull_by; 124 // overfull positions have their original symbols 125 a[underfull_i].right_value = overfull_i; 126 a[underfull_i].offsets1 = cutoffs[overfull_i]; 127 // Slots in the right part of entry underfull_i were taken from the end 128 // of the symbols in entry overfull_i. 129 if (cutoffs[overfull_i] < entry_size) { 130 underfull_posn.push_back(overfull_i); 131 } else if (cutoffs[overfull_i] > entry_size) { 132 overfull_posn.push_back(overfull_i); 133 } 134 } 135 for (uint32_t i = 0; i < table_size; i++) { 136 // cutoffs[i] is properly initialized but the clang-analyzer doesn't infer 137 // it since it is partially initialized across two for-loops. 138 // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult) 139 if (cutoffs[i] == entry_size) { 140 a[i].right_value = i; 141 a[i].offsets1 = 0; 142 a[i].cutoff = 0; 143 } else { 144 // Note that, if cutoff is not equal to entry_size, 145 // a[i].offsets1 was initialized with (overfull cutoff) - 146 // (entry_size - a[i].cutoff). Thus, subtracting 147 // a[i].cutoff cannot make it negative. 148 a[i].offsets1 -= cutoffs[i]; 149 a[i].cutoff = cutoffs[i]; 150 } 151 const size_t freq0 = i < distribution.size() ? distribution[i] : 0; 152 const size_t i1 = a[i].right_value; 153 const size_t freq1 = i1 < distribution.size() ? distribution[i1] : 0; 154 a[i].freq0 = static_cast<uint16_t>(freq0); 155 a[i].freq1_xor_freq0 = static_cast<uint16_t>(freq1 ^ freq0); 156 } 157 return true; 158 } 159 160 } // namespace jxl