enc_coeff_order.cc (12602B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include <jxl/memory_manager.h> 7 8 #include "lib/jxl/base/status.h" 9 #include "lib/jxl/memory_manager_internal.h" 10 11 // Suppress any -Wdeprecated-declarations warning that might be emitted by 12 // GCC or Clang by std::stable_sort in C++17 or later mode 13 #ifdef __clang__ 14 #pragma clang diagnostic push 15 #pragma clang diagnostic ignored "-Wdeprecated-declarations" 16 #elif defined(__GNUC__) 17 #pragma GCC push_options 18 #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 19 #endif 20 21 #include <algorithm> 22 23 #ifdef __clang__ 24 #pragma clang diagnostic pop 25 #elif defined(__GNUC__) 26 #pragma GCC pop_options 27 #endif 28 29 #include <cmath> 30 #include <cstdint> 31 #include <vector> 32 33 #include "lib/jxl/ac_strategy.h" 34 #include "lib/jxl/base/rect.h" 35 #include "lib/jxl/coeff_order.h" 36 #include "lib/jxl/coeff_order_fwd.h" 37 #include "lib/jxl/dct_util.h" 38 #include "lib/jxl/enc_ans.h" 39 #include "lib/jxl/enc_bit_writer.h" 40 #include "lib/jxl/lehmer_code.h" 41 42 namespace jxl { 43 44 struct AuxOut; 45 enum class LayerType : uint8_t; 46 47 std::pair<uint32_t, uint32_t> ComputeUsedOrders( 48 const SpeedTier speed, const AcStrategyImage& ac_strategy, 49 const Rect& rect) { 50 // No coefficient reordering in Falcon or faster. 51 // Only uses DCT8 = 0, so bitfield = 1. 52 if (speed >= SpeedTier::kFalcon) return {1, 1}; 53 54 uint32_t ret = 0; 55 uint32_t ret_customize = 0; 56 size_t xsize_blocks = rect.xsize(); 57 size_t ysize_blocks = rect.ysize(); 58 // TODO(veluca): precompute when doing DCT. 59 for (size_t by = 0; by < ysize_blocks; ++by) { 60 AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by); 61 for (size_t bx = 0; bx < xsize_blocks; ++bx) { 62 int ord = kStrategyOrder[acs_row[bx].RawStrategy()]; 63 // Do not customize coefficient orders for blocks bigger than 32x32. 64 ret |= 1u << ord; 65 if (ord > 6) { 66 continue; 67 } 68 ret_customize |= 1u << ord; 69 } 70 } 71 // Use default orders for small images. 72 if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return {ret, 0}; 73 return {ret, ret_customize}; 74 } 75 76 Status ComputeCoeffOrder(SpeedTier speed, const ACImage& acs, 77 const AcStrategyImage& ac_strategy, 78 const FrameDimensions& frame_dim, 79 uint32_t& all_used_orders, uint32_t prev_used_acs, 80 uint32_t current_used_acs, 81 uint32_t current_used_orders, 82 coeff_order_t* JXL_RESTRICT order) { 83 JxlMemoryManager* memory_manager = ac_strategy.memory_manager(); 84 std::vector<int32_t> num_zeros(kCoeffOrderMaxSize); 85 // If compressing at high speed and only using 8x8 DCTs, only consider a 86 // subset of blocks. 87 double block_fraction = 1.0f; 88 // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes 89 // encoding significantly less dense. 90 if (speed >= SpeedTier::kSquirrel && current_used_orders == 1) { 91 block_fraction = 0.5f; 92 } 93 // No need to compute number of zero coefficients if all orders are the 94 // default. 95 if (current_used_orders != 0) { 96 uint64_t threshold = 97 (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction; 98 uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull), 99 static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)}; 100 // Xorshift128+ adapted from xorshift128+-inl.h 101 auto use_sample = [&]() { 102 auto s1 = s[0]; 103 const auto s0 = s[1]; 104 const auto bits = s1 + s0; // b, c 105 s[0] = s0; 106 s1 ^= s1 << 23; 107 s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); 108 s[1] = s1; 109 return (bits >> 32) <= threshold; 110 }; 111 112 // Count number of zero coefficients, separately for each DCT band. 113 // TODO(veluca): precompute when doing DCT. 114 for (size_t group_index = 0; group_index < frame_dim.num_groups; 115 group_index++) { 116 const size_t gx = group_index % frame_dim.xsize_groups; 117 const size_t gy = group_index / frame_dim.xsize_groups; 118 const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks, 119 kGroupDimInBlocks, kGroupDimInBlocks, 120 frame_dim.xsize_blocks, frame_dim.ysize_blocks); 121 ConstACPtr rows[3]; 122 ACType type = acs.Type(); 123 for (size_t c = 0; c < 3; c++) { 124 rows[c] = acs.PlaneRow(c, group_index, 0); 125 } 126 size_t ac_offset = 0; 127 128 // TODO(veluca): SIMDfy. 129 for (size_t by = 0; by < rect.ysize(); ++by) { 130 AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by); 131 for (size_t bx = 0; bx < rect.xsize(); ++bx) { 132 AcStrategy acs = acs_row[bx]; 133 if (!acs.IsFirstBlock()) continue; 134 if (!use_sample()) continue; 135 size_t size = kDCTBlockSize << acs.log2_covered_blocks(); 136 for (size_t c = 0; c < 3; ++c) { 137 const size_t order_offset = 138 CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c); 139 if (type == ACType::k16) { 140 for (size_t k = 0; k < size; k++) { 141 bool is_zero = rows[c].ptr16[ac_offset + k] == 0; 142 num_zeros[order_offset + k] += is_zero ? 1 : 0; 143 } 144 } else { 145 for (size_t k = 0; k < size; k++) { 146 bool is_zero = rows[c].ptr32[ac_offset + k] == 0; 147 num_zeros[order_offset + k] += is_zero ? 1 : 0; 148 } 149 } 150 // Ensure LLFs are first in the order. 151 size_t cx = acs.covered_blocks_x(); 152 size_t cy = acs.covered_blocks_y(); 153 CoefficientLayout(&cy, &cx); 154 for (size_t iy = 0; iy < cy; iy++) { 155 for (size_t ix = 0; ix < cx; ix++) { 156 num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1; 157 } 158 } 159 } 160 ac_offset += size; 161 } 162 } 163 } 164 } 165 struct PosAndCount { 166 uint32_t pos; 167 uint32_t count; 168 }; 169 size_t mem_bytes = AcStrategy::kMaxCoeffArea * sizeof(PosAndCount); 170 JXL_ASSIGN_OR_RETURN(auto mem, 171 AlignedMemory::Create(memory_manager, mem_bytes)); 172 173 std::vector<coeff_order_t> natural_order_buffer; 174 175 uint16_t computed = 0; 176 for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { 177 uint8_t ord = kStrategyOrder[o]; 178 if (computed & (1 << ord)) continue; 179 computed |= 1 << ord; 180 AcStrategy acs = AcStrategy::FromRawStrategy(o); 181 size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y(); 182 183 // Do nothing for transforms that don't appear. 184 if ((1 << ord) & ~current_used_acs) continue; 185 186 // Do nothing if we already committed to this custom order previously. 187 if ((1 << ord) & prev_used_acs) continue; 188 if ((1 << ord) & all_used_orders) continue; 189 190 if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz); 191 acs.ComputeNaturalCoeffOrder(natural_order_buffer.data()); 192 193 // Ensure natural coefficient order is not permuted if the order is 194 // not transmitted. 195 if ((1 << ord) & ~current_used_orders) { 196 for (size_t c = 0; c < 3; c++) { 197 size_t offset = CoeffOrderOffset(ord, c); 198 JXL_ENSURE(CoeffOrderOffset(ord, c + 1) - offset == sz); 199 memcpy(&order[offset], natural_order_buffer.data(), 200 sz * sizeof(*order)); 201 } 202 continue; 203 } 204 205 bool is_nondefault = false; 206 for (uint8_t c = 0; c < 3; c++) { 207 // Apply zig-zag order. 208 PosAndCount* pos_and_val = mem.address<PosAndCount>(); 209 size_t offset = CoeffOrderOffset(ord, c); 210 JXL_ENSURE(CoeffOrderOffset(ord, c + 1) - offset == sz); 211 float inv_sqrt_sz = 1.0f / std::sqrt(sz); 212 for (size_t i = 0; i < sz; ++i) { 213 size_t pos = natural_order_buffer[i]; 214 pos_and_val[i].pos = pos; 215 // We don't care for the exact number -> quantize number of zeros, 216 // to get less permuted order. 217 pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f; 218 } 219 220 // Stable-sort -> elements with same number of zeros will preserve their 221 // order. 222 auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool { 223 return a.count < b.count; 224 }; 225 std::stable_sort(pos_and_val, pos_and_val + sz, comparator); 226 227 // Grab indices. 228 for (size_t i = 0; i < sz; ++i) { 229 order[offset + i] = pos_and_val[i].pos; 230 is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos; 231 } 232 } 233 if (!is_nondefault) { 234 current_used_orders &= ~(1 << ord); 235 } 236 } 237 all_used_orders |= current_used_orders; 238 return true; 239 } 240 241 namespace { 242 243 Status TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip, 244 size_t size, std::vector<Token>* tokens) { 245 std::vector<LehmerT> lehmer(size); 246 std::vector<uint32_t> temp(size + 1); 247 JXL_RETURN_IF_ERROR( 248 ComputeLehmerCode(order, temp.data(), size, lehmer.data())); 249 size_t end = size; 250 while (end > skip && lehmer[end - 1] == 0) { 251 --end; 252 } 253 tokens->emplace_back(CoeffOrderContext(size), end - skip); 254 uint32_t last = 0; 255 for (size_t i = skip; i < end; ++i) { 256 tokens->emplace_back(CoeffOrderContext(last), lehmer[i]); 257 last = lehmer[i]; 258 } 259 return true; 260 } 261 262 } // namespace 263 264 Status EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip, 265 size_t size, BitWriter* writer, LayerType layer, 266 AuxOut* aux_out) { 267 JxlMemoryManager* memory_manager = writer->memory_manager(); 268 std::vector<std::vector<Token>> tokens(1); 269 JXL_RETURN_IF_ERROR(TokenizePermutation(order, skip, size, tokens.data())); 270 std::vector<uint8_t> context_map; 271 EntropyEncodingData codes; 272 JXL_ASSIGN_OR_RETURN( 273 size_t cost, BuildAndEncodeHistograms( 274 memory_manager, HistogramParams(), kPermutationContexts, 275 tokens, &codes, &context_map, writer, layer, aux_out)); 276 (void)cost; 277 JXL_RETURN_IF_ERROR( 278 WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out)); 279 return true; 280 } 281 282 namespace { 283 Status EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs, 284 std::vector<Token>* tokens, coeff_order_t* order_zigzag, 285 std::vector<coeff_order_t>& natural_order_lut) { 286 const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y(); 287 const size_t size = kDCTBlockSize * llf; 288 for (size_t i = 0; i < size; ++i) { 289 order_zigzag[i] = natural_order_lut[order[i]]; 290 } 291 JXL_RETURN_IF_ERROR(TokenizePermutation(order_zigzag, llf, size, tokens)); 292 return true; 293 } 294 } // namespace 295 296 Status EncodeCoeffOrders(uint16_t used_orders, 297 const coeff_order_t* JXL_RESTRICT order, 298 BitWriter* writer, LayerType layer, 299 AuxOut* JXL_RESTRICT aux_out) { 300 JxlMemoryManager* memory_manager = writer->memory_manager(); 301 size_t mem_bytes = AcStrategy::kMaxCoeffArea * sizeof(coeff_order_t); 302 JXL_ASSIGN_OR_RETURN(auto mem, 303 AlignedMemory::Create(memory_manager, mem_bytes)); 304 uint16_t computed = 0; 305 std::vector<std::vector<Token>> tokens(1); 306 std::vector<coeff_order_t> natural_order_lut; 307 for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { 308 uint8_t ord = kStrategyOrder[o]; 309 if (computed & (1 << ord)) continue; 310 computed |= 1 << ord; 311 if ((used_orders & (1 << ord)) == 0) continue; 312 AcStrategy acs = AcStrategy::FromRawStrategy(o); 313 const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y(); 314 const size_t size = kDCTBlockSize * llf; 315 if (natural_order_lut.size() < size) natural_order_lut.resize(size); 316 acs.ComputeNaturalCoeffOrderLut(natural_order_lut.data()); 317 for (size_t c = 0; c < 3; c++) { 318 JXL_RETURN_IF_ERROR( 319 EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, tokens.data(), 320 mem.address<coeff_order_t>(), natural_order_lut)); 321 } 322 } 323 // Do not write anything if no order is used. 324 if (used_orders != 0) { 325 std::vector<uint8_t> context_map; 326 EntropyEncodingData codes; 327 JXL_ASSIGN_OR_RETURN( 328 size_t cost, 329 BuildAndEncodeHistograms(memory_manager, HistogramParams(), 330 kPermutationContexts, tokens, &codes, 331 &context_map, writer, layer, aux_out)); 332 (void)cost; 333 JXL_RETURN_IF_ERROR( 334 WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out)); 335 } 336 return true; 337 } 338 339 } // namespace jxl