enc_modular.cc (74353B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jxl/enc_modular.h" 7 8 #include <jxl/memory_manager.h> 9 10 #include <array> 11 #include <cstddef> 12 #include <cstdint> 13 #include <limits> 14 #include <utility> 15 #include <vector> 16 17 #include "lib/jxl/base/compiler_specific.h" 18 #include "lib/jxl/base/printf_macros.h" 19 #include "lib/jxl/base/rect.h" 20 #include "lib/jxl/base/status.h" 21 #include "lib/jxl/chroma_from_luma.h" 22 #include "lib/jxl/compressed_dc.h" 23 #include "lib/jxl/dec_ans.h" 24 #include "lib/jxl/dec_modular.h" 25 #include "lib/jxl/enc_aux_out.h" 26 #include "lib/jxl/enc_bit_writer.h" 27 #include "lib/jxl/enc_cluster.h" 28 #include "lib/jxl/enc_fields.h" 29 #include "lib/jxl/enc_gaborish.h" 30 #include "lib/jxl/enc_params.h" 31 #include "lib/jxl/enc_patch_dictionary.h" 32 #include "lib/jxl/enc_quant_weights.h" 33 #include "lib/jxl/frame_dimensions.h" 34 #include "lib/jxl/frame_header.h" 35 #include "lib/jxl/modular/encoding/context_predict.h" 36 #include "lib/jxl/modular/encoding/enc_encoding.h" 37 #include "lib/jxl/modular/encoding/encoding.h" 38 #include "lib/jxl/modular/encoding/ma_common.h" 39 #include "lib/jxl/modular/modular_image.h" 40 #include "lib/jxl/modular/options.h" 41 #include "lib/jxl/modular/transform/enc_transform.h" 42 #include "lib/jxl/pack_signed.h" 43 #include "lib/jxl/quant_weights.h" 44 #include "modular/options.h" 45 46 namespace jxl { 47 48 namespace { 49 // constexpr bool kPrintTree = false; 50 51 // Squeeze default quantization factors 52 // these quantization factors are for -Q 50 (other qualities simply scale the 53 // factors; things are rounded down and obviously cannot get below 1) 54 const float squeeze_quality_factor = 55 0.35; // for easy tweaking of the quality range (decrease this number for 56 // higher quality) 57 const float squeeze_luma_factor = 58 1.1; // for easy tweaking of the balance between luma (or anything 59 // non-chroma) and chroma (decrease this number for higher quality 60 // luma) 61 const float squeeze_quality_factor_xyb = 4.8f; 62 const float squeeze_xyb_qtable[3][16] = { 63 {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16, 64 0.08, 0.04, 0.02, 0.01, 0.005}, // Y 65 {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 66 0.5}, // X 67 {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 68 0.5}, // B-Y 69 }; 70 71 const float squeeze_luma_qtable[16] = {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 72 2.56, 1.28, 0.64, 0.32, 0.16, 0.08, 73 0.04, 0.02, 0.01, 0.005}; 74 // for 8-bit input, the range of YCoCg chroma is -255..255 so basically this 75 // does 4:2:0 subsampling (two most fine grained layers get quantized away) 76 const float squeeze_chroma_qtable[16] = { 77 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5}; 78 79 // Merges the trees in `trees` using nodes that decide on stream_id, as defined 80 // by `tree_splits`. 81 Status MergeTrees(const std::vector<Tree>& trees, 82 const std::vector<size_t>& tree_splits, size_t begin, 83 size_t end, Tree* tree) { 84 JXL_ENSURE(trees.size() + 1 == tree_splits.size()); 85 JXL_ENSURE(end > begin); 86 JXL_ENSURE(end <= trees.size()); 87 if (end == begin + 1) { 88 // Insert the tree, adding the opportune offset to all child nodes. 89 // This will make the leaf IDs wrong, but subsequent roundtripping will fix 90 // them. 91 size_t sz = tree->size(); 92 tree->insert(tree->end(), trees[begin].begin(), trees[begin].end()); 93 for (size_t i = sz; i < tree->size(); i++) { 94 (*tree)[i].lchild += sz; 95 (*tree)[i].rchild += sz; 96 } 97 return true; 98 } 99 size_t mid = (begin + end) / 2; 100 size_t splitval = tree_splits[mid] - 1; 101 size_t cur = tree->size(); 102 tree->emplace_back(1 /*stream_id*/, splitval, 0, 0, Predictor::Zero, 0, 1); 103 (*tree)[cur].lchild = tree->size(); 104 JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, mid, end, tree)); 105 (*tree)[cur].rchild = tree->size(); 106 JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, begin, mid, tree)); 107 return true; 108 } 109 110 void QuantizeChannel(Channel& ch, const int q) { 111 if (q == 1) return; 112 for (size_t y = 0; y < ch.plane.ysize(); y++) { 113 pixel_type* row = ch.plane.Row(y); 114 for (size_t x = 0; x < ch.plane.xsize(); x++) { 115 if (row[x] < 0) { 116 row[x] = -((-row[x] + q / 2) / q) * q; 117 } else { 118 row[x] = ((row[x] + q / 2) / q) * q; 119 } 120 } 121 } 122 } 123 124 // convert binary32 float that corresponds to custom [bits]-bit float (with 125 // [exp_bits] exponent bits) to a [bits]-bit integer representation that should 126 // fit in pixel_type 127 Status float_to_int(const float* const row_in, pixel_type* const row_out, 128 size_t xsize, unsigned int bits, unsigned int exp_bits, 129 bool fp, double dfactor) { 130 JXL_ENSURE(sizeof(pixel_type) * 8 >= bits); 131 if (!fp) { 132 if (bits > 22) { 133 for (size_t x = 0; x < xsize; ++x) { 134 row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5); 135 } 136 } else { 137 float factor = dfactor; 138 for (size_t x = 0; x < xsize; ++x) { 139 row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f); 140 } 141 } 142 return true; 143 } 144 if (bits == 32 && fp) { 145 JXL_ENSURE(exp_bits == 8); 146 memcpy(static_cast<void*>(row_out), static_cast<const void*>(row_in), 147 4 * xsize); 148 return true; 149 } 150 151 JXL_ENSURE(bits > 0); 152 int exp_bias = (1 << (exp_bits - 1)) - 1; 153 int max_exp = (1 << exp_bits) - 1; 154 uint32_t sign = (1u << (bits - 1)); 155 int mant_bits = bits - exp_bits - 1; 156 int mant_shift = 23 - mant_bits; 157 for (size_t x = 0; x < xsize; ++x) { 158 uint32_t f; 159 memcpy(&f, &row_in[x], 4); 160 int signbit = (f >> 31); 161 f &= 0x7fffffff; 162 if (f == 0) { 163 row_out[x] = (signbit ? sign : 0); 164 continue; 165 } 166 int exp = (f >> 23) - 127; 167 if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed"); 168 int mantissa = (f & 0x007fffff); 169 // broke up the binary32 into its parts, now reassemble into 170 // arbitrary float 171 exp += exp_bias; 172 if (exp < 0) { // will become a subnormal number 173 // add implicit leading 1 to mantissa 174 mantissa |= 0x00800000; 175 if (exp < -mant_bits) { 176 return JXL_FAILURE( 177 "Invalid float number: %g cannot be represented with %i " 178 "exp_bits and %i mant_bits (exp %i)", 179 row_in[x], exp_bits, mant_bits, exp); 180 } 181 mantissa >>= 1 - exp; 182 exp = 0; 183 } 184 // exp should be representable in exp_bits, otherwise input was 185 // invalid 186 if (exp > max_exp) return JXL_FAILURE("Invalid float exponent"); 187 if (mantissa & ((1 << mant_shift) - 1)) { 188 return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x], 189 mantissa); 190 } 191 mantissa >>= mant_shift; 192 f = (signbit ? sign : 0); 193 f |= (exp << mant_bits); 194 f |= mantissa; 195 row_out[x] = static_cast<pixel_type>(f); 196 } 197 return true; 198 } 199 200 float EstimateWPCost(const Image& img, size_t i) { 201 size_t extra_bits = 0; 202 float histo_cost = 0; 203 HybridUintConfig config; 204 int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31, 205 -23, -15, -11, -7, -4, -3, -1, 0, 1, 206 3, 5, 7, 11, 15, 23, 31, 47, 63, 207 95, 127, 191, 255, 392, 500}; 208 constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1; 209 Histogram histo[nc] = {}; 210 weighted::Header wp_header; 211 PredictorMode(i, &wp_header); 212 for (const Channel& ch : img.channel) { 213 const intptr_t onerow = ch.plane.PixelsPerRow(); 214 weighted::State wp_state(wp_header, ch.w, ch.h); 215 Properties properties(1); 216 for (size_t y = 0; y < ch.h; y++) { 217 const pixel_type* JXL_RESTRICT r = ch.Row(y); 218 for (size_t x = 0; x < ch.w; x++) { 219 size_t offset = 0; 220 pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); 221 pixel_type_w top = (y ? *(r + x - onerow) : left); 222 pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); 223 pixel_type_w topright = 224 (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top); 225 pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top); 226 pixel_type guess = wp_state.Predict</*compute_properties=*/true>( 227 x, y, ch.w, top, left, topright, topleft, toptop, &properties, 228 offset); 229 size_t ctx = 0; 230 for (int c : cutoffs) { 231 ctx += (c >= properties[0]) ? 1 : 0; 232 } 233 pixel_type res = r[x] - guess; 234 uint32_t token; 235 uint32_t nbits; 236 uint32_t bits; 237 config.Encode(PackSigned(res), &token, &nbits, &bits); 238 histo[ctx].Add(token); 239 extra_bits += nbits; 240 wp_state.UpdateErrors(r[x], x, y, ch.w); 241 } 242 } 243 for (auto& h : histo) { 244 histo_cost += h.ShannonEntropy(); 245 h.Clear(); 246 } 247 } 248 return histo_cost + extra_bits; 249 } 250 251 float EstimateCost(const Image& img) { 252 // TODO(veluca): consider SIMDfication of this code. 253 size_t extra_bits = 0; 254 float histo_cost = 0; 255 HybridUintConfig config; 256 uint32_t cutoffs[] = {0, 1, 3, 5, 7, 11, 15, 23, 31, 257 47, 63, 95, 127, 191, 255, 392, 500}; 258 constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1; 259 Histogram histo[nc] = {}; 260 for (const Channel& ch : img.channel) { 261 const intptr_t onerow = ch.plane.PixelsPerRow(); 262 for (size_t y = 0; y < ch.h; y++) { 263 const pixel_type* JXL_RESTRICT r = ch.Row(y); 264 for (size_t x = 0; x < ch.w; x++) { 265 pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); 266 pixel_type_w top = (y ? *(r + x - onerow) : left); 267 pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); 268 size_t maxdiff = std::max(std::max(left, top), topleft) - 269 std::min(std::min(left, top), topleft); 270 size_t ctx = 0; 271 for (uint32_t c : cutoffs) { 272 ctx += (c > maxdiff) ? 1 : 0; 273 } 274 pixel_type res = r[x] - ClampedGradient(top, left, topleft); 275 uint32_t token; 276 uint32_t nbits; 277 uint32_t bits; 278 config.Encode(PackSigned(res), &token, &nbits, &bits); 279 histo[ctx].Add(token); 280 extra_bits += nbits; 281 } 282 } 283 for (auto& h : histo) { 284 histo_cost += h.ShannonEntropy(); 285 h.Clear(); 286 } 287 } 288 return histo_cost + extra_bits; 289 } 290 291 bool do_transform(Image& image, const Transform& tr, 292 const weighted::Header& wp_header, 293 jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) { 294 Transform t = tr; 295 bool did_it = true; 296 if (force_jxlart) { 297 if (!t.MetaApply(image)) return false; 298 } else { 299 did_it = TransformForward(t, image, wp_header, pool); 300 } 301 if (did_it) image.transform.push_back(t); 302 return did_it; 303 } 304 305 bool maybe_do_transform(Image& image, const Transform& tr, 306 const CompressParams& cparams, 307 const weighted::Header& wp_header, float cost_before, 308 jxl::ThreadPool* pool = nullptr, 309 bool force_jxlart = false) { 310 if (force_jxlart || cparams.speed_tier >= SpeedTier::kSquirrel) { 311 return do_transform(image, tr, wp_header, pool, force_jxlart); 312 } 313 bool did_it = do_transform(image, tr, wp_header, pool); 314 if (did_it) { 315 float cost_after = EstimateCost(image); 316 JXL_DEBUG_V(7, "Cost before: %f cost after: %f", cost_before, cost_after); 317 if (cost_after > cost_before) { 318 Transform t = image.transform.back(); 319 JXL_RETURN_IF_ERROR(t.Inverse(image, wp_header, pool)); 320 image.transform.pop_back(); 321 did_it = false; 322 } 323 } 324 return did_it; 325 } 326 327 void try_palettes(Image& gi, int& max_bitdepth, int& maxval, 328 const CompressParams& cparams_, float channel_colors_percent, 329 jxl::ThreadPool* pool = nullptr) { 330 float cost_before = 0.f; 331 size_t did_palette = 0; 332 float nb_pixels = gi.channel[0].w * gi.channel[0].h; 333 int nb_chans = gi.channel.size() - gi.nb_meta_channels; 334 // arbitrary estimate: 4.8 bpp for 8-bit RGB 335 float arbitrary_bpp_estimate = 0.2f * gi.bitdepth * nb_chans; 336 337 if (cparams_.palette_colors != 0 || cparams_.lossy_palette) { 338 // when not estimating, assume some arbitrary bpp 339 cost_before = cparams_.speed_tier <= SpeedTier::kSquirrel 340 ? EstimateCost(gi) 341 : nb_pixels * arbitrary_bpp_estimate; 342 // all-channel palette (e.g. RGBA) 343 if (nb_chans > 1) { 344 Transform maybe_palette(TransformId::kPalette); 345 maybe_palette.begin_c = gi.nb_meta_channels; 346 maybe_palette.num_c = nb_chans; 347 // Heuristic choice of max colors for a palette: 348 // max_colors = nb_pixels * estimated_bpp_without_palette * 0.0005 + 349 // + nb_pixels / 128 + 128 350 // (estimated_bpp_without_palette = cost_before / nb_pixels) 351 // Rationale: small image with large palette is not effective; 352 // also if the entropy (estimated bpp) is low (e.g. mostly solid/gradient 353 // areas), palette is less useful and may even be counterproductive. 354 maybe_palette.nb_colors = std::min( 355 static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128), 356 std::abs(cparams_.palette_colors)); 357 maybe_palette.ordered_palette = cparams_.palette_colors >= 0; 358 maybe_palette.lossy_palette = 359 (cparams_.lossy_palette && maybe_palette.num_c == 3); 360 if (maybe_palette.lossy_palette) { 361 maybe_palette.predictor = Predictor::Average4; 362 } 363 // TODO(veluca): use a custom weighted header if using the weighted 364 // predictor. 365 if (maybe_do_transform(gi, maybe_palette, cparams_, weighted::Header(), 366 cost_before, pool, cparams_.options.zero_tokens)) { 367 did_palette = 1; 368 }; 369 } 370 // all-minus-one-channel palette (RGB with separate alpha, or CMY with 371 // separate K) 372 if (!did_palette && nb_chans > 3) { 373 Transform maybe_palette_3(TransformId::kPalette); 374 maybe_palette_3.begin_c = gi.nb_meta_channels; 375 maybe_palette_3.num_c = nb_chans - 1; 376 maybe_palette_3.nb_colors = std::min( 377 static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128), 378 std::abs(cparams_.palette_colors)); 379 maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0; 380 maybe_palette_3.lossy_palette = cparams_.lossy_palette; 381 if (maybe_palette_3.lossy_palette) { 382 maybe_palette_3.predictor = Predictor::Average4; 383 } 384 if (maybe_do_transform(gi, maybe_palette_3, cparams_, weighted::Header(), 385 cost_before, pool, cparams_.options.zero_tokens)) { 386 did_palette = 1; 387 } 388 } 389 } 390 391 if (channel_colors_percent > 0) { 392 // single channel palette (like FLIF's ChannelCompact) 393 size_t nb_channels = gi.channel.size() - gi.nb_meta_channels - did_palette; 394 int orig_bitdepth = max_bitdepth; 395 max_bitdepth = 0; 396 if (nb_channels > 0 && (did_palette || cost_before == 0)) { 397 cost_before = 398 cparams_.speed_tier < SpeedTier::kSquirrel ? EstimateCost(gi) : 0; 399 } 400 for (size_t i = did_palette; i < nb_channels + did_palette; i++) { 401 int32_t min; 402 int32_t max; 403 compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); 404 int64_t colors = static_cast<int64_t>(max) - min + 1; 405 JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max); 406 Transform maybe_palette_1(TransformId::kPalette); 407 maybe_palette_1.begin_c = i + gi.nb_meta_channels; 408 maybe_palette_1.num_c = 1; 409 // simple heuristic: if less than X percent of the values in the range 410 // actually occur, it is probably worth it to do a compaction 411 // (but only if the channel palette is less than 6% the size of the 412 // image itself) 413 maybe_palette_1.nb_colors = 414 std::min(static_cast<int>(nb_pixels / 16), 415 static_cast<int>(channel_colors_percent / 100. * colors)); 416 if (maybe_do_transform(gi, maybe_palette_1, cparams_, weighted::Header(), 417 cost_before, pool)) { 418 // effective bit depth is lower, adjust quantization accordingly 419 compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); 420 if (max < maxval) maxval = max; 421 int ch_bitdepth = 422 (max > 0 ? CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0); 423 if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth; 424 } else { 425 max_bitdepth = orig_bitdepth; 426 } 427 } 428 } 429 } 430 431 } // namespace 432 433 StatusOr<ModularFrameEncoder> ModularFrameEncoder::Create( 434 JxlMemoryManager* memory_manager, const FrameHeader& frame_header, 435 const CompressParams& cparams_orig, bool streaming_mode) { 436 ModularFrameEncoder self{memory_manager}; 437 JXL_RETURN_IF_ERROR(self.Init(frame_header, cparams_orig, streaming_mode)); 438 return self; 439 } 440 441 ModularFrameEncoder::ModularFrameEncoder(JxlMemoryManager* memory_manager) 442 : memory_manager_(memory_manager) {} 443 444 Status ModularFrameEncoder::Init(const FrameHeader& frame_header, 445 const CompressParams& cparams_orig, 446 bool streaming_mode) { 447 frame_dim_ = frame_header.ToFrameDimensions(); 448 cparams_ = cparams_orig; 449 450 size_t num_streams = 451 ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes); 452 if (cparams_.ModularPartIsLossless()) { 453 switch (cparams_.decoding_speed_tier) { 454 case 0: 455 break; 456 case 1: 457 cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kWPOnly; 458 break; 459 case 2: { 460 cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly; 461 cparams_.options.predictor = Predictor::Gradient; 462 break; 463 } 464 case 3: { // LZ77, no Gradient. 465 cparams_.options.nb_repeats = 0; 466 cparams_.options.predictor = Predictor::Gradient; 467 break; 468 } 469 default: { // LZ77, no predictor. 470 cparams_.options.nb_repeats = 0; 471 cparams_.options.predictor = Predictor::Zero; 472 break; 473 } 474 } 475 } 476 if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive && 477 cparams_.ModularPartIsLossless()) { 478 cparams_.options.tree_kind = 479 ModularOptions::TreeKind::kTrivialTreeNoPredictor; 480 cparams_.options.nb_repeats = 0; 481 } 482 for (size_t i = 0; i < num_streams; ++i) { 483 stream_images_.emplace_back(memory_manager_); 484 } 485 486 // use a sensible default if nothing explicit is specified: 487 // Squeeze for lossy, no squeeze for lossless 488 if (cparams_.responsive < 0) { 489 if (cparams_.ModularPartIsLossless()) { 490 cparams_.responsive = 0; 491 } else { 492 cparams_.responsive = 1; 493 } 494 } 495 496 cparams_.options.splitting_heuristics_node_threshold = 497 82 + 14 * static_cast<int>(cparams_.speed_tier); 498 499 { 500 // Set properties. 501 std::vector<uint32_t> prop_order; 502 if (cparams_.responsive) { 503 // Properties in order of their likelihood of being useful for Squeeze 504 // residuals. 505 prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3}; 506 } else { 507 // Same, but for the non-Squeeze case. 508 prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8}; 509 // if few groups, don't use group as a property 510 if (num_streams < 30 && cparams_.speed_tier > SpeedTier::kTortoise && 511 cparams_orig.ModularPartIsLossless()) { 512 prop_order.erase(prop_order.begin() + 1); 513 } 514 } 515 int max_properties = std::min<int>( 516 cparams_.options.max_properties, 517 static_cast<int>( 518 frame_header.nonserialized_metadata->m.num_extra_channels) + 519 (frame_header.encoding == FrameEncoding::kModular ? 2 : -1)); 520 switch (cparams_.speed_tier) { 521 case SpeedTier::kHare: 522 cparams_.options.splitting_heuristics_properties.assign( 523 prop_order.begin(), prop_order.begin() + 4); 524 cparams_.options.max_property_values = 24; 525 break; 526 case SpeedTier::kWombat: 527 cparams_.options.splitting_heuristics_properties.assign( 528 prop_order.begin(), prop_order.begin() + 5); 529 cparams_.options.max_property_values = 32; 530 break; 531 case SpeedTier::kSquirrel: 532 cparams_.options.splitting_heuristics_properties.assign( 533 prop_order.begin(), prop_order.begin() + 7); 534 cparams_.options.max_property_values = 48; 535 break; 536 case SpeedTier::kKitten: 537 cparams_.options.splitting_heuristics_properties.assign( 538 prop_order.begin(), prop_order.begin() + 10); 539 cparams_.options.max_property_values = 96; 540 break; 541 case SpeedTier::kGlacier: 542 case SpeedTier::kTortoise: 543 cparams_.options.splitting_heuristics_properties = prop_order; 544 cparams_.options.max_property_values = 256; 545 break; 546 default: 547 cparams_.options.splitting_heuristics_properties.assign( 548 prop_order.begin(), prop_order.begin() + 3); 549 cparams_.options.max_property_values = 16; 550 break; 551 } 552 if (cparams_.speed_tier > SpeedTier::kTortoise) { 553 // Gradient in previous channels. 554 for (int i = 0; i < max_properties; i++) { 555 cparams_.options.splitting_heuristics_properties.push_back( 556 kNumNonrefProperties + i * 4 + 3); 557 } 558 } else { 559 // All the extra properties in Tortoise mode. 560 for (int i = 0; i < max_properties * 4; i++) { 561 cparams_.options.splitting_heuristics_properties.push_back( 562 kNumNonrefProperties + i); 563 } 564 } 565 } 566 567 if ((cparams_.options.predictor == Predictor::Average0 || 568 cparams_.options.predictor == Predictor::Average1 || 569 cparams_.options.predictor == Predictor::Average2 || 570 cparams_.options.predictor == Predictor::Average3 || 571 cparams_.options.predictor == Predictor::Average4 || 572 cparams_.options.predictor == Predictor::Weighted) && 573 !cparams_.ModularPartIsLossless()) { 574 // Lossy + Average/Weighted predictors does not work, so switch to default 575 // predictors. 576 cparams_.options.predictor = kUndefinedPredictor; 577 } 578 579 if (cparams_.options.predictor == kUndefinedPredictor) { 580 // no explicit predictor(s) given, set a good default 581 if ((cparams_.speed_tier <= SpeedTier::kGlacier || 582 cparams_.modular_mode == false) && 583 cparams_.IsLossless() && cparams_.responsive == JXL_FALSE) { 584 // TODO(veluca): allow all predictors that don't break residual 585 // multipliers in lossy mode. 586 cparams_.options.predictor = Predictor::Variable; 587 } else if (cparams_.responsive || cparams_.lossy_palette) { 588 // zero predictor for Squeeze residues and lossy palette 589 cparams_.options.predictor = Predictor::Zero; 590 } else if (!cparams_.IsLossless()) { 591 // If not responsive and lossy. TODO(veluca): use near_lossless instead? 592 cparams_.options.predictor = Predictor::Gradient; 593 } else if (cparams_.speed_tier < SpeedTier::kFalcon) { 594 // try median and weighted predictor for anything else 595 cparams_.options.predictor = Predictor::Best; 596 } else if (cparams_.speed_tier == SpeedTier::kFalcon) { 597 // just weighted predictor in falcon mode 598 cparams_.options.predictor = Predictor::Weighted; 599 } else if (cparams_.speed_tier > SpeedTier::kFalcon) { 600 // just gradient predictor in thunder mode 601 cparams_.options.predictor = Predictor::Gradient; 602 } 603 } else { 604 if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero; 605 } 606 if (!cparams_.ModularPartIsLossless()) { 607 if (cparams_.options.predictor == Predictor::Weighted || 608 cparams_.options.predictor == Predictor::Variable || 609 cparams_.options.predictor == Predictor::Best) 610 cparams_.options.predictor = Predictor::Zero; 611 } 612 tree_splits_.push_back(0); 613 if (cparams_.modular_mode == false) { 614 JXL_ASSIGN_OR_RETURN(ModularStreamId qt0, ModularStreamId::QuantTable(0)); 615 cparams_.options.fast_decode_multiplier = 1.0f; 616 tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_)); 617 tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_)); 618 tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_)); 619 tree_splits_.push_back(qt0.ID(frame_dim_)); 620 tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_)); 621 ac_metadata_size.resize(frame_dim_.num_dc_groups); 622 extra_dc_precision.resize(frame_dim_.num_dc_groups); 623 } 624 tree_splits_.push_back(num_streams); 625 cparams_.options.max_chan_size = frame_dim_.group_dim; 626 cparams_.options.group_dim = frame_dim_.group_dim; 627 628 // TODO(veluca): figure out how to use different predictor sets per channel. 629 stream_options_.resize(num_streams, cparams_.options); 630 631 stream_options_[0] = cparams_.options; 632 if (cparams_.speed_tier == SpeedTier::kFalcon) { 633 stream_options_[0].tree_kind = ModularOptions::TreeKind::kWPFixedDC; 634 } else if (cparams_.speed_tier == SpeedTier::kThunder) { 635 stream_options_[0].tree_kind = ModularOptions::TreeKind::kGradientFixedDC; 636 } 637 stream_options_[0].histogram_params = 638 HistogramParams::ForModular(cparams_, {}, streaming_mode); 639 return true; 640 } 641 642 Status ModularFrameEncoder::ComputeEncodingData( 643 const FrameHeader& frame_header, const ImageMetadata& metadata, 644 Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels, 645 const Rect& group_rect, const FrameDimensions& patch_dim, 646 const Rect& frame_area_rect, PassesEncoderState* JXL_RESTRICT enc_state, 647 const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out, 648 bool do_color) { 649 JxlMemoryManager* memory_manager = enc_state->memory_manager(); 650 JXL_DEBUG_V(6, "Computing modular encoding data for frame %s", 651 frame_header.DebugString().c_str()); 652 653 bool groupwise = enc_state->streaming_mode; 654 655 if (do_color && frame_header.loop_filter.gab && !groupwise) { 656 float w = 0.9908511000000001f; 657 float weights[3] = {w, w, w}; 658 JXL_RETURN_IF_ERROR(GaborishInverse(color, Rect(*color), weights, pool)); 659 } 660 661 if (do_color && metadata.bit_depth.bits_per_sample <= 16 && 662 cparams_.speed_tier < SpeedTier::kCheetah && 663 cparams_.decoding_speed_tier < 2 && !groupwise) { 664 JXL_RETURN_IF_ERROR(FindBestPatchDictionary( 665 *color, enc_state, cms, nullptr, aux_out, 666 cparams_.color_transform == ColorTransform::kXYB)); 667 JXL_RETURN_IF_ERROR(PatchDictionaryEncoder::SubtractFrom( 668 enc_state->shared.image_features.patches, color)); 669 } 670 671 if (cparams_.custom_splines.HasAny()) { 672 PassesSharedState& shared = enc_state->shared; 673 ImageFeatures& image_features = shared.image_features; 674 image_features.splines = cparams_.custom_splines; 675 } 676 677 // Convert ImageBundle to modular Image object 678 const size_t xsize = patch_dim.xsize; 679 const size_t ysize = patch_dim.ysize; 680 681 int nb_chans = 3; 682 if (metadata.color_encoding.IsGray() && 683 cparams_.color_transform == ColorTransform::kNone) { 684 nb_chans = 1; 685 } 686 if (!do_color) nb_chans = 0; 687 688 nb_chans += extra_channels.size(); 689 690 bool fp = metadata.bit_depth.floating_point_sample && 691 cparams_.color_transform != ColorTransform::kXYB; 692 693 // bits_per_sample is just metadata for XYB images. 694 if (metadata.bit_depth.bits_per_sample >= 32 && do_color && 695 cparams_.color_transform != ColorTransform::kXYB) { 696 if (metadata.bit_depth.bits_per_sample == 32 && fp == false) { 697 return JXL_FAILURE("uint32_t not supported in enc_modular"); 698 } else if (metadata.bit_depth.bits_per_sample > 32) { 699 return JXL_FAILURE("bits_per_sample > 32 not supported"); 700 } 701 } 702 703 // in the non-float case, there is an implicit 0 sign bit 704 int max_bitdepth = 705 do_color ? metadata.bit_depth.bits_per_sample + (fp ? 0 : 1) : 0; 706 Image& gi = stream_images_[0]; 707 JXL_ASSIGN_OR_RETURN( 708 gi, Image::Create(memory_manager, xsize, ysize, 709 metadata.bit_depth.bits_per_sample, nb_chans)); 710 int c = 0; 711 if (cparams_.color_transform == ColorTransform::kXYB && 712 cparams_.modular_mode == true) { 713 float enc_factors[3] = {65536.0f, 4096.0f, 4096.0f}; 714 if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) { 715 // quantize XYB here and then treat it as a lossless image 716 enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance); 717 enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); 718 enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); 719 cparams_.butteraugli_distance = 0; 720 } 721 if (cparams_.manual_xyb_factors.size() == 3) { 722 JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC( 723 memory_manager, &enc_state->shared.matrices, 724 cparams_.manual_xyb_factors.data())); 725 // TODO(jon): update max_bitdepth in this case 726 } else { 727 JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC( 728 memory_manager, &enc_state->shared.matrices, enc_factors)); 729 max_bitdepth = 12; 730 } 731 } 732 pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0; 733 if (do_color) { 734 for (; c < 3; c++) { 735 if (metadata.color_encoding.IsGray() && 736 cparams_.color_transform == ColorTransform::kNone && 737 c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0)) 738 continue; 739 int c_out = c; 740 // XYB is encoded as YX(B-Y) 741 if (cparams_.color_transform == ColorTransform::kXYB && c < 2) 742 c_out = 1 - c_out; 743 double factor = maxval; 744 if (cparams_.color_transform == ColorTransform::kXYB) 745 factor = enc_state->shared.matrices.InvDCQuant(c); 746 if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) { 747 JXL_ENSURE(!fp); 748 for (size_t y = 0; y < ysize; ++y) { 749 const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y); 750 pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); 751 pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y); 752 for (size_t x = 0; x < xsize; ++x) { 753 // TODO(eustas): check if std::roundf is appropriate 754 row_out[x] = row_in[x] * factor + 0.5f; 755 row_out[x] -= row_Y[x]; 756 } 757 } 758 } else { 759 int bits = metadata.bit_depth.bits_per_sample; 760 int exp_bits = metadata.bit_depth.exponent_bits_per_sample; 761 gi.channel[c_out].hshift = frame_header.chroma_subsampling.HShift(c); 762 gi.channel[c_out].vshift = frame_header.chroma_subsampling.VShift(c); 763 size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift); 764 size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift); 765 JXL_RETURN_IF_ERROR( 766 gi.channel[c_out].shrink(xsize_shifted, ysize_shifted)); 767 const auto process_row = [&](const int task, 768 const int thread) -> Status { 769 const size_t y = task; 770 const float* const JXL_RESTRICT row_in = 771 color->PlaneRow(c, y + group_rect.y0()) + group_rect.x0(); 772 pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); 773 JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out, xsize_shifted, bits, 774 exp_bits, fp, factor)); 775 return true; 776 }; 777 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted, 778 ThreadPool::NoInit, process_row, 779 "float2int")); 780 } 781 } 782 if (metadata.color_encoding.IsGray() && 783 cparams_.color_transform == ColorTransform::kNone) 784 c = 1; 785 } 786 787 for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) { 788 const ExtraChannelInfo& eci = metadata.extra_channel_info[ec]; 789 size_t ecups = frame_header.extra_channel_upsampling[ec]; 790 JXL_RETURN_IF_ERROR( 791 gi.channel[c].shrink(DivCeil(patch_dim.xsize_upsampled, ecups), 792 DivCeil(patch_dim.ysize_upsampled, ecups))); 793 gi.channel[c].hshift = gi.channel[c].vshift = 794 CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling); 795 796 int bits = eci.bit_depth.bits_per_sample; 797 int exp_bits = eci.bit_depth.exponent_bits_per_sample; 798 bool fp = eci.bit_depth.floating_point_sample; 799 double factor = (fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1)); 800 if (bits + (fp ? 0 : 1) > max_bitdepth) max_bitdepth = bits + (fp ? 0 : 1); 801 const auto process_row = [&](const int task, const int thread) -> Status { 802 const size_t y = task; 803 const float* const JXL_RESTRICT row_in = 804 extra_channels[ec].Row(y + group_rect.y0()) + group_rect.x0(); 805 pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y); 806 JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out, 807 gi.channel[c].plane.xsize(), bits, 808 exp_bits, fp, factor)); 809 return true; 810 }; 811 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, gi.channel[c].plane.ysize(), 812 ThreadPool::NoInit, process_row, 813 "float2int")); 814 } 815 JXL_ENSURE(c == nb_chans); 816 817 int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32); 818 if (max_bitdepth > level_max_bitdepth) { 819 return JXL_FAILURE( 820 "Bitdepth too high for level %i (need %i bits, have only %i in this " 821 "level)", 822 cparams_.level, max_bitdepth, level_max_bitdepth); 823 } 824 825 // Set options and apply transformations 826 if (!cparams_.ModularPartIsLossless()) { 827 if (cparams_.palette_colors != 0) { 828 JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms"); 829 } 830 if (cparams_.color_transform == ColorTransform::kXYB) { 831 cparams_.channel_colors_pre_transform_percent = 0; 832 } 833 cparams_.channel_colors_percent = 0; 834 cparams_.palette_colors = 0; 835 cparams_.lossy_palette = false; 836 } 837 838 // Global palette transforms 839 float channel_colors_percent = 0; 840 if (!cparams_.lossy_palette && 841 (cparams_.speed_tier <= SpeedTier::kThunder || 842 (do_color && metadata.bit_depth.bits_per_sample > 8))) { 843 channel_colors_percent = cparams_.channel_colors_pre_transform_percent; 844 } 845 if (!groupwise) { 846 try_palettes(gi, max_bitdepth, maxval, cparams_, channel_colors_percent, 847 pool); 848 } 849 850 // don't do an RCT if we're short on bits 851 if (cparams_.color_transform == ColorTransform::kNone && do_color && 852 gi.channel.size() - gi.nb_meta_channels >= 3 && 853 max_bitdepth + 1 < level_max_bitdepth) { 854 if (cparams_.colorspace < 0 && (!cparams_.ModularPartIsLossless() || 855 cparams_.speed_tier > SpeedTier::kHare)) { 856 Transform ycocg{TransformId::kRCT}; 857 ycocg.rct_type = 6; 858 ycocg.begin_c = gi.nb_meta_channels; 859 do_transform(gi, ycocg, weighted::Header(), pool); 860 max_bitdepth++; 861 } else if (cparams_.colorspace > 0) { 862 Transform sg(TransformId::kRCT); 863 sg.begin_c = gi.nb_meta_channels; 864 sg.rct_type = cparams_.colorspace; 865 do_transform(gi, sg, weighted::Header(), pool); 866 max_bitdepth++; 867 } 868 } 869 870 if (cparams_.move_to_front_from_channel > 0) { 871 for (size_t tgt = 0; 872 tgt + cparams_.move_to_front_from_channel < gi.channel.size(); tgt++) { 873 size_t pos = cparams_.move_to_front_from_channel; 874 while (pos > 0) { 875 Transform move(TransformId::kRCT); 876 if (pos == 1) { 877 move.begin_c = tgt; 878 move.rct_type = 28; // RGB -> GRB 879 pos -= 1; 880 } else { 881 move.begin_c = tgt + pos - 2; 882 move.rct_type = 14; // RGB -> BRG 883 pos -= 2; 884 } 885 do_transform(gi, move, weighted::Header(), pool); 886 } 887 } 888 } 889 890 // don't do squeeze if we don't have some spare bits 891 if (!groupwise && cparams_.responsive && !gi.channel.empty() && 892 max_bitdepth + 2 < level_max_bitdepth) { 893 Transform t(TransformId::kSqueeze); 894 do_transform(gi, t, weighted::Header(), pool); 895 max_bitdepth += 2; 896 } 897 898 if (max_bitdepth + 1 > level_max_bitdepth) { 899 // force no group RCTs if we don't have a spare bit 900 cparams_.colorspace = 0; 901 } 902 JXL_ENSURE(max_bitdepth <= level_max_bitdepth); 903 904 if (!cparams_.ModularPartIsLossless()) { 905 quants_.resize(gi.channel.size(), 1); 906 float quantizer = 0.25f; 907 if (!cparams_.responsive) { 908 JXL_DEBUG_V(1, 909 "Warning: lossy compression without Squeeze " 910 "transform is just color quantization."); 911 quantizer *= 0.1f; 912 } 913 float bitdepth_correction = 1.f; 914 if (cparams_.color_transform != ColorTransform::kXYB) { 915 bitdepth_correction = maxval / 255.f; 916 } 917 std::vector<float> quantizers; 918 for (size_t i = 0; i < 3; i++) { 919 float dist = cparams_.butteraugli_distance; 920 quantizers.push_back(quantizer * dist * bitdepth_correction); 921 } 922 for (size_t i = 0; i < extra_channels.size(); i++) { 923 int ec_bitdepth = 924 metadata.extra_channel_info[i].bit_depth.bits_per_sample; 925 pixel_type ec_maxval = ec_bitdepth < 32 ? (1u << ec_bitdepth) - 1 : 0; 926 bitdepth_correction = ec_maxval / 255.f; 927 float dist = 0; 928 if (i < cparams_.ec_distance.size()) dist = cparams_.ec_distance[i]; 929 if (dist < 0) dist = cparams_.butteraugli_distance; 930 quantizers.push_back(quantizer * dist * bitdepth_correction); 931 } 932 if (cparams_.options.nb_repeats == 0) { 933 return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!"); 934 } 935 for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) { 936 Channel& ch = gi.channel[i]; 937 int shift = ch.hshift + ch.vshift; // number of pixel halvings 938 if (shift > 16) shift = 16; 939 if (shift > 0) shift--; 940 int q; 941 // assuming default Squeeze here 942 int component = 943 (do_color ? 0 : 3) + ((i - gi.nb_meta_channels) % nb_chans); 944 // last 4 channels are final chroma residuals 945 if (nb_chans > 2 && i >= gi.channel.size() - 4 && cparams_.responsive) { 946 component = 1; 947 } 948 if (cparams_.color_transform == ColorTransform::kXYB && component < 3) { 949 q = quantizers[component] * squeeze_quality_factor_xyb * 950 squeeze_xyb_qtable[component][shift]; 951 } else { 952 if (cparams_.colorspace != 0 && component > 0 && component < 3) { 953 q = quantizers[component] * squeeze_quality_factor * 954 squeeze_chroma_qtable[shift]; 955 } else { 956 q = quantizers[component] * squeeze_quality_factor * 957 squeeze_luma_factor * squeeze_luma_qtable[shift]; 958 } 959 } 960 if (q < 1) q = 1; 961 QuantizeChannel(gi.channel[i], q); 962 quants_[i] = q; 963 } 964 } 965 966 // Fill other groups. 967 // DC 968 for (size_t group_id = 0; group_id < patch_dim.num_dc_groups; group_id++) { 969 const size_t rgx = group_id % patch_dim.xsize_dc_groups; 970 const size_t rgy = group_id / patch_dim.xsize_dc_groups; 971 const Rect rect(rgx * patch_dim.dc_group_dim, rgy * patch_dim.dc_group_dim, 972 patch_dim.dc_group_dim, patch_dim.dc_group_dim); 973 size_t gx = rgx + frame_area_rect.x0() / 2048; 974 size_t gy = rgy + frame_area_rect.y0() / 2048; 975 size_t real_group_id = gy * frame_dim_.xsize_dc_groups + gx; 976 // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim 977 // maxShift==1000 is infinity 978 stream_params_.push_back( 979 GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(real_group_id)}); 980 } 981 // AC global -> nothing. 982 // AC 983 for (size_t group_id = 0; group_id < patch_dim.num_groups; group_id++) { 984 const size_t rgx = group_id % patch_dim.xsize_groups; 985 const size_t rgy = group_id / patch_dim.xsize_groups; 986 const Rect mrect(rgx * patch_dim.group_dim, rgy * patch_dim.group_dim, 987 patch_dim.group_dim, patch_dim.group_dim); 988 size_t gx = rgx + frame_area_rect.x0() / (frame_dim_.group_dim); 989 size_t gy = rgy + frame_area_rect.y0() / (frame_dim_.group_dim); 990 size_t real_group_id = gy * frame_dim_.xsize_groups + gx; 991 for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses(); 992 i++) { 993 int maxShift; 994 int minShift; 995 frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift); 996 stream_params_.push_back( 997 GroupParams{mrect, minShift, maxShift, 998 ModularStreamId::ModularAC(real_group_id, i)}); 999 } 1000 } 1001 // if there's only one group, everything ends up in GlobalModular 1002 // in that case, also try RCTs/WP params for the one group 1003 if (stream_params_.size() == 2) { 1004 stream_params_.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000, 1005 ModularStreamId::Global()}); 1006 } 1007 gi_channel_.resize(stream_images_.size()); 1008 1009 const auto process_row = [&](const uint32_t i, 1010 size_t /* thread */) -> Status { 1011 size_t stream = stream_params_[i].id.ID(frame_dim_); 1012 if (stream != 0) { 1013 stream_options_[stream] = stream_options_[0]; 1014 } 1015 JXL_RETURN_IF_ERROR(PrepareStreamParams( 1016 stream_params_[i].rect, cparams_, stream_params_[i].minShift, 1017 stream_params_[i].maxShift, stream_params_[i].id, do_color, groupwise)); 1018 return true; 1019 }; 1020 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, stream_params_.size(), 1021 ThreadPool::NoInit, process_row, 1022 "ChooseParams")); 1023 { 1024 // Clear out channels that have been copied to groups. 1025 Image& full_image = stream_images_[0]; 1026 size_t c = full_image.nb_meta_channels; 1027 for (; c < full_image.channel.size(); c++) { 1028 Channel& fc = full_image.channel[c]; 1029 if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; 1030 } 1031 for (; c < full_image.channel.size(); c++) { 1032 full_image.channel[c].plane = ImageI(); 1033 } 1034 } 1035 1036 JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0])); 1037 return true; 1038 } 1039 1040 Status ModularFrameEncoder::ComputeTree(ThreadPool* pool) { 1041 std::vector<ModularMultiplierInfo> multiplier_info; 1042 if (!quants_.empty()) { 1043 for (uint32_t stream_id = 0; stream_id < stream_images_.size(); 1044 stream_id++) { 1045 // skip non-modular stream_ids 1046 if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; 1047 const Image& image = stream_images_[stream_id]; 1048 const ModularOptions& options = stream_options_[stream_id]; 1049 for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) { 1050 if (i >= image.nb_meta_channels && 1051 (image.channel[i].w > options.max_chan_size || 1052 image.channel[i].h > options.max_chan_size)) { 1053 continue; 1054 } 1055 if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; 1056 size_t ch_id = stream_id == 0 1057 ? i 1058 : gi_channel_[stream_id][i - image.nb_meta_channels]; 1059 uint32_t q = quants_[ch_id]; 1060 // Inform the tree splitting heuristics that each channel in each group 1061 // used this quantization factor. This will produce a tree with the 1062 // given multipliers. 1063 if (multiplier_info.empty() || 1064 multiplier_info.back().range[1][0] != stream_id || 1065 multiplier_info.back().multiplier != q) { 1066 StaticPropRange range; 1067 range[0] = {{i, i + 1}}; 1068 range[1] = {{stream_id, stream_id + 1}}; 1069 multiplier_info.push_back({range, static_cast<uint32_t>(q)}); 1070 } else { 1071 // Previous channel in the same group had the same quantization 1072 // factor. Don't provide two different ranges, as that creates 1073 // unnecessary nodes. 1074 multiplier_info.back().range[0][1] = i + 1; 1075 } 1076 } 1077 } 1078 // Merge group+channel settings that have the same channels and quantization 1079 // factors, to avoid unnecessary nodes. 1080 std::sort(multiplier_info.begin(), multiplier_info.end(), 1081 [](ModularMultiplierInfo a, ModularMultiplierInfo b) { 1082 return std::make_tuple(a.range, a.multiplier) < 1083 std::make_tuple(b.range, b.multiplier); 1084 }); 1085 size_t new_num = 1; 1086 for (size_t i = 1; i < multiplier_info.size(); i++) { 1087 ModularMultiplierInfo& prev = multiplier_info[new_num - 1]; 1088 ModularMultiplierInfo& cur = multiplier_info[i]; 1089 if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier && 1090 prev.range[1][1] == cur.range[1][0]) { 1091 prev.range[1][1] = cur.range[1][1]; 1092 } else { 1093 multiplier_info[new_num++] = multiplier_info[i]; 1094 } 1095 } 1096 multiplier_info.resize(new_num); 1097 } 1098 1099 if (!cparams_.custom_fixed_tree.empty()) { 1100 tree_ = cparams_.custom_fixed_tree; 1101 } else if (cparams_.speed_tier < SpeedTier::kFalcon || 1102 !cparams_.modular_mode) { 1103 // Avoid creating a tree with leaves that don't correspond to any pixels. 1104 std::vector<size_t> useful_splits; 1105 useful_splits.reserve(tree_splits_.size()); 1106 for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) { 1107 bool has_pixels = false; 1108 size_t start = tree_splits_[chunk]; 1109 size_t stop = tree_splits_[chunk + 1]; 1110 for (size_t i = start; i < stop; i++) { 1111 if (!stream_images_[i].empty()) has_pixels = true; 1112 } 1113 if (has_pixels) { 1114 useful_splits.push_back(tree_splits_[chunk]); 1115 } 1116 } 1117 // Don't do anything if modular mode does not have any pixels in this image 1118 if (useful_splits.empty()) return true; 1119 useful_splits.push_back(tree_splits_.back()); 1120 1121 std::vector<Tree> trees(useful_splits.size() - 1); 1122 const auto process_chunk = [&](const uint32_t chunk, 1123 size_t /* thread */) -> Status { 1124 // TODO(veluca): parallelize more. 1125 size_t total_pixels = 0; 1126 uint32_t start = useful_splits[chunk]; 1127 uint32_t stop = useful_splits[chunk + 1]; 1128 while (start < stop && stream_images_[start].empty()) ++start; 1129 while (start < stop && stream_images_[stop - 1].empty()) --stop; 1130 if (stream_options_[start].tree_kind != 1131 ModularOptions::TreeKind::kLearn) { 1132 for (size_t i = start; i < stop; i++) { 1133 for (const Channel& ch : stream_images_[i].channel) { 1134 total_pixels += ch.w * ch.h; 1135 } 1136 } 1137 trees[chunk] = PredefinedTree(stream_options_[start].tree_kind, 1138 total_pixels, 8, 0); 1139 return true; 1140 } 1141 TreeSamples tree_samples; 1142 JXL_RETURN_IF_ERROR( 1143 tree_samples.SetPredictor(stream_options_[start].predictor, 1144 stream_options_[start].wp_tree_mode)); 1145 JXL_RETURN_IF_ERROR(tree_samples.SetProperties( 1146 stream_options_[start].splitting_heuristics_properties, 1147 stream_options_[start].wp_tree_mode)); 1148 uint32_t max_c = 0; 1149 std::vector<pixel_type> pixel_samples; 1150 std::vector<pixel_type> diff_samples; 1151 std::vector<uint32_t> group_pixel_count; 1152 std::vector<uint32_t> channel_pixel_count; 1153 for (uint32_t i = start; i < stop; i++) { 1154 max_c = std::max<uint32_t>(stream_images_[i].channel.size(), max_c); 1155 CollectPixelSamples(stream_images_[i], stream_options_[i], i, 1156 group_pixel_count, channel_pixel_count, 1157 pixel_samples, diff_samples); 1158 } 1159 StaticPropRange range; 1160 range[0] = {{0, max_c}}; 1161 range[1] = {{start, stop}}; 1162 1163 tree_samples.PreQuantizeProperties( 1164 range, multiplier_info, group_pixel_count, channel_pixel_count, 1165 pixel_samples, diff_samples, 1166 stream_options_[start].max_property_values); 1167 for (size_t i = start; i < stop; i++) { 1168 JXL_RETURN_IF_ERROR( 1169 ModularGenericCompress(stream_images_[i], stream_options_[i], 1170 /*writer=*/nullptr, 1171 /*aux_out=*/nullptr, LayerType::Header, i, 1172 &tree_samples, &total_pixels)); 1173 } 1174 1175 // TODO(veluca): parallelize more. 1176 JXL_ASSIGN_OR_RETURN( 1177 trees[chunk], 1178 LearnTree(std::move(tree_samples), total_pixels, 1179 stream_options_[start], multiplier_info, range)); 1180 return true; 1181 }; 1182 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, useful_splits.size() - 1, 1183 ThreadPool::NoInit, process_chunk, 1184 "LearnTrees")); 1185 tree_.clear(); 1186 JXL_RETURN_IF_ERROR( 1187 MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_)); 1188 } else { 1189 // Fixed tree. 1190 size_t total_pixels = 0; 1191 int max_bitdepth = 0; 1192 for (const Image& img : stream_images_) { 1193 max_bitdepth = std::max(max_bitdepth, img.bitdepth); 1194 for (const Channel& ch : img.channel) { 1195 total_pixels += ch.w * ch.h; 1196 } 1197 } 1198 if (cparams_.speed_tier <= SpeedTier::kFalcon) { 1199 tree_ = PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels, 1200 max_bitdepth, stream_options_[0].max_properties); 1201 } else if (cparams_.speed_tier <= SpeedTier::kThunder) { 1202 tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC, 1203 total_pixels, max_bitdepth, 1204 stream_options_[0].max_properties); 1205 } else { 1206 tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)}; 1207 } 1208 } 1209 tree_tokens_.resize(1); 1210 tree_tokens_[0].clear(); 1211 Tree decoded_tree; 1212 JXL_RETURN_IF_ERROR(TokenizeTree(tree_, tree_tokens_.data(), &decoded_tree)); 1213 JXL_ENSURE(tree_.size() == decoded_tree.size()); 1214 tree_ = std::move(decoded_tree); 1215 1216 /* TODO(szabadka) Add text output callback to cparams 1217 if (kPrintTree && WantDebugOutput(aux_out)) { 1218 if (frame_header.dc_level > 0) { 1219 PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" + 1220 std::to_string(frame_header.dc_level) + "_tree"); 1221 } else { 1222 PrintTree(tree_, aux_out->debug_prefix + "/global_tree"); 1223 } 1224 } */ 1225 return true; 1226 } 1227 1228 Status ModularFrameEncoder::ComputeTokens(ThreadPool* pool) { 1229 size_t num_streams = stream_images_.size(); 1230 stream_headers_.resize(num_streams); 1231 tokens_.resize(num_streams); 1232 image_widths_.resize(num_streams); 1233 const auto process_stream = [&](const uint32_t stream_id, 1234 size_t /* thread */) -> Status { 1235 AuxOut my_aux_out; 1236 tokens_[stream_id].clear(); 1237 JXL_RETURN_IF_ERROR(ModularGenericCompress( 1238 stream_images_[stream_id], stream_options_[stream_id], 1239 /*writer=*/nullptr, &my_aux_out, LayerType::Header, stream_id, 1240 /*tree_samples=*/nullptr, 1241 /*total_pixels=*/nullptr, 1242 /*tree=*/&tree_, /*header=*/&stream_headers_[stream_id], 1243 /*tokens=*/&tokens_[stream_id], 1244 /*widths=*/&image_widths_[stream_id])); 1245 return true; 1246 }; 1247 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_streams, ThreadPool::NoInit, 1248 process_stream, "ComputeTokens")); 1249 return true; 1250 } 1251 1252 Status ModularFrameEncoder::EncodeGlobalInfo(bool streaming_mode, 1253 BitWriter* writer, 1254 AuxOut* aux_out) { 1255 JxlMemoryManager* memory_manager = writer->memory_manager(); 1256 bool skip_rest = false; 1257 JXL_RETURN_IF_ERROR( 1258 writer->WithMaxBits(1, LayerType::ModularTree, aux_out, [&] { 1259 // If we are using brotli, or not using modular mode. 1260 if (tree_tokens_.empty() || tree_tokens_[0].empty()) { 1261 writer->Write(1, 0); 1262 skip_rest = true; 1263 } else { 1264 writer->Write(1, 1); 1265 } 1266 return true; 1267 })); 1268 if (skip_rest) return true; 1269 1270 // Write tree 1271 HistogramParams params = 1272 HistogramParams::ForModular(cparams_, extra_dc_precision, streaming_mode); 1273 { 1274 EntropyEncodingData tree_code; 1275 std::vector<uint8_t> tree_context_map; 1276 JXL_ASSIGN_OR_RETURN( 1277 size_t cost, 1278 BuildAndEncodeHistograms(memory_manager, params, kNumTreeContexts, 1279 tree_tokens_, &tree_code, &tree_context_map, 1280 writer, LayerType::ModularTree, aux_out)); 1281 (void)cost; 1282 JXL_RETURN_IF_ERROR(WriteTokens(tree_tokens_[0], tree_code, 1283 tree_context_map, 0, writer, 1284 LayerType::ModularTree, aux_out)); 1285 } 1286 params.streaming_mode = streaming_mode; 1287 params.add_missing_symbols = streaming_mode; 1288 params.image_widths = image_widths_; 1289 // Write histograms. 1290 JXL_ASSIGN_OR_RETURN( 1291 size_t cost, 1292 BuildAndEncodeHistograms(memory_manager, params, (tree_.size() + 1) / 2, 1293 tokens_, &code_, &context_map_, writer, 1294 LayerType::ModularGlobal, aux_out)); 1295 (void)cost; 1296 return true; 1297 } 1298 1299 Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out, 1300 LayerType layer, 1301 const ModularStreamId& stream) { 1302 size_t stream_id = stream.ID(frame_dim_); 1303 if (stream_images_[stream_id].channel.empty()) { 1304 JXL_DEBUG_V(10, "Modular stream %" PRIuS " is empty.", stream_id); 1305 return true; // Image with no channels, header never gets decoded. 1306 } 1307 if (tokens_.empty()) { 1308 JXL_RETURN_IF_ERROR(ModularGenericCompress( 1309 stream_images_[stream_id], stream_options_[stream_id], writer, aux_out, 1310 layer, stream_id)); 1311 } else { 1312 JXL_RETURN_IF_ERROR( 1313 Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out)); 1314 JXL_RETURN_IF_ERROR(WriteTokens(tokens_[stream_id], code_, context_map_, 0, 1315 writer, layer, aux_out)); 1316 } 1317 return true; 1318 } 1319 1320 void ModularFrameEncoder::ClearStreamData(const ModularStreamId& stream) { 1321 size_t stream_id = stream.ID(frame_dim_); 1322 Image empty_image(stream_images_[stream_id].memory_manager()); 1323 std::swap(stream_images_[stream_id], empty_image); 1324 } 1325 1326 void ModularFrameEncoder::ClearModularStreamData() { 1327 for (const auto& group : stream_params_) { 1328 ClearStreamData(group.id); 1329 } 1330 stream_params_.clear(); 1331 } 1332 1333 size_t ModularFrameEncoder::ComputeStreamingAbsoluteAcGroupId( 1334 size_t dc_group_id, size_t ac_group_id, 1335 const FrameDimensions& patch_dim) const { 1336 size_t dc_group_x = dc_group_id % frame_dim_.xsize_dc_groups; 1337 size_t dc_group_y = dc_group_id / frame_dim_.xsize_dc_groups; 1338 size_t ac_group_x = ac_group_id % patch_dim.xsize_groups; 1339 size_t ac_group_y = ac_group_id / patch_dim.xsize_groups; 1340 return (dc_group_x * 8 + ac_group_x) + 1341 (dc_group_y * 8 + ac_group_y) * frame_dim_.xsize_groups; 1342 } 1343 1344 Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect, 1345 const CompressParams& cparams_, 1346 int minShift, int maxShift, 1347 const ModularStreamId& stream, 1348 bool do_color, bool groupwise) { 1349 size_t stream_id = stream.ID(frame_dim_); 1350 if (stream_id == 0 && frame_dim_.num_groups != 1) { 1351 // If we have multiple groups, then the stream with ID 0 holds the full 1352 // image and we do not want to apply transforms or in general change the 1353 // pixel values. 1354 return true; 1355 } 1356 Image& full_image = stream_images_[0]; 1357 JxlMemoryManager* memory_manager = full_image.memory_manager(); 1358 const size_t xsize = rect.xsize(); 1359 const size_t ysize = rect.ysize(); 1360 Image& gi = stream_images_[stream_id]; 1361 if (stream_id > 0) { 1362 JXL_ASSIGN_OR_RETURN(gi, Image::Create(memory_manager, xsize, ysize, 1363 full_image.bitdepth, 0)); 1364 // start at the first bigger-than-frame_dim.group_dim non-metachannel 1365 size_t c = full_image.nb_meta_channels; 1366 if (!groupwise) { 1367 for (; c < full_image.channel.size(); c++) { 1368 Channel& fc = full_image.channel[c]; 1369 if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; 1370 } 1371 } 1372 for (; c < full_image.channel.size(); c++) { 1373 Channel& fc = full_image.channel[c]; 1374 int shift = std::min(fc.hshift, fc.vshift); 1375 if (shift > maxShift) continue; 1376 if (shift < minShift) continue; 1377 Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift, 1378 rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h); 1379 if (r.xsize() == 0 || r.ysize() == 0) continue; 1380 gi_channel_[stream_id].push_back(c); 1381 JXL_ASSIGN_OR_RETURN( 1382 Channel gc, Channel::Create(memory_manager, r.xsize(), r.ysize())); 1383 gc.hshift = fc.hshift; 1384 gc.vshift = fc.vshift; 1385 for (size_t y = 0; y < r.ysize(); ++y) { 1386 memcpy(gc.Row(y), r.ConstRow(fc.plane, y), 1387 r.xsize() * sizeof(pixel_type)); 1388 } 1389 gi.channel.emplace_back(std::move(gc)); 1390 } 1391 1392 if (gi.channel.empty()) return true; 1393 // Do some per-group transforms 1394 1395 // Local palette transforms 1396 // TODO(veluca): make this work with quantize-after-prediction in lossy 1397 // mode. 1398 if (cparams_.butteraugli_distance == 0.f && !cparams_.lossy_palette && 1399 cparams_.speed_tier < SpeedTier::kCheetah) { 1400 int max_bitdepth = 0, maxval = 0; // don't care about that here 1401 float channel_color_percent = 0; 1402 if (!(cparams_.responsive && cparams_.decoding_speed_tier >= 1)) { 1403 channel_color_percent = cparams_.channel_colors_percent; 1404 } 1405 try_palettes(gi, max_bitdepth, maxval, cparams_, channel_color_percent); 1406 } 1407 } 1408 1409 // lossless and no specific color transform specified: try Nothing, YCoCg, 1410 // and 17 RCTs 1411 if (cparams_.color_transform == ColorTransform::kNone && 1412 cparams_.IsLossless() && cparams_.colorspace < 0 && 1413 gi.channel.size() - gi.nb_meta_channels >= 3 && 1414 cparams_.responsive == JXL_FALSE && do_color && 1415 cparams_.speed_tier <= SpeedTier::kHare) { 1416 Transform sg(TransformId::kRCT); 1417 sg.begin_c = gi.nb_meta_channels; 1418 size_t nb_rcts_to_try = 0; 1419 switch (cparams_.speed_tier) { 1420 case SpeedTier::kLightning: 1421 case SpeedTier::kThunder: 1422 case SpeedTier::kFalcon: 1423 case SpeedTier::kCheetah: 1424 nb_rcts_to_try = 0; // Just do global YCoCg 1425 break; 1426 case SpeedTier::kHare: 1427 nb_rcts_to_try = 4; 1428 break; 1429 case SpeedTier::kWombat: 1430 nb_rcts_to_try = 5; 1431 break; 1432 case SpeedTier::kSquirrel: 1433 nb_rcts_to_try = 7; 1434 break; 1435 case SpeedTier::kKitten: 1436 nb_rcts_to_try = 9; 1437 break; 1438 case SpeedTier::kTectonicPlate: 1439 case SpeedTier::kGlacier: 1440 case SpeedTier::kTortoise: 1441 nb_rcts_to_try = 19; 1442 break; 1443 } 1444 float best_cost = std::numeric_limits<float>::max(); 1445 size_t best_rct = 0; 1446 // These should be 19 actually different transforms; the remaining ones 1447 // are equivalent to one of these (note that the first two are do-nothing 1448 // and YCoCg) modulo channel reordering (which only matters in the case of 1449 // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR) 1450 for (int i : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5, 1451 5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4, 1452 1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4, 1453 4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) { 1454 if (nb_rcts_to_try == 0) break; 1455 sg.rct_type = i; 1456 nb_rcts_to_try--; 1457 if (do_transform(gi, sg, weighted::Header())) { 1458 float cost = EstimateCost(gi); 1459 if (cost < best_cost) { 1460 best_rct = i; 1461 best_cost = cost; 1462 } 1463 Transform t = gi.transform.back(); 1464 JXL_RETURN_IF_ERROR(t.Inverse(gi, weighted::Header(), nullptr)); 1465 gi.transform.pop_back(); 1466 } 1467 } 1468 // Apply the best RCT to the image for future encoding. 1469 sg.rct_type = best_rct; 1470 do_transform(gi, sg, weighted::Header()); 1471 } else { 1472 // No need to try anything, just use the default options. 1473 } 1474 size_t nb_wp_modes = 1; 1475 if (cparams_.speed_tier <= SpeedTier::kTortoise) { 1476 nb_wp_modes = 5; 1477 } else if (cparams_.speed_tier <= SpeedTier::kKitten) { 1478 nb_wp_modes = 2; 1479 } 1480 if (nb_wp_modes > 1 && 1481 (stream_options_[stream_id].predictor == Predictor::Weighted || 1482 stream_options_[stream_id].predictor == Predictor::Best || 1483 stream_options_[stream_id].predictor == Predictor::Variable)) { 1484 float best_cost = std::numeric_limits<float>::max(); 1485 stream_options_[stream_id].wp_mode = 0; 1486 for (size_t i = 0; i < nb_wp_modes; i++) { 1487 float cost = EstimateWPCost(gi, i); 1488 if (cost < best_cost) { 1489 best_cost = cost; 1490 stream_options_[stream_id].wp_mode = i; 1491 } 1492 } 1493 } 1494 return true; 1495 } 1496 1497 constexpr float q_deadzone = 0.62f; 1498 int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y, 1499 size_t w, weighted::State* wp_state, float value, 1500 float inv_factor) { 1501 float svalue = value * inv_factor; 1502 PredictionResult pred = 1503 PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state); 1504 svalue -= pred.guess; 1505 if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; 1506 int residual = roundf(svalue); 1507 if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2; 1508 return residual + pred.guess; 1509 } 1510 1511 int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x, 1512 size_t y, size_t w, float value, float inv_factor) { 1513 float svalue = value * inv_factor; 1514 PredictionResult pred = 1515 PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient); 1516 svalue -= pred.guess; 1517 if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; 1518 int residual = roundf(svalue); 1519 if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2; 1520 return residual + pred.guess; 1521 } 1522 1523 Status ModularFrameEncoder::AddVarDCTDC(const FrameHeader& frame_header, 1524 const Image3F& dc, const Rect& r, 1525 size_t group_index, bool nl_dc, 1526 PassesEncoderState* enc_state, 1527 bool jpeg_transcode) { 1528 JxlMemoryManager* memory_manager = dc.memory_manager(); 1529 extra_dc_precision[group_index] = nl_dc ? 1 : 0; 1530 float mul = 1 << extra_dc_precision[group_index]; 1531 1532 size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_); 1533 stream_options_[stream_id].max_chan_size = 0xFFFFFF; 1534 stream_options_[stream_id].predictor = Predictor::Weighted; 1535 stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly; 1536 if (cparams_.speed_tier >= SpeedTier::kSquirrel) { 1537 stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC; 1538 } 1539 if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) { 1540 stream_options_[stream_id].predictor = 1541 (cparams_.speed_tier < SpeedTier::kKitten ? Predictor::Variable 1542 : Predictor::Best); 1543 stream_options_[stream_id].wp_tree_mode = 1544 ModularOptions::TreeMode::kDefault; 1545 stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; 1546 } 1547 if (cparams_.decoding_speed_tier >= 1) { 1548 stream_options_[stream_id].tree_kind = 1549 ModularOptions::TreeKind::kGradientFixedDC; 1550 } 1551 stream_options_[stream_id].histogram_params = 1552 stream_options_[0].histogram_params; 1553 1554 JXL_ASSIGN_OR_RETURN( 1555 stream_images_[stream_id], 1556 Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 3)); 1557 const ColorCorrelation& color_correlation = enc_state->shared.cmap.base(); 1558 if (nl_dc && stream_options_[stream_id].tree_kind == 1559 ModularOptions::TreeKind::kGradientFixedDC) { 1560 JXL_ENSURE(frame_header.chroma_subsampling.Is444()); 1561 for (size_t c : {1, 0, 2}) { 1562 float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; 1563 float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; 1564 float cfl_factor = color_correlation.DCFactors()[c]; 1565 for (size_t y = 0; y < r.ysize(); y++) { 1566 int32_t* quant_row = 1567 stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); 1568 size_t stride = stream_images_[stream_id] 1569 .channel[c < 2 ? c ^ 1 : c] 1570 .plane.PixelsPerRow(); 1571 const float* row = r.ConstPlaneRow(dc, c, y); 1572 if (c == 1) { 1573 for (size_t x = 0; x < r.xsize(); x++) { 1574 quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y, 1575 r.xsize(), row[x], inv_factor); 1576 } 1577 } else { 1578 int32_t* quant_row_y = 1579 stream_images_[stream_id].channel[0].plane.Row(y); 1580 for (size_t x = 0; x < r.xsize(); x++) { 1581 quant_row[x] = QuantizeGradient( 1582 quant_row, stride, c, x, y, r.xsize(), 1583 row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor); 1584 } 1585 } 1586 } 1587 } 1588 } else if (nl_dc) { 1589 JXL_ENSURE(frame_header.chroma_subsampling.Is444()); 1590 for (size_t c : {1, 0, 2}) { 1591 float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; 1592 float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; 1593 float cfl_factor = color_correlation.DCFactors()[c]; 1594 weighted::Header header; 1595 weighted::State wp_state(header, r.xsize(), r.ysize()); 1596 for (size_t y = 0; y < r.ysize(); y++) { 1597 int32_t* quant_row = 1598 stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); 1599 size_t stride = stream_images_[stream_id] 1600 .channel[c < 2 ? c ^ 1 : c] 1601 .plane.PixelsPerRow(); 1602 const float* row = r.ConstPlaneRow(dc, c, y); 1603 if (c == 1) { 1604 for (size_t x = 0; x < r.xsize(); x++) { 1605 quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(), 1606 &wp_state, row[x], inv_factor); 1607 wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); 1608 } 1609 } else { 1610 int32_t* quant_row_y = 1611 stream_images_[stream_id].channel[0].plane.Row(y); 1612 for (size_t x = 0; x < r.xsize(); x++) { 1613 quant_row[x] = QuantizeWP( 1614 quant_row, stride, c, x, y, r.xsize(), &wp_state, 1615 row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor); 1616 wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); 1617 } 1618 } 1619 } 1620 } 1621 } else if (frame_header.chroma_subsampling.Is444()) { 1622 for (size_t c : {1, 0, 2}) { 1623 float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; 1624 float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; 1625 float cfl_factor = color_correlation.DCFactors()[c]; 1626 for (size_t y = 0; y < r.ysize(); y++) { 1627 int32_t* quant_row = 1628 stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); 1629 const float* row = r.ConstPlaneRow(dc, c, y); 1630 if (c == 1) { 1631 for (size_t x = 0; x < r.xsize(); x++) { 1632 quant_row[x] = roundf(row[x] * inv_factor); 1633 } 1634 } else { 1635 int32_t* quant_row_y = 1636 stream_images_[stream_id].channel[0].plane.Row(y); 1637 for (size_t x = 0; x < r.xsize(); x++) { 1638 quant_row[x] = 1639 roundf((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) * 1640 inv_factor); 1641 } 1642 } 1643 } 1644 } 1645 } else { 1646 for (size_t c : {1, 0, 2}) { 1647 Rect rect(r.x0() >> frame_header.chroma_subsampling.HShift(c), 1648 r.y0() >> frame_header.chroma_subsampling.VShift(c), 1649 r.xsize() >> frame_header.chroma_subsampling.HShift(c), 1650 r.ysize() >> frame_header.chroma_subsampling.VShift(c)); 1651 float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; 1652 size_t ys = rect.ysize(); 1653 size_t xs = rect.xsize(); 1654 Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c]; 1655 ch.w = xs; 1656 ch.h = ys; 1657 JXL_RETURN_IF_ERROR(ch.shrink()); 1658 for (size_t y = 0; y < ys; y++) { 1659 int32_t* quant_row = ch.plane.Row(y); 1660 const float* row = rect.ConstPlaneRow(dc, c, y); 1661 for (size_t x = 0; x < xs; x++) { 1662 quant_row[x] = roundf(row[x] * inv_factor); 1663 } 1664 } 1665 } 1666 } 1667 1668 DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc, 1669 stream_images_[stream_id], enc_state->shared.quantizer.MulDC(), 1670 1.0 / mul, color_correlation.DCFactors(), 1671 frame_header.chroma_subsampling, enc_state->shared.block_ctx_map); 1672 return true; 1673 } 1674 1675 Status ModularFrameEncoder::AddACMetadata(const Rect& r, size_t group_index, 1676 bool jpeg_transcode, 1677 PassesEncoderState* enc_state) { 1678 JxlMemoryManager* memory_manager = enc_state->memory_manager(); 1679 size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_); 1680 stream_options_[stream_id].max_chan_size = 0xFFFFFF; 1681 if (stream_options_[stream_id].predictor != Predictor::Weighted) { 1682 stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP; 1683 } 1684 if (jpeg_transcode) { 1685 stream_options_[stream_id].tree_kind = 1686 ModularOptions::TreeKind::kJpegTranscodeACMeta; 1687 } else if (cparams_.speed_tier >= SpeedTier::kFalcon) { 1688 stream_options_[stream_id].tree_kind = 1689 ModularOptions::TreeKind::kFalconACMeta; 1690 } else if (cparams_.speed_tier > SpeedTier::kKitten) { 1691 stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta; 1692 } 1693 // If we are using a non-constant CfL field, and are in a slow enough mode, 1694 // re-enable tree computation for it. 1695 if (cparams_.speed_tier < SpeedTier::kSquirrel && 1696 cparams_.force_cfl_jpeg_recompression) { 1697 stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; 1698 } 1699 stream_options_[stream_id].histogram_params = 1700 stream_options_[0].histogram_params; 1701 // YToX, YToB, ACS + QF, EPF 1702 Image& image = stream_images_[stream_id]; 1703 JXL_ASSIGN_OR_RETURN( 1704 image, Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 4)); 1705 static_assert(kColorTileDimInBlocks == 8, "Color tile size changed"); 1706 Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3); 1707 JXL_ASSIGN_OR_RETURN( 1708 image.channel[0], 1709 Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3)); 1710 JXL_ASSIGN_OR_RETURN( 1711 image.channel[1], 1712 Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3)); 1713 JXL_ASSIGN_OR_RETURN( 1714 image.channel[2], 1715 Channel::Create(memory_manager, r.xsize() * r.ysize(), 2, 0, 0)); 1716 JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map, 1717 Rect(image.channel[0].plane), 1718 &image.channel[0].plane)); 1719 JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map, 1720 Rect(image.channel[1].plane), 1721 &image.channel[1].plane)); 1722 size_t num = 0; 1723 for (size_t y = 0; y < r.ysize(); y++) { 1724 AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y); 1725 const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y); 1726 const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y); 1727 int32_t* out_acs = image.channel[2].plane.Row(0); 1728 int32_t* out_qf = image.channel[2].plane.Row(1); 1729 int32_t* row_out_epf = image.channel[3].plane.Row(y); 1730 for (size_t x = 0; x < r.xsize(); x++) { 1731 row_out_epf[x] = row_epf[x]; 1732 if (!row_acs[x].IsFirstBlock()) continue; 1733 out_acs[num] = row_acs[x].RawStrategy(); 1734 out_qf[num] = row_qf[x] - 1; 1735 num++; 1736 } 1737 } 1738 image.channel[2].w = num; 1739 ac_metadata_size[group_index] = num; 1740 return true; 1741 } 1742 1743 Status ModularFrameEncoder::EncodeQuantTable( 1744 JxlMemoryManager* memory_manager, size_t size_x, size_t size_y, 1745 BitWriter* writer, const QuantEncoding& encoding, size_t idx, 1746 ModularFrameEncoder* modular_frame_encoder) { 1747 JXL_ENSURE(encoding.qraw.qtable); 1748 JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size()); 1749 JXL_ENSURE(idx < kNumQuantTables); 1750 int* qtable = encoding.qraw.qtable->data(); 1751 JXL_RETURN_IF_ERROR(F16Coder::Write(encoding.qraw.qtable_den, writer)); 1752 if (modular_frame_encoder) { 1753 JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx)); 1754 JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream( 1755 writer, nullptr, LayerType::Header, qt)); 1756 return true; 1757 } 1758 JXL_ASSIGN_OR_RETURN(Image image, 1759 Image::Create(memory_manager, size_x, size_y, 8, 3)); 1760 for (size_t c = 0; c < 3; c++) { 1761 for (size_t y = 0; y < size_y; y++) { 1762 int32_t* JXL_RESTRICT row = image.channel[c].Row(y); 1763 for (size_t x = 0; x < size_x; x++) { 1764 row[x] = qtable[c * size_x * size_y + y * size_x + x]; 1765 } 1766 } 1767 } 1768 ModularOptions cfopts; 1769 JXL_RETURN_IF_ERROR(ModularGenericCompress(image, cfopts, writer)); 1770 return true; 1771 } 1772 1773 Status ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y, 1774 const QuantEncoding& encoding, 1775 size_t idx) { 1776 JXL_ENSURE(idx < kNumQuantTables); 1777 JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx)); 1778 size_t stream_id = qt.ID(frame_dim_); 1779 JXL_ENSURE(encoding.qraw.qtable); 1780 JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size()); 1781 int* qtable = encoding.qraw.qtable->data(); 1782 Image& image = stream_images_[stream_id]; 1783 JxlMemoryManager* memory_manager = image.memory_manager(); 1784 JXL_ASSIGN_OR_RETURN(image, 1785 Image::Create(memory_manager, size_x, size_y, 8, 3)); 1786 for (size_t c = 0; c < 3; c++) { 1787 for (size_t y = 0; y < size_y; y++) { 1788 int32_t* JXL_RESTRICT row = image.channel[c].Row(y); 1789 for (size_t x = 0; x < size_x; x++) { 1790 row[x] = qtable[c * size_x * size_y + y * size_x + x]; 1791 } 1792 } 1793 } 1794 return true; 1795 } 1796 } // namespace jxl