tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

enc_modular.cc (74353B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jxl/enc_modular.h"
      7 
      8 #include <jxl/memory_manager.h>
      9 
     10 #include <array>
     11 #include <cstddef>
     12 #include <cstdint>
     13 #include <limits>
     14 #include <utility>
     15 #include <vector>
     16 
     17 #include "lib/jxl/base/compiler_specific.h"
     18 #include "lib/jxl/base/printf_macros.h"
     19 #include "lib/jxl/base/rect.h"
     20 #include "lib/jxl/base/status.h"
     21 #include "lib/jxl/chroma_from_luma.h"
     22 #include "lib/jxl/compressed_dc.h"
     23 #include "lib/jxl/dec_ans.h"
     24 #include "lib/jxl/dec_modular.h"
     25 #include "lib/jxl/enc_aux_out.h"
     26 #include "lib/jxl/enc_bit_writer.h"
     27 #include "lib/jxl/enc_cluster.h"
     28 #include "lib/jxl/enc_fields.h"
     29 #include "lib/jxl/enc_gaborish.h"
     30 #include "lib/jxl/enc_params.h"
     31 #include "lib/jxl/enc_patch_dictionary.h"
     32 #include "lib/jxl/enc_quant_weights.h"
     33 #include "lib/jxl/frame_dimensions.h"
     34 #include "lib/jxl/frame_header.h"
     35 #include "lib/jxl/modular/encoding/context_predict.h"
     36 #include "lib/jxl/modular/encoding/enc_encoding.h"
     37 #include "lib/jxl/modular/encoding/encoding.h"
     38 #include "lib/jxl/modular/encoding/ma_common.h"
     39 #include "lib/jxl/modular/modular_image.h"
     40 #include "lib/jxl/modular/options.h"
     41 #include "lib/jxl/modular/transform/enc_transform.h"
     42 #include "lib/jxl/pack_signed.h"
     43 #include "lib/jxl/quant_weights.h"
     44 #include "modular/options.h"
     45 
     46 namespace jxl {
     47 
     48 namespace {
     49 // constexpr bool kPrintTree = false;
     50 
     51 // Squeeze default quantization factors
     52 // these quantization factors are for -Q 50  (other qualities simply scale the
     53 // factors; things are rounded down and obviously cannot get below 1)
     54 const float squeeze_quality_factor =
     55    0.35;  // for easy tweaking of the quality range (decrease this number for
     56           // higher quality)
     57 const float squeeze_luma_factor =
     58    1.1;  // for easy tweaking of the balance between luma (or anything
     59          // non-chroma) and chroma (decrease this number for higher quality
     60          // luma)
     61 const float squeeze_quality_factor_xyb = 4.8f;
     62 const float squeeze_xyb_qtable[3][16] = {
     63    {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16,
     64     0.08, 0.04, 0.02, 0.01, 0.005},  // Y
     65    {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5,
     66     0.5},  // X
     67    {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5,
     68     0.5},  // B-Y
     69 };
     70 
     71 const float squeeze_luma_qtable[16] = {163.84, 81.92, 40.96, 20.48, 10.24, 5.12,
     72                                       2.56,   1.28,  0.64,  0.32,  0.16,  0.08,
     73                                       0.04,   0.02,  0.01,  0.005};
     74 // for 8-bit input, the range of YCoCg chroma is -255..255 so basically this
     75 // does 4:2:0 subsampling (two most fine grained layers get quantized away)
     76 const float squeeze_chroma_qtable[16] = {
     77    1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
     78 
     79 // Merges the trees in `trees` using nodes that decide on stream_id, as defined
     80 // by `tree_splits`.
     81 Status MergeTrees(const std::vector<Tree>& trees,
     82                  const std::vector<size_t>& tree_splits, size_t begin,
     83                  size_t end, Tree* tree) {
     84  JXL_ENSURE(trees.size() + 1 == tree_splits.size());
     85  JXL_ENSURE(end > begin);
     86  JXL_ENSURE(end <= trees.size());
     87  if (end == begin + 1) {
     88    // Insert the tree, adding the opportune offset to all child nodes.
     89    // This will make the leaf IDs wrong, but subsequent roundtripping will fix
     90    // them.
     91    size_t sz = tree->size();
     92    tree->insert(tree->end(), trees[begin].begin(), trees[begin].end());
     93    for (size_t i = sz; i < tree->size(); i++) {
     94      (*tree)[i].lchild += sz;
     95      (*tree)[i].rchild += sz;
     96    }
     97    return true;
     98  }
     99  size_t mid = (begin + end) / 2;
    100  size_t splitval = tree_splits[mid] - 1;
    101  size_t cur = tree->size();
    102  tree->emplace_back(1 /*stream_id*/, splitval, 0, 0, Predictor::Zero, 0, 1);
    103  (*tree)[cur].lchild = tree->size();
    104  JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, mid, end, tree));
    105  (*tree)[cur].rchild = tree->size();
    106  JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, begin, mid, tree));
    107  return true;
    108 }
    109 
    110 void QuantizeChannel(Channel& ch, const int q) {
    111  if (q == 1) return;
    112  for (size_t y = 0; y < ch.plane.ysize(); y++) {
    113    pixel_type* row = ch.plane.Row(y);
    114    for (size_t x = 0; x < ch.plane.xsize(); x++) {
    115      if (row[x] < 0) {
    116        row[x] = -((-row[x] + q / 2) / q) * q;
    117      } else {
    118        row[x] = ((row[x] + q / 2) / q) * q;
    119      }
    120    }
    121  }
    122 }
    123 
    124 // convert binary32 float that corresponds to custom [bits]-bit float (with
    125 // [exp_bits] exponent bits) to a [bits]-bit integer representation that should
    126 // fit in pixel_type
    127 Status float_to_int(const float* const row_in, pixel_type* const row_out,
    128                    size_t xsize, unsigned int bits, unsigned int exp_bits,
    129                    bool fp, double dfactor) {
    130  JXL_ENSURE(sizeof(pixel_type) * 8 >= bits);
    131  if (!fp) {
    132    if (bits > 22) {
    133      for (size_t x = 0; x < xsize; ++x) {
    134        row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5);
    135      }
    136    } else {
    137      float factor = dfactor;
    138      for (size_t x = 0; x < xsize; ++x) {
    139        row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f);
    140      }
    141    }
    142    return true;
    143  }
    144  if (bits == 32 && fp) {
    145    JXL_ENSURE(exp_bits == 8);
    146    memcpy(static_cast<void*>(row_out), static_cast<const void*>(row_in),
    147           4 * xsize);
    148    return true;
    149  }
    150 
    151  JXL_ENSURE(bits > 0);
    152  int exp_bias = (1 << (exp_bits - 1)) - 1;
    153  int max_exp = (1 << exp_bits) - 1;
    154  uint32_t sign = (1u << (bits - 1));
    155  int mant_bits = bits - exp_bits - 1;
    156  int mant_shift = 23 - mant_bits;
    157  for (size_t x = 0; x < xsize; ++x) {
    158    uint32_t f;
    159    memcpy(&f, &row_in[x], 4);
    160    int signbit = (f >> 31);
    161    f &= 0x7fffffff;
    162    if (f == 0) {
    163      row_out[x] = (signbit ? sign : 0);
    164      continue;
    165    }
    166    int exp = (f >> 23) - 127;
    167    if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed");
    168    int mantissa = (f & 0x007fffff);
    169    // broke up the binary32 into its parts, now reassemble into
    170    // arbitrary float
    171    exp += exp_bias;
    172    if (exp < 0) {  // will become a subnormal number
    173      // add implicit leading 1 to mantissa
    174      mantissa |= 0x00800000;
    175      if (exp < -mant_bits) {
    176        return JXL_FAILURE(
    177            "Invalid float number: %g cannot be represented with %i "
    178            "exp_bits and %i mant_bits (exp %i)",
    179            row_in[x], exp_bits, mant_bits, exp);
    180      }
    181      mantissa >>= 1 - exp;
    182      exp = 0;
    183    }
    184    // exp should be representable in exp_bits, otherwise input was
    185    // invalid
    186    if (exp > max_exp) return JXL_FAILURE("Invalid float exponent");
    187    if (mantissa & ((1 << mant_shift) - 1)) {
    188      return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x],
    189                         mantissa);
    190    }
    191    mantissa >>= mant_shift;
    192    f = (signbit ? sign : 0);
    193    f |= (exp << mant_bits);
    194    f |= mantissa;
    195    row_out[x] = static_cast<pixel_type>(f);
    196  }
    197  return true;
    198 }
    199 
    200 float EstimateWPCost(const Image& img, size_t i) {
    201  size_t extra_bits = 0;
    202  float histo_cost = 0;
    203  HybridUintConfig config;
    204  int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31,
    205                       -23,  -15,  -11,  -7,   -4,   -3,  -1,  0,   1,
    206                       3,    5,    7,    11,   15,   23,  31,  47,  63,
    207                       95,   127,  191,  255,  392,  500};
    208  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
    209  Histogram histo[nc] = {};
    210  weighted::Header wp_header;
    211  PredictorMode(i, &wp_header);
    212  for (const Channel& ch : img.channel) {
    213    const intptr_t onerow = ch.plane.PixelsPerRow();
    214    weighted::State wp_state(wp_header, ch.w, ch.h);
    215    Properties properties(1);
    216    for (size_t y = 0; y < ch.h; y++) {
    217      const pixel_type* JXL_RESTRICT r = ch.Row(y);
    218      for (size_t x = 0; x < ch.w; x++) {
    219        size_t offset = 0;
    220        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
    221        pixel_type_w top = (y ? *(r + x - onerow) : left);
    222        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
    223        pixel_type_w topright =
    224            (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top);
    225        pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
    226        pixel_type guess = wp_state.Predict</*compute_properties=*/true>(
    227            x, y, ch.w, top, left, topright, topleft, toptop, &properties,
    228            offset);
    229        size_t ctx = 0;
    230        for (int c : cutoffs) {
    231          ctx += (c >= properties[0]) ? 1 : 0;
    232        }
    233        pixel_type res = r[x] - guess;
    234        uint32_t token;
    235        uint32_t nbits;
    236        uint32_t bits;
    237        config.Encode(PackSigned(res), &token, &nbits, &bits);
    238        histo[ctx].Add(token);
    239        extra_bits += nbits;
    240        wp_state.UpdateErrors(r[x], x, y, ch.w);
    241      }
    242    }
    243    for (auto& h : histo) {
    244      histo_cost += h.ShannonEntropy();
    245      h.Clear();
    246    }
    247  }
    248  return histo_cost + extra_bits;
    249 }
    250 
    251 float EstimateCost(const Image& img) {
    252  // TODO(veluca): consider SIMDfication of this code.
    253  size_t extra_bits = 0;
    254  float histo_cost = 0;
    255  HybridUintConfig config;
    256  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
    257                        47, 63, 95, 127, 191, 255, 392, 500};
    258  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
    259  Histogram histo[nc] = {};
    260  for (const Channel& ch : img.channel) {
    261    const intptr_t onerow = ch.plane.PixelsPerRow();
    262    for (size_t y = 0; y < ch.h; y++) {
    263      const pixel_type* JXL_RESTRICT r = ch.Row(y);
    264      for (size_t x = 0; x < ch.w; x++) {
    265        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
    266        pixel_type_w top = (y ? *(r + x - onerow) : left);
    267        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
    268        size_t maxdiff = std::max(std::max(left, top), topleft) -
    269                         std::min(std::min(left, top), topleft);
    270        size_t ctx = 0;
    271        for (uint32_t c : cutoffs) {
    272          ctx += (c > maxdiff) ? 1 : 0;
    273        }
    274        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
    275        uint32_t token;
    276        uint32_t nbits;
    277        uint32_t bits;
    278        config.Encode(PackSigned(res), &token, &nbits, &bits);
    279        histo[ctx].Add(token);
    280        extra_bits += nbits;
    281      }
    282    }
    283    for (auto& h : histo) {
    284      histo_cost += h.ShannonEntropy();
    285      h.Clear();
    286    }
    287  }
    288  return histo_cost + extra_bits;
    289 }
    290 
    291 bool do_transform(Image& image, const Transform& tr,
    292                  const weighted::Header& wp_header,
    293                  jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) {
    294  Transform t = tr;
    295  bool did_it = true;
    296  if (force_jxlart) {
    297    if (!t.MetaApply(image)) return false;
    298  } else {
    299    did_it = TransformForward(t, image, wp_header, pool);
    300  }
    301  if (did_it) image.transform.push_back(t);
    302  return did_it;
    303 }
    304 
    305 bool maybe_do_transform(Image& image, const Transform& tr,
    306                        const CompressParams& cparams,
    307                        const weighted::Header& wp_header, float cost_before,
    308                        jxl::ThreadPool* pool = nullptr,
    309                        bool force_jxlart = false) {
    310  if (force_jxlart || cparams.speed_tier >= SpeedTier::kSquirrel) {
    311    return do_transform(image, tr, wp_header, pool, force_jxlart);
    312  }
    313  bool did_it = do_transform(image, tr, wp_header, pool);
    314  if (did_it) {
    315    float cost_after = EstimateCost(image);
    316    JXL_DEBUG_V(7, "Cost before: %f  cost after: %f", cost_before, cost_after);
    317    if (cost_after > cost_before) {
    318      Transform t = image.transform.back();
    319      JXL_RETURN_IF_ERROR(t.Inverse(image, wp_header, pool));
    320      image.transform.pop_back();
    321      did_it = false;
    322    }
    323  }
    324  return did_it;
    325 }
    326 
    327 void try_palettes(Image& gi, int& max_bitdepth, int& maxval,
    328                  const CompressParams& cparams_, float channel_colors_percent,
    329                  jxl::ThreadPool* pool = nullptr) {
    330  float cost_before = 0.f;
    331  size_t did_palette = 0;
    332  float nb_pixels = gi.channel[0].w * gi.channel[0].h;
    333  int nb_chans = gi.channel.size() - gi.nb_meta_channels;
    334  // arbitrary estimate: 4.8 bpp for 8-bit RGB
    335  float arbitrary_bpp_estimate = 0.2f * gi.bitdepth * nb_chans;
    336 
    337  if (cparams_.palette_colors != 0 || cparams_.lossy_palette) {
    338    // when not estimating, assume some arbitrary bpp
    339    cost_before = cparams_.speed_tier <= SpeedTier::kSquirrel
    340                      ? EstimateCost(gi)
    341                      : nb_pixels * arbitrary_bpp_estimate;
    342    // all-channel palette (e.g. RGBA)
    343    if (nb_chans > 1) {
    344      Transform maybe_palette(TransformId::kPalette);
    345      maybe_palette.begin_c = gi.nb_meta_channels;
    346      maybe_palette.num_c = nb_chans;
    347      // Heuristic choice of max colors for a palette:
    348      // max_colors = nb_pixels * estimated_bpp_without_palette * 0.0005 +
    349      //              + nb_pixels / 128 + 128
    350      //       (estimated_bpp_without_palette = cost_before / nb_pixels)
    351      // Rationale: small image with large palette is not effective;
    352      // also if the entropy (estimated bpp) is low (e.g. mostly solid/gradient
    353      // areas), palette is less useful and may even be counterproductive.
    354      maybe_palette.nb_colors = std::min(
    355          static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128),
    356          std::abs(cparams_.palette_colors));
    357      maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
    358      maybe_palette.lossy_palette =
    359          (cparams_.lossy_palette && maybe_palette.num_c == 3);
    360      if (maybe_palette.lossy_palette) {
    361        maybe_palette.predictor = Predictor::Average4;
    362      }
    363      // TODO(veluca): use a custom weighted header if using the weighted
    364      // predictor.
    365      if (maybe_do_transform(gi, maybe_palette, cparams_, weighted::Header(),
    366                             cost_before, pool, cparams_.options.zero_tokens)) {
    367        did_palette = 1;
    368      };
    369    }
    370    // all-minus-one-channel palette (RGB with separate alpha, or CMY with
    371    // separate K)
    372    if (!did_palette && nb_chans > 3) {
    373      Transform maybe_palette_3(TransformId::kPalette);
    374      maybe_palette_3.begin_c = gi.nb_meta_channels;
    375      maybe_palette_3.num_c = nb_chans - 1;
    376      maybe_palette_3.nb_colors = std::min(
    377          static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128),
    378          std::abs(cparams_.palette_colors));
    379      maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0;
    380      maybe_palette_3.lossy_palette = cparams_.lossy_palette;
    381      if (maybe_palette_3.lossy_palette) {
    382        maybe_palette_3.predictor = Predictor::Average4;
    383      }
    384      if (maybe_do_transform(gi, maybe_palette_3, cparams_, weighted::Header(),
    385                             cost_before, pool, cparams_.options.zero_tokens)) {
    386        did_palette = 1;
    387      }
    388    }
    389  }
    390 
    391  if (channel_colors_percent > 0) {
    392    // single channel palette (like FLIF's ChannelCompact)
    393    size_t nb_channels = gi.channel.size() - gi.nb_meta_channels - did_palette;
    394    int orig_bitdepth = max_bitdepth;
    395    max_bitdepth = 0;
    396    if (nb_channels > 0 && (did_palette || cost_before == 0)) {
    397      cost_before =
    398          cparams_.speed_tier < SpeedTier::kSquirrel ? EstimateCost(gi) : 0;
    399    }
    400    for (size_t i = did_palette; i < nb_channels + did_palette; i++) {
    401      int32_t min;
    402      int32_t max;
    403      compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
    404      int64_t colors = static_cast<int64_t>(max) - min + 1;
    405      JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max);
    406      Transform maybe_palette_1(TransformId::kPalette);
    407      maybe_palette_1.begin_c = i + gi.nb_meta_channels;
    408      maybe_palette_1.num_c = 1;
    409      // simple heuristic: if less than X percent of the values in the range
    410      // actually occur, it is probably worth it to do a compaction
    411      // (but only if the channel palette is less than 6% the size of the
    412      // image itself)
    413      maybe_palette_1.nb_colors =
    414          std::min(static_cast<int>(nb_pixels / 16),
    415                   static_cast<int>(channel_colors_percent / 100. * colors));
    416      if (maybe_do_transform(gi, maybe_palette_1, cparams_, weighted::Header(),
    417                             cost_before, pool)) {
    418        // effective bit depth is lower, adjust quantization accordingly
    419        compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
    420        if (max < maxval) maxval = max;
    421        int ch_bitdepth =
    422            (max > 0 ? CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0);
    423        if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth;
    424      } else {
    425        max_bitdepth = orig_bitdepth;
    426      }
    427    }
    428  }
    429 }
    430 
    431 }  // namespace
    432 
    433 StatusOr<ModularFrameEncoder> ModularFrameEncoder::Create(
    434    JxlMemoryManager* memory_manager, const FrameHeader& frame_header,
    435    const CompressParams& cparams_orig, bool streaming_mode) {
    436  ModularFrameEncoder self{memory_manager};
    437  JXL_RETURN_IF_ERROR(self.Init(frame_header, cparams_orig, streaming_mode));
    438  return self;
    439 }
    440 
    441 ModularFrameEncoder::ModularFrameEncoder(JxlMemoryManager* memory_manager)
    442    : memory_manager_(memory_manager) {}
    443 
    444 Status ModularFrameEncoder::Init(const FrameHeader& frame_header,
    445                                 const CompressParams& cparams_orig,
    446                                 bool streaming_mode) {
    447  frame_dim_ = frame_header.ToFrameDimensions();
    448  cparams_ = cparams_orig;
    449 
    450  size_t num_streams =
    451      ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes);
    452  if (cparams_.ModularPartIsLossless()) {
    453    switch (cparams_.decoding_speed_tier) {
    454      case 0:
    455        break;
    456      case 1:
    457        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
    458        break;
    459      case 2: {
    460        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
    461        cparams_.options.predictor = Predictor::Gradient;
    462        break;
    463      }
    464      case 3: {  // LZ77, no Gradient.
    465        cparams_.options.nb_repeats = 0;
    466        cparams_.options.predictor = Predictor::Gradient;
    467        break;
    468      }
    469      default: {  // LZ77, no predictor.
    470        cparams_.options.nb_repeats = 0;
    471        cparams_.options.predictor = Predictor::Zero;
    472        break;
    473      }
    474    }
    475  }
    476  if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive &&
    477      cparams_.ModularPartIsLossless()) {
    478    cparams_.options.tree_kind =
    479        ModularOptions::TreeKind::kTrivialTreeNoPredictor;
    480    cparams_.options.nb_repeats = 0;
    481  }
    482  for (size_t i = 0; i < num_streams; ++i) {
    483    stream_images_.emplace_back(memory_manager_);
    484  }
    485 
    486  // use a sensible default if nothing explicit is specified:
    487  // Squeeze for lossy, no squeeze for lossless
    488  if (cparams_.responsive < 0) {
    489    if (cparams_.ModularPartIsLossless()) {
    490      cparams_.responsive = 0;
    491    } else {
    492      cparams_.responsive = 1;
    493    }
    494  }
    495 
    496  cparams_.options.splitting_heuristics_node_threshold =
    497      82 + 14 * static_cast<int>(cparams_.speed_tier);
    498 
    499  {
    500    // Set properties.
    501    std::vector<uint32_t> prop_order;
    502    if (cparams_.responsive) {
    503      // Properties in order of their likelihood of being useful for Squeeze
    504      // residuals.
    505      prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3};
    506    } else {
    507      // Same, but for the non-Squeeze case.
    508      prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8};
    509      // if few groups, don't use group as a property
    510      if (num_streams < 30 && cparams_.speed_tier > SpeedTier::kTortoise &&
    511          cparams_orig.ModularPartIsLossless()) {
    512        prop_order.erase(prop_order.begin() + 1);
    513      }
    514    }
    515    int max_properties = std::min<int>(
    516        cparams_.options.max_properties,
    517        static_cast<int>(
    518            frame_header.nonserialized_metadata->m.num_extra_channels) +
    519            (frame_header.encoding == FrameEncoding::kModular ? 2 : -1));
    520    switch (cparams_.speed_tier) {
    521      case SpeedTier::kHare:
    522        cparams_.options.splitting_heuristics_properties.assign(
    523            prop_order.begin(), prop_order.begin() + 4);
    524        cparams_.options.max_property_values = 24;
    525        break;
    526      case SpeedTier::kWombat:
    527        cparams_.options.splitting_heuristics_properties.assign(
    528            prop_order.begin(), prop_order.begin() + 5);
    529        cparams_.options.max_property_values = 32;
    530        break;
    531      case SpeedTier::kSquirrel:
    532        cparams_.options.splitting_heuristics_properties.assign(
    533            prop_order.begin(), prop_order.begin() + 7);
    534        cparams_.options.max_property_values = 48;
    535        break;
    536      case SpeedTier::kKitten:
    537        cparams_.options.splitting_heuristics_properties.assign(
    538            prop_order.begin(), prop_order.begin() + 10);
    539        cparams_.options.max_property_values = 96;
    540        break;
    541      case SpeedTier::kGlacier:
    542      case SpeedTier::kTortoise:
    543        cparams_.options.splitting_heuristics_properties = prop_order;
    544        cparams_.options.max_property_values = 256;
    545        break;
    546      default:
    547        cparams_.options.splitting_heuristics_properties.assign(
    548            prop_order.begin(), prop_order.begin() + 3);
    549        cparams_.options.max_property_values = 16;
    550        break;
    551    }
    552    if (cparams_.speed_tier > SpeedTier::kTortoise) {
    553      // Gradient in previous channels.
    554      for (int i = 0; i < max_properties; i++) {
    555        cparams_.options.splitting_heuristics_properties.push_back(
    556            kNumNonrefProperties + i * 4 + 3);
    557      }
    558    } else {
    559      // All the extra properties in Tortoise mode.
    560      for (int i = 0; i < max_properties * 4; i++) {
    561        cparams_.options.splitting_heuristics_properties.push_back(
    562            kNumNonrefProperties + i);
    563      }
    564    }
    565  }
    566 
    567  if ((cparams_.options.predictor == Predictor::Average0 ||
    568       cparams_.options.predictor == Predictor::Average1 ||
    569       cparams_.options.predictor == Predictor::Average2 ||
    570       cparams_.options.predictor == Predictor::Average3 ||
    571       cparams_.options.predictor == Predictor::Average4 ||
    572       cparams_.options.predictor == Predictor::Weighted) &&
    573      !cparams_.ModularPartIsLossless()) {
    574    // Lossy + Average/Weighted predictors does not work, so switch to default
    575    // predictors.
    576    cparams_.options.predictor = kUndefinedPredictor;
    577  }
    578 
    579  if (cparams_.options.predictor == kUndefinedPredictor) {
    580    // no explicit predictor(s) given, set a good default
    581    if ((cparams_.speed_tier <= SpeedTier::kGlacier ||
    582         cparams_.modular_mode == false) &&
    583        cparams_.IsLossless() && cparams_.responsive == JXL_FALSE) {
    584      // TODO(veluca): allow all predictors that don't break residual
    585      // multipliers in lossy mode.
    586      cparams_.options.predictor = Predictor::Variable;
    587    } else if (cparams_.responsive || cparams_.lossy_palette) {
    588      // zero predictor for Squeeze residues and lossy palette
    589      cparams_.options.predictor = Predictor::Zero;
    590    } else if (!cparams_.IsLossless()) {
    591      // If not responsive and lossy. TODO(veluca): use near_lossless instead?
    592      cparams_.options.predictor = Predictor::Gradient;
    593    } else if (cparams_.speed_tier < SpeedTier::kFalcon) {
    594      // try median and weighted predictor for anything else
    595      cparams_.options.predictor = Predictor::Best;
    596    } else if (cparams_.speed_tier == SpeedTier::kFalcon) {
    597      // just weighted predictor in falcon mode
    598      cparams_.options.predictor = Predictor::Weighted;
    599    } else if (cparams_.speed_tier > SpeedTier::kFalcon) {
    600      // just gradient predictor in thunder mode
    601      cparams_.options.predictor = Predictor::Gradient;
    602    }
    603  } else {
    604    if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero;
    605  }
    606  if (!cparams_.ModularPartIsLossless()) {
    607    if (cparams_.options.predictor == Predictor::Weighted ||
    608        cparams_.options.predictor == Predictor::Variable ||
    609        cparams_.options.predictor == Predictor::Best)
    610      cparams_.options.predictor = Predictor::Zero;
    611  }
    612  tree_splits_.push_back(0);
    613  if (cparams_.modular_mode == false) {
    614    JXL_ASSIGN_OR_RETURN(ModularStreamId qt0, ModularStreamId::QuantTable(0));
    615    cparams_.options.fast_decode_multiplier = 1.0f;
    616    tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_));
    617    tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_));
    618    tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_));
    619    tree_splits_.push_back(qt0.ID(frame_dim_));
    620    tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_));
    621    ac_metadata_size.resize(frame_dim_.num_dc_groups);
    622    extra_dc_precision.resize(frame_dim_.num_dc_groups);
    623  }
    624  tree_splits_.push_back(num_streams);
    625  cparams_.options.max_chan_size = frame_dim_.group_dim;
    626  cparams_.options.group_dim = frame_dim_.group_dim;
    627 
    628  // TODO(veluca): figure out how to use different predictor sets per channel.
    629  stream_options_.resize(num_streams, cparams_.options);
    630 
    631  stream_options_[0] = cparams_.options;
    632  if (cparams_.speed_tier == SpeedTier::kFalcon) {
    633    stream_options_[0].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
    634  } else if (cparams_.speed_tier == SpeedTier::kThunder) {
    635    stream_options_[0].tree_kind = ModularOptions::TreeKind::kGradientFixedDC;
    636  }
    637  stream_options_[0].histogram_params =
    638      HistogramParams::ForModular(cparams_, {}, streaming_mode);
    639  return true;
    640 }
    641 
    642 Status ModularFrameEncoder::ComputeEncodingData(
    643    const FrameHeader& frame_header, const ImageMetadata& metadata,
    644    Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels,
    645    const Rect& group_rect, const FrameDimensions& patch_dim,
    646    const Rect& frame_area_rect, PassesEncoderState* JXL_RESTRICT enc_state,
    647    const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out,
    648    bool do_color) {
    649  JxlMemoryManager* memory_manager = enc_state->memory_manager();
    650  JXL_DEBUG_V(6, "Computing modular encoding data for frame %s",
    651              frame_header.DebugString().c_str());
    652 
    653  bool groupwise = enc_state->streaming_mode;
    654 
    655  if (do_color && frame_header.loop_filter.gab && !groupwise) {
    656    float w = 0.9908511000000001f;
    657    float weights[3] = {w, w, w};
    658    JXL_RETURN_IF_ERROR(GaborishInverse(color, Rect(*color), weights, pool));
    659  }
    660 
    661  if (do_color && metadata.bit_depth.bits_per_sample <= 16 &&
    662      cparams_.speed_tier < SpeedTier::kCheetah &&
    663      cparams_.decoding_speed_tier < 2 && !groupwise) {
    664    JXL_RETURN_IF_ERROR(FindBestPatchDictionary(
    665        *color, enc_state, cms, nullptr, aux_out,
    666        cparams_.color_transform == ColorTransform::kXYB));
    667    JXL_RETURN_IF_ERROR(PatchDictionaryEncoder::SubtractFrom(
    668        enc_state->shared.image_features.patches, color));
    669  }
    670 
    671  if (cparams_.custom_splines.HasAny()) {
    672    PassesSharedState& shared = enc_state->shared;
    673    ImageFeatures& image_features = shared.image_features;
    674    image_features.splines = cparams_.custom_splines;
    675  }
    676 
    677  // Convert ImageBundle to modular Image object
    678  const size_t xsize = patch_dim.xsize;
    679  const size_t ysize = patch_dim.ysize;
    680 
    681  int nb_chans = 3;
    682  if (metadata.color_encoding.IsGray() &&
    683      cparams_.color_transform == ColorTransform::kNone) {
    684    nb_chans = 1;
    685  }
    686  if (!do_color) nb_chans = 0;
    687 
    688  nb_chans += extra_channels.size();
    689 
    690  bool fp = metadata.bit_depth.floating_point_sample &&
    691            cparams_.color_transform != ColorTransform::kXYB;
    692 
    693  // bits_per_sample is just metadata for XYB images.
    694  if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
    695      cparams_.color_transform != ColorTransform::kXYB) {
    696    if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
    697      return JXL_FAILURE("uint32_t not supported in enc_modular");
    698    } else if (metadata.bit_depth.bits_per_sample > 32) {
    699      return JXL_FAILURE("bits_per_sample > 32 not supported");
    700    }
    701  }
    702 
    703  // in the non-float case, there is an implicit 0 sign bit
    704  int max_bitdepth =
    705      do_color ? metadata.bit_depth.bits_per_sample + (fp ? 0 : 1) : 0;
    706  Image& gi = stream_images_[0];
    707  JXL_ASSIGN_OR_RETURN(
    708      gi, Image::Create(memory_manager, xsize, ysize,
    709                        metadata.bit_depth.bits_per_sample, nb_chans));
    710  int c = 0;
    711  if (cparams_.color_transform == ColorTransform::kXYB &&
    712      cparams_.modular_mode == true) {
    713    float enc_factors[3] = {65536.0f, 4096.0f, 4096.0f};
    714    if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) {
    715      // quantize XYB here and then treat it as a lossless image
    716      enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance);
    717      enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
    718      enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
    719      cparams_.butteraugli_distance = 0;
    720    }
    721    if (cparams_.manual_xyb_factors.size() == 3) {
    722      JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC(
    723          memory_manager, &enc_state->shared.matrices,
    724          cparams_.manual_xyb_factors.data()));
    725      // TODO(jon): update max_bitdepth in this case
    726    } else {
    727      JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC(
    728          memory_manager, &enc_state->shared.matrices, enc_factors));
    729      max_bitdepth = 12;
    730    }
    731  }
    732  pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0;
    733  if (do_color) {
    734    for (; c < 3; c++) {
    735      if (metadata.color_encoding.IsGray() &&
    736          cparams_.color_transform == ColorTransform::kNone &&
    737          c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0))
    738        continue;
    739      int c_out = c;
    740      // XYB is encoded as YX(B-Y)
    741      if (cparams_.color_transform == ColorTransform::kXYB && c < 2)
    742        c_out = 1 - c_out;
    743      double factor = maxval;
    744      if (cparams_.color_transform == ColorTransform::kXYB)
    745        factor = enc_state->shared.matrices.InvDCQuant(c);
    746      if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) {
    747        JXL_ENSURE(!fp);
    748        for (size_t y = 0; y < ysize; ++y) {
    749          const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
    750          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
    751          pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y);
    752          for (size_t x = 0; x < xsize; ++x) {
    753            // TODO(eustas): check if std::roundf is appropriate
    754            row_out[x] = row_in[x] * factor + 0.5f;
    755            row_out[x] -= row_Y[x];
    756          }
    757        }
    758      } else {
    759        int bits = metadata.bit_depth.bits_per_sample;
    760        int exp_bits = metadata.bit_depth.exponent_bits_per_sample;
    761        gi.channel[c_out].hshift = frame_header.chroma_subsampling.HShift(c);
    762        gi.channel[c_out].vshift = frame_header.chroma_subsampling.VShift(c);
    763        size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift);
    764        size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift);
    765        JXL_RETURN_IF_ERROR(
    766            gi.channel[c_out].shrink(xsize_shifted, ysize_shifted));
    767        const auto process_row = [&](const int task,
    768                                     const int thread) -> Status {
    769          const size_t y = task;
    770          const float* const JXL_RESTRICT row_in =
    771              color->PlaneRow(c, y + group_rect.y0()) + group_rect.x0();
    772          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
    773          JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out, xsize_shifted, bits,
    774                                           exp_bits, fp, factor));
    775          return true;
    776        };
    777        JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted,
    778                                      ThreadPool::NoInit, process_row,
    779                                      "float2int"));
    780      }
    781    }
    782    if (metadata.color_encoding.IsGray() &&
    783        cparams_.color_transform == ColorTransform::kNone)
    784      c = 1;
    785  }
    786 
    787  for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) {
    788    const ExtraChannelInfo& eci = metadata.extra_channel_info[ec];
    789    size_t ecups = frame_header.extra_channel_upsampling[ec];
    790    JXL_RETURN_IF_ERROR(
    791        gi.channel[c].shrink(DivCeil(patch_dim.xsize_upsampled, ecups),
    792                             DivCeil(patch_dim.ysize_upsampled, ecups)));
    793    gi.channel[c].hshift = gi.channel[c].vshift =
    794        CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
    795 
    796    int bits = eci.bit_depth.bits_per_sample;
    797    int exp_bits = eci.bit_depth.exponent_bits_per_sample;
    798    bool fp = eci.bit_depth.floating_point_sample;
    799    double factor = (fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1));
    800    if (bits + (fp ? 0 : 1) > max_bitdepth) max_bitdepth = bits + (fp ? 0 : 1);
    801    const auto process_row = [&](const int task, const int thread) -> Status {
    802      const size_t y = task;
    803      const float* const JXL_RESTRICT row_in =
    804          extra_channels[ec].Row(y + group_rect.y0()) + group_rect.x0();
    805      pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y);
    806      JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out,
    807                                       gi.channel[c].plane.xsize(), bits,
    808                                       exp_bits, fp, factor));
    809      return true;
    810    };
    811    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, gi.channel[c].plane.ysize(),
    812                                  ThreadPool::NoInit, process_row,
    813                                  "float2int"));
    814  }
    815  JXL_ENSURE(c == nb_chans);
    816 
    817  int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32);
    818  if (max_bitdepth > level_max_bitdepth) {
    819    return JXL_FAILURE(
    820        "Bitdepth too high for level %i (need %i bits, have only %i in this "
    821        "level)",
    822        cparams_.level, max_bitdepth, level_max_bitdepth);
    823  }
    824 
    825  // Set options and apply transformations
    826  if (!cparams_.ModularPartIsLossless()) {
    827    if (cparams_.palette_colors != 0) {
    828      JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms");
    829    }
    830    if (cparams_.color_transform == ColorTransform::kXYB) {
    831      cparams_.channel_colors_pre_transform_percent = 0;
    832    }
    833    cparams_.channel_colors_percent = 0;
    834    cparams_.palette_colors = 0;
    835    cparams_.lossy_palette = false;
    836  }
    837 
    838  // Global palette transforms
    839  float channel_colors_percent = 0;
    840  if (!cparams_.lossy_palette &&
    841      (cparams_.speed_tier <= SpeedTier::kThunder ||
    842       (do_color && metadata.bit_depth.bits_per_sample > 8))) {
    843    channel_colors_percent = cparams_.channel_colors_pre_transform_percent;
    844  }
    845  if (!groupwise) {
    846    try_palettes(gi, max_bitdepth, maxval, cparams_, channel_colors_percent,
    847                 pool);
    848  }
    849 
    850  // don't do an RCT if we're short on bits
    851  if (cparams_.color_transform == ColorTransform::kNone && do_color &&
    852      gi.channel.size() - gi.nb_meta_channels >= 3 &&
    853      max_bitdepth + 1 < level_max_bitdepth) {
    854    if (cparams_.colorspace < 0 && (!cparams_.ModularPartIsLossless() ||
    855                                    cparams_.speed_tier > SpeedTier::kHare)) {
    856      Transform ycocg{TransformId::kRCT};
    857      ycocg.rct_type = 6;
    858      ycocg.begin_c = gi.nb_meta_channels;
    859      do_transform(gi, ycocg, weighted::Header(), pool);
    860      max_bitdepth++;
    861    } else if (cparams_.colorspace > 0) {
    862      Transform sg(TransformId::kRCT);
    863      sg.begin_c = gi.nb_meta_channels;
    864      sg.rct_type = cparams_.colorspace;
    865      do_transform(gi, sg, weighted::Header(), pool);
    866      max_bitdepth++;
    867    }
    868  }
    869 
    870  if (cparams_.move_to_front_from_channel > 0) {
    871    for (size_t tgt = 0;
    872         tgt + cparams_.move_to_front_from_channel < gi.channel.size(); tgt++) {
    873      size_t pos = cparams_.move_to_front_from_channel;
    874      while (pos > 0) {
    875        Transform move(TransformId::kRCT);
    876        if (pos == 1) {
    877          move.begin_c = tgt;
    878          move.rct_type = 28;  // RGB -> GRB
    879          pos -= 1;
    880        } else {
    881          move.begin_c = tgt + pos - 2;
    882          move.rct_type = 14;  // RGB -> BRG
    883          pos -= 2;
    884        }
    885        do_transform(gi, move, weighted::Header(), pool);
    886      }
    887    }
    888  }
    889 
    890  // don't do squeeze if we don't have some spare bits
    891  if (!groupwise && cparams_.responsive && !gi.channel.empty() &&
    892      max_bitdepth + 2 < level_max_bitdepth) {
    893    Transform t(TransformId::kSqueeze);
    894    do_transform(gi, t, weighted::Header(), pool);
    895    max_bitdepth += 2;
    896  }
    897 
    898  if (max_bitdepth + 1 > level_max_bitdepth) {
    899    // force no group RCTs if we don't have a spare bit
    900    cparams_.colorspace = 0;
    901  }
    902  JXL_ENSURE(max_bitdepth <= level_max_bitdepth);
    903 
    904  if (!cparams_.ModularPartIsLossless()) {
    905    quants_.resize(gi.channel.size(), 1);
    906    float quantizer = 0.25f;
    907    if (!cparams_.responsive) {
    908      JXL_DEBUG_V(1,
    909                  "Warning: lossy compression without Squeeze "
    910                  "transform is just color quantization.");
    911      quantizer *= 0.1f;
    912    }
    913    float bitdepth_correction = 1.f;
    914    if (cparams_.color_transform != ColorTransform::kXYB) {
    915      bitdepth_correction = maxval / 255.f;
    916    }
    917    std::vector<float> quantizers;
    918    for (size_t i = 0; i < 3; i++) {
    919      float dist = cparams_.butteraugli_distance;
    920      quantizers.push_back(quantizer * dist * bitdepth_correction);
    921    }
    922    for (size_t i = 0; i < extra_channels.size(); i++) {
    923      int ec_bitdepth =
    924          metadata.extra_channel_info[i].bit_depth.bits_per_sample;
    925      pixel_type ec_maxval = ec_bitdepth < 32 ? (1u << ec_bitdepth) - 1 : 0;
    926      bitdepth_correction = ec_maxval / 255.f;
    927      float dist = 0;
    928      if (i < cparams_.ec_distance.size()) dist = cparams_.ec_distance[i];
    929      if (dist < 0) dist = cparams_.butteraugli_distance;
    930      quantizers.push_back(quantizer * dist * bitdepth_correction);
    931    }
    932    if (cparams_.options.nb_repeats == 0) {
    933      return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!");
    934    }
    935    for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) {
    936      Channel& ch = gi.channel[i];
    937      int shift = ch.hshift + ch.vshift;  // number of pixel halvings
    938      if (shift > 16) shift = 16;
    939      if (shift > 0) shift--;
    940      int q;
    941      // assuming default Squeeze here
    942      int component =
    943          (do_color ? 0 : 3) + ((i - gi.nb_meta_channels) % nb_chans);
    944      // last 4 channels are final chroma residuals
    945      if (nb_chans > 2 && i >= gi.channel.size() - 4 && cparams_.responsive) {
    946        component = 1;
    947      }
    948      if (cparams_.color_transform == ColorTransform::kXYB && component < 3) {
    949        q = quantizers[component] * squeeze_quality_factor_xyb *
    950            squeeze_xyb_qtable[component][shift];
    951      } else {
    952        if (cparams_.colorspace != 0 && component > 0 && component < 3) {
    953          q = quantizers[component] * squeeze_quality_factor *
    954              squeeze_chroma_qtable[shift];
    955        } else {
    956          q = quantizers[component] * squeeze_quality_factor *
    957              squeeze_luma_factor * squeeze_luma_qtable[shift];
    958        }
    959      }
    960      if (q < 1) q = 1;
    961      QuantizeChannel(gi.channel[i], q);
    962      quants_[i] = q;
    963    }
    964  }
    965 
    966  // Fill other groups.
    967  // DC
    968  for (size_t group_id = 0; group_id < patch_dim.num_dc_groups; group_id++) {
    969    const size_t rgx = group_id % patch_dim.xsize_dc_groups;
    970    const size_t rgy = group_id / patch_dim.xsize_dc_groups;
    971    const Rect rect(rgx * patch_dim.dc_group_dim, rgy * patch_dim.dc_group_dim,
    972                    patch_dim.dc_group_dim, patch_dim.dc_group_dim);
    973    size_t gx = rgx + frame_area_rect.x0() / 2048;
    974    size_t gy = rgy + frame_area_rect.y0() / 2048;
    975    size_t real_group_id = gy * frame_dim_.xsize_dc_groups + gx;
    976    // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim
    977    // maxShift==1000 is infinity
    978    stream_params_.push_back(
    979        GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(real_group_id)});
    980  }
    981  // AC global -> nothing.
    982  // AC
    983  for (size_t group_id = 0; group_id < patch_dim.num_groups; group_id++) {
    984    const size_t rgx = group_id % patch_dim.xsize_groups;
    985    const size_t rgy = group_id / patch_dim.xsize_groups;
    986    const Rect mrect(rgx * patch_dim.group_dim, rgy * patch_dim.group_dim,
    987                     patch_dim.group_dim, patch_dim.group_dim);
    988    size_t gx = rgx + frame_area_rect.x0() / (frame_dim_.group_dim);
    989    size_t gy = rgy + frame_area_rect.y0() / (frame_dim_.group_dim);
    990    size_t real_group_id = gy * frame_dim_.xsize_groups + gx;
    991    for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses();
    992         i++) {
    993      int maxShift;
    994      int minShift;
    995      frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift);
    996      stream_params_.push_back(
    997          GroupParams{mrect, minShift, maxShift,
    998                      ModularStreamId::ModularAC(real_group_id, i)});
    999    }
   1000  }
   1001  // if there's only one group, everything ends up in GlobalModular
   1002  // in that case, also try RCTs/WP params for the one group
   1003  if (stream_params_.size() == 2) {
   1004    stream_params_.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000,
   1005                                         ModularStreamId::Global()});
   1006  }
   1007  gi_channel_.resize(stream_images_.size());
   1008 
   1009  const auto process_row = [&](const uint32_t i,
   1010                               size_t /* thread */) -> Status {
   1011    size_t stream = stream_params_[i].id.ID(frame_dim_);
   1012    if (stream != 0) {
   1013      stream_options_[stream] = stream_options_[0];
   1014    }
   1015    JXL_RETURN_IF_ERROR(PrepareStreamParams(
   1016        stream_params_[i].rect, cparams_, stream_params_[i].minShift,
   1017        stream_params_[i].maxShift, stream_params_[i].id, do_color, groupwise));
   1018    return true;
   1019  };
   1020  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, stream_params_.size(),
   1021                                ThreadPool::NoInit, process_row,
   1022                                "ChooseParams"));
   1023  {
   1024    // Clear out channels that have been copied to groups.
   1025    Image& full_image = stream_images_[0];
   1026    size_t c = full_image.nb_meta_channels;
   1027    for (; c < full_image.channel.size(); c++) {
   1028      Channel& fc = full_image.channel[c];
   1029      if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
   1030    }
   1031    for (; c < full_image.channel.size(); c++) {
   1032      full_image.channel[c].plane = ImageI();
   1033    }
   1034  }
   1035 
   1036  JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0]));
   1037  return true;
   1038 }
   1039 
   1040 Status ModularFrameEncoder::ComputeTree(ThreadPool* pool) {
   1041  std::vector<ModularMultiplierInfo> multiplier_info;
   1042  if (!quants_.empty()) {
   1043    for (uint32_t stream_id = 0; stream_id < stream_images_.size();
   1044         stream_id++) {
   1045      // skip non-modular stream_ids
   1046      if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
   1047      const Image& image = stream_images_[stream_id];
   1048      const ModularOptions& options = stream_options_[stream_id];
   1049      for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) {
   1050        if (i >= image.nb_meta_channels &&
   1051            (image.channel[i].w > options.max_chan_size ||
   1052             image.channel[i].h > options.max_chan_size)) {
   1053          continue;
   1054        }
   1055        if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
   1056        size_t ch_id = stream_id == 0
   1057                           ? i
   1058                           : gi_channel_[stream_id][i - image.nb_meta_channels];
   1059        uint32_t q = quants_[ch_id];
   1060        // Inform the tree splitting heuristics that each channel in each group
   1061        // used this quantization factor. This will produce a tree with the
   1062        // given multipliers.
   1063        if (multiplier_info.empty() ||
   1064            multiplier_info.back().range[1][0] != stream_id ||
   1065            multiplier_info.back().multiplier != q) {
   1066          StaticPropRange range;
   1067          range[0] = {{i, i + 1}};
   1068          range[1] = {{stream_id, stream_id + 1}};
   1069          multiplier_info.push_back({range, static_cast<uint32_t>(q)});
   1070        } else {
   1071          // Previous channel in the same group had the same quantization
   1072          // factor. Don't provide two different ranges, as that creates
   1073          // unnecessary nodes.
   1074          multiplier_info.back().range[0][1] = i + 1;
   1075        }
   1076      }
   1077    }
   1078    // Merge group+channel settings that have the same channels and quantization
   1079    // factors, to avoid unnecessary nodes.
   1080    std::sort(multiplier_info.begin(), multiplier_info.end(),
   1081              [](ModularMultiplierInfo a, ModularMultiplierInfo b) {
   1082                return std::make_tuple(a.range, a.multiplier) <
   1083                       std::make_tuple(b.range, b.multiplier);
   1084              });
   1085    size_t new_num = 1;
   1086    for (size_t i = 1; i < multiplier_info.size(); i++) {
   1087      ModularMultiplierInfo& prev = multiplier_info[new_num - 1];
   1088      ModularMultiplierInfo& cur = multiplier_info[i];
   1089      if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier &&
   1090          prev.range[1][1] == cur.range[1][0]) {
   1091        prev.range[1][1] = cur.range[1][1];
   1092      } else {
   1093        multiplier_info[new_num++] = multiplier_info[i];
   1094      }
   1095    }
   1096    multiplier_info.resize(new_num);
   1097  }
   1098 
   1099  if (!cparams_.custom_fixed_tree.empty()) {
   1100    tree_ = cparams_.custom_fixed_tree;
   1101  } else if (cparams_.speed_tier < SpeedTier::kFalcon ||
   1102             !cparams_.modular_mode) {
   1103    // Avoid creating a tree with leaves that don't correspond to any pixels.
   1104    std::vector<size_t> useful_splits;
   1105    useful_splits.reserve(tree_splits_.size());
   1106    for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) {
   1107      bool has_pixels = false;
   1108      size_t start = tree_splits_[chunk];
   1109      size_t stop = tree_splits_[chunk + 1];
   1110      for (size_t i = start; i < stop; i++) {
   1111        if (!stream_images_[i].empty()) has_pixels = true;
   1112      }
   1113      if (has_pixels) {
   1114        useful_splits.push_back(tree_splits_[chunk]);
   1115      }
   1116    }
   1117    // Don't do anything if modular mode does not have any pixels in this image
   1118    if (useful_splits.empty()) return true;
   1119    useful_splits.push_back(tree_splits_.back());
   1120 
   1121    std::vector<Tree> trees(useful_splits.size() - 1);
   1122    const auto process_chunk = [&](const uint32_t chunk,
   1123                                   size_t /* thread */) -> Status {
   1124      // TODO(veluca): parallelize more.
   1125      size_t total_pixels = 0;
   1126      uint32_t start = useful_splits[chunk];
   1127      uint32_t stop = useful_splits[chunk + 1];
   1128      while (start < stop && stream_images_[start].empty()) ++start;
   1129      while (start < stop && stream_images_[stop - 1].empty()) --stop;
   1130      if (stream_options_[start].tree_kind !=
   1131          ModularOptions::TreeKind::kLearn) {
   1132        for (size_t i = start; i < stop; i++) {
   1133          for (const Channel& ch : stream_images_[i].channel) {
   1134            total_pixels += ch.w * ch.h;
   1135          }
   1136        }
   1137        trees[chunk] = PredefinedTree(stream_options_[start].tree_kind,
   1138                                      total_pixels, 8, 0);
   1139        return true;
   1140      }
   1141      TreeSamples tree_samples;
   1142      JXL_RETURN_IF_ERROR(
   1143          tree_samples.SetPredictor(stream_options_[start].predictor,
   1144                                    stream_options_[start].wp_tree_mode));
   1145      JXL_RETURN_IF_ERROR(tree_samples.SetProperties(
   1146          stream_options_[start].splitting_heuristics_properties,
   1147          stream_options_[start].wp_tree_mode));
   1148      uint32_t max_c = 0;
   1149      std::vector<pixel_type> pixel_samples;
   1150      std::vector<pixel_type> diff_samples;
   1151      std::vector<uint32_t> group_pixel_count;
   1152      std::vector<uint32_t> channel_pixel_count;
   1153      for (uint32_t i = start; i < stop; i++) {
   1154        max_c = std::max<uint32_t>(stream_images_[i].channel.size(), max_c);
   1155        CollectPixelSamples(stream_images_[i], stream_options_[i], i,
   1156                            group_pixel_count, channel_pixel_count,
   1157                            pixel_samples, diff_samples);
   1158      }
   1159      StaticPropRange range;
   1160      range[0] = {{0, max_c}};
   1161      range[1] = {{start, stop}};
   1162 
   1163      tree_samples.PreQuantizeProperties(
   1164          range, multiplier_info, group_pixel_count, channel_pixel_count,
   1165          pixel_samples, diff_samples,
   1166          stream_options_[start].max_property_values);
   1167      for (size_t i = start; i < stop; i++) {
   1168        JXL_RETURN_IF_ERROR(
   1169            ModularGenericCompress(stream_images_[i], stream_options_[i],
   1170                                   /*writer=*/nullptr,
   1171                                   /*aux_out=*/nullptr, LayerType::Header, i,
   1172                                   &tree_samples, &total_pixels));
   1173      }
   1174 
   1175      // TODO(veluca): parallelize more.
   1176      JXL_ASSIGN_OR_RETURN(
   1177          trees[chunk],
   1178          LearnTree(std::move(tree_samples), total_pixels,
   1179                    stream_options_[start], multiplier_info, range));
   1180      return true;
   1181    };
   1182    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, useful_splits.size() - 1,
   1183                                  ThreadPool::NoInit, process_chunk,
   1184                                  "LearnTrees"));
   1185    tree_.clear();
   1186    JXL_RETURN_IF_ERROR(
   1187        MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_));
   1188  } else {
   1189    // Fixed tree.
   1190    size_t total_pixels = 0;
   1191    int max_bitdepth = 0;
   1192    for (const Image& img : stream_images_) {
   1193      max_bitdepth = std::max(max_bitdepth, img.bitdepth);
   1194      for (const Channel& ch : img.channel) {
   1195        total_pixels += ch.w * ch.h;
   1196      }
   1197    }
   1198    if (cparams_.speed_tier <= SpeedTier::kFalcon) {
   1199      tree_ = PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels,
   1200                             max_bitdepth, stream_options_[0].max_properties);
   1201    } else if (cparams_.speed_tier <= SpeedTier::kThunder) {
   1202      tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC,
   1203                             total_pixels, max_bitdepth,
   1204                             stream_options_[0].max_properties);
   1205    } else {
   1206      tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)};
   1207    }
   1208  }
   1209  tree_tokens_.resize(1);
   1210  tree_tokens_[0].clear();
   1211  Tree decoded_tree;
   1212  JXL_RETURN_IF_ERROR(TokenizeTree(tree_, tree_tokens_.data(), &decoded_tree));
   1213  JXL_ENSURE(tree_.size() == decoded_tree.size());
   1214  tree_ = std::move(decoded_tree);
   1215 
   1216  /* TODO(szabadka) Add text output callback to cparams
   1217  if (kPrintTree && WantDebugOutput(aux_out)) {
   1218    if (frame_header.dc_level > 0) {
   1219      PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" +
   1220                           std::to_string(frame_header.dc_level) + "_tree");
   1221    } else {
   1222      PrintTree(tree_, aux_out->debug_prefix + "/global_tree");
   1223    }
   1224  } */
   1225  return true;
   1226 }
   1227 
   1228 Status ModularFrameEncoder::ComputeTokens(ThreadPool* pool) {
   1229  size_t num_streams = stream_images_.size();
   1230  stream_headers_.resize(num_streams);
   1231  tokens_.resize(num_streams);
   1232  image_widths_.resize(num_streams);
   1233  const auto process_stream = [&](const uint32_t stream_id,
   1234                                  size_t /* thread */) -> Status {
   1235    AuxOut my_aux_out;
   1236    tokens_[stream_id].clear();
   1237    JXL_RETURN_IF_ERROR(ModularGenericCompress(
   1238        stream_images_[stream_id], stream_options_[stream_id],
   1239        /*writer=*/nullptr, &my_aux_out, LayerType::Header, stream_id,
   1240        /*tree_samples=*/nullptr,
   1241        /*total_pixels=*/nullptr,
   1242        /*tree=*/&tree_, /*header=*/&stream_headers_[stream_id],
   1243        /*tokens=*/&tokens_[stream_id],
   1244        /*widths=*/&image_widths_[stream_id]));
   1245    return true;
   1246  };
   1247  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_streams, ThreadPool::NoInit,
   1248                                process_stream, "ComputeTokens"));
   1249  return true;
   1250 }
   1251 
   1252 Status ModularFrameEncoder::EncodeGlobalInfo(bool streaming_mode,
   1253                                             BitWriter* writer,
   1254                                             AuxOut* aux_out) {
   1255  JxlMemoryManager* memory_manager = writer->memory_manager();
   1256  bool skip_rest = false;
   1257  JXL_RETURN_IF_ERROR(
   1258      writer->WithMaxBits(1, LayerType::ModularTree, aux_out, [&] {
   1259        // If we are using brotli, or not using modular mode.
   1260        if (tree_tokens_.empty() || tree_tokens_[0].empty()) {
   1261          writer->Write(1, 0);
   1262          skip_rest = true;
   1263        } else {
   1264          writer->Write(1, 1);
   1265        }
   1266        return true;
   1267      }));
   1268  if (skip_rest) return true;
   1269 
   1270  // Write tree
   1271  HistogramParams params =
   1272      HistogramParams::ForModular(cparams_, extra_dc_precision, streaming_mode);
   1273  {
   1274    EntropyEncodingData tree_code;
   1275    std::vector<uint8_t> tree_context_map;
   1276    JXL_ASSIGN_OR_RETURN(
   1277        size_t cost,
   1278        BuildAndEncodeHistograms(memory_manager, params, kNumTreeContexts,
   1279                                 tree_tokens_, &tree_code, &tree_context_map,
   1280                                 writer, LayerType::ModularTree, aux_out));
   1281    (void)cost;
   1282    JXL_RETURN_IF_ERROR(WriteTokens(tree_tokens_[0], tree_code,
   1283                                    tree_context_map, 0, writer,
   1284                                    LayerType::ModularTree, aux_out));
   1285  }
   1286  params.streaming_mode = streaming_mode;
   1287  params.add_missing_symbols = streaming_mode;
   1288  params.image_widths = image_widths_;
   1289  // Write histograms.
   1290  JXL_ASSIGN_OR_RETURN(
   1291      size_t cost,
   1292      BuildAndEncodeHistograms(memory_manager, params, (tree_.size() + 1) / 2,
   1293                               tokens_, &code_, &context_map_, writer,
   1294                               LayerType::ModularGlobal, aux_out));
   1295  (void)cost;
   1296  return true;
   1297 }
   1298 
   1299 Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out,
   1300                                         LayerType layer,
   1301                                         const ModularStreamId& stream) {
   1302  size_t stream_id = stream.ID(frame_dim_);
   1303  if (stream_images_[stream_id].channel.empty()) {
   1304    JXL_DEBUG_V(10, "Modular stream %" PRIuS " is empty.", stream_id);
   1305    return true;  // Image with no channels, header never gets decoded.
   1306  }
   1307  if (tokens_.empty()) {
   1308    JXL_RETURN_IF_ERROR(ModularGenericCompress(
   1309        stream_images_[stream_id], stream_options_[stream_id], writer, aux_out,
   1310        layer, stream_id));
   1311  } else {
   1312    JXL_RETURN_IF_ERROR(
   1313        Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out));
   1314    JXL_RETURN_IF_ERROR(WriteTokens(tokens_[stream_id], code_, context_map_, 0,
   1315                                    writer, layer, aux_out));
   1316  }
   1317  return true;
   1318 }
   1319 
   1320 void ModularFrameEncoder::ClearStreamData(const ModularStreamId& stream) {
   1321  size_t stream_id = stream.ID(frame_dim_);
   1322  Image empty_image(stream_images_[stream_id].memory_manager());
   1323  std::swap(stream_images_[stream_id], empty_image);
   1324 }
   1325 
   1326 void ModularFrameEncoder::ClearModularStreamData() {
   1327  for (const auto& group : stream_params_) {
   1328    ClearStreamData(group.id);
   1329  }
   1330  stream_params_.clear();
   1331 }
   1332 
   1333 size_t ModularFrameEncoder::ComputeStreamingAbsoluteAcGroupId(
   1334    size_t dc_group_id, size_t ac_group_id,
   1335    const FrameDimensions& patch_dim) const {
   1336  size_t dc_group_x = dc_group_id % frame_dim_.xsize_dc_groups;
   1337  size_t dc_group_y = dc_group_id / frame_dim_.xsize_dc_groups;
   1338  size_t ac_group_x = ac_group_id % patch_dim.xsize_groups;
   1339  size_t ac_group_y = ac_group_id / patch_dim.xsize_groups;
   1340  return (dc_group_x * 8 + ac_group_x) +
   1341         (dc_group_y * 8 + ac_group_y) * frame_dim_.xsize_groups;
   1342 }
   1343 
   1344 Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect,
   1345                                                const CompressParams& cparams_,
   1346                                                int minShift, int maxShift,
   1347                                                const ModularStreamId& stream,
   1348                                                bool do_color, bool groupwise) {
   1349  size_t stream_id = stream.ID(frame_dim_);
   1350  if (stream_id == 0 && frame_dim_.num_groups != 1) {
   1351    // If we have multiple groups, then the stream with ID 0 holds the full
   1352    // image and we do not want to apply transforms or in general change the
   1353    // pixel values.
   1354    return true;
   1355  }
   1356  Image& full_image = stream_images_[0];
   1357  JxlMemoryManager* memory_manager = full_image.memory_manager();
   1358  const size_t xsize = rect.xsize();
   1359  const size_t ysize = rect.ysize();
   1360  Image& gi = stream_images_[stream_id];
   1361  if (stream_id > 0) {
   1362    JXL_ASSIGN_OR_RETURN(gi, Image::Create(memory_manager, xsize, ysize,
   1363                                           full_image.bitdepth, 0));
   1364    // start at the first bigger-than-frame_dim.group_dim non-metachannel
   1365    size_t c = full_image.nb_meta_channels;
   1366    if (!groupwise) {
   1367      for (; c < full_image.channel.size(); c++) {
   1368        Channel& fc = full_image.channel[c];
   1369        if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
   1370      }
   1371    }
   1372    for (; c < full_image.channel.size(); c++) {
   1373      Channel& fc = full_image.channel[c];
   1374      int shift = std::min(fc.hshift, fc.vshift);
   1375      if (shift > maxShift) continue;
   1376      if (shift < minShift) continue;
   1377      Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
   1378             rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
   1379      if (r.xsize() == 0 || r.ysize() == 0) continue;
   1380      gi_channel_[stream_id].push_back(c);
   1381      JXL_ASSIGN_OR_RETURN(
   1382          Channel gc, Channel::Create(memory_manager, r.xsize(), r.ysize()));
   1383      gc.hshift = fc.hshift;
   1384      gc.vshift = fc.vshift;
   1385      for (size_t y = 0; y < r.ysize(); ++y) {
   1386        memcpy(gc.Row(y), r.ConstRow(fc.plane, y),
   1387               r.xsize() * sizeof(pixel_type));
   1388      }
   1389      gi.channel.emplace_back(std::move(gc));
   1390    }
   1391 
   1392    if (gi.channel.empty()) return true;
   1393    // Do some per-group transforms
   1394 
   1395    // Local palette transforms
   1396    // TODO(veluca): make this work with quantize-after-prediction in lossy
   1397    // mode.
   1398    if (cparams_.butteraugli_distance == 0.f && !cparams_.lossy_palette &&
   1399        cparams_.speed_tier < SpeedTier::kCheetah) {
   1400      int max_bitdepth = 0, maxval = 0;  // don't care about that here
   1401      float channel_color_percent = 0;
   1402      if (!(cparams_.responsive && cparams_.decoding_speed_tier >= 1)) {
   1403        channel_color_percent = cparams_.channel_colors_percent;
   1404      }
   1405      try_palettes(gi, max_bitdepth, maxval, cparams_, channel_color_percent);
   1406    }
   1407  }
   1408 
   1409  // lossless and no specific color transform specified: try Nothing, YCoCg,
   1410  // and 17 RCTs
   1411  if (cparams_.color_transform == ColorTransform::kNone &&
   1412      cparams_.IsLossless() && cparams_.colorspace < 0 &&
   1413      gi.channel.size() - gi.nb_meta_channels >= 3 &&
   1414      cparams_.responsive == JXL_FALSE && do_color &&
   1415      cparams_.speed_tier <= SpeedTier::kHare) {
   1416    Transform sg(TransformId::kRCT);
   1417    sg.begin_c = gi.nb_meta_channels;
   1418    size_t nb_rcts_to_try = 0;
   1419    switch (cparams_.speed_tier) {
   1420      case SpeedTier::kLightning:
   1421      case SpeedTier::kThunder:
   1422      case SpeedTier::kFalcon:
   1423      case SpeedTier::kCheetah:
   1424        nb_rcts_to_try = 0;  // Just do global YCoCg
   1425        break;
   1426      case SpeedTier::kHare:
   1427        nb_rcts_to_try = 4;
   1428        break;
   1429      case SpeedTier::kWombat:
   1430        nb_rcts_to_try = 5;
   1431        break;
   1432      case SpeedTier::kSquirrel:
   1433        nb_rcts_to_try = 7;
   1434        break;
   1435      case SpeedTier::kKitten:
   1436        nb_rcts_to_try = 9;
   1437        break;
   1438      case SpeedTier::kTectonicPlate:
   1439      case SpeedTier::kGlacier:
   1440      case SpeedTier::kTortoise:
   1441        nb_rcts_to_try = 19;
   1442        break;
   1443    }
   1444    float best_cost = std::numeric_limits<float>::max();
   1445    size_t best_rct = 0;
   1446    // These should be 19 actually different transforms; the remaining ones
   1447    // are equivalent to one of these (note that the first two are do-nothing
   1448    // and YCoCg) modulo channel reordering (which only matters in the case of
   1449    // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR)
   1450    for (int i : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5,
   1451                  5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4,
   1452                  1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4,
   1453                  4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) {
   1454      if (nb_rcts_to_try == 0) break;
   1455      sg.rct_type = i;
   1456      nb_rcts_to_try--;
   1457      if (do_transform(gi, sg, weighted::Header())) {
   1458        float cost = EstimateCost(gi);
   1459        if (cost < best_cost) {
   1460          best_rct = i;
   1461          best_cost = cost;
   1462        }
   1463        Transform t = gi.transform.back();
   1464        JXL_RETURN_IF_ERROR(t.Inverse(gi, weighted::Header(), nullptr));
   1465        gi.transform.pop_back();
   1466      }
   1467    }
   1468    // Apply the best RCT to the image for future encoding.
   1469    sg.rct_type = best_rct;
   1470    do_transform(gi, sg, weighted::Header());
   1471  } else {
   1472    // No need to try anything, just use the default options.
   1473  }
   1474  size_t nb_wp_modes = 1;
   1475  if (cparams_.speed_tier <= SpeedTier::kTortoise) {
   1476    nb_wp_modes = 5;
   1477  } else if (cparams_.speed_tier <= SpeedTier::kKitten) {
   1478    nb_wp_modes = 2;
   1479  }
   1480  if (nb_wp_modes > 1 &&
   1481      (stream_options_[stream_id].predictor == Predictor::Weighted ||
   1482       stream_options_[stream_id].predictor == Predictor::Best ||
   1483       stream_options_[stream_id].predictor == Predictor::Variable)) {
   1484    float best_cost = std::numeric_limits<float>::max();
   1485    stream_options_[stream_id].wp_mode = 0;
   1486    for (size_t i = 0; i < nb_wp_modes; i++) {
   1487      float cost = EstimateWPCost(gi, i);
   1488      if (cost < best_cost) {
   1489        best_cost = cost;
   1490        stream_options_[stream_id].wp_mode = i;
   1491      }
   1492    }
   1493  }
   1494  return true;
   1495 }
   1496 
   1497 constexpr float q_deadzone = 0.62f;
   1498 int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y,
   1499               size_t w, weighted::State* wp_state, float value,
   1500               float inv_factor) {
   1501  float svalue = value * inv_factor;
   1502  PredictionResult pred =
   1503      PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state);
   1504  svalue -= pred.guess;
   1505  if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
   1506  int residual = roundf(svalue);
   1507  if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2;
   1508  return residual + pred.guess;
   1509 }
   1510 
   1511 int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x,
   1512                     size_t y, size_t w, float value, float inv_factor) {
   1513  float svalue = value * inv_factor;
   1514  PredictionResult pred =
   1515      PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient);
   1516  svalue -= pred.guess;
   1517  if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
   1518  int residual = roundf(svalue);
   1519  if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2;
   1520  return residual + pred.guess;
   1521 }
   1522 
   1523 Status ModularFrameEncoder::AddVarDCTDC(const FrameHeader& frame_header,
   1524                                        const Image3F& dc, const Rect& r,
   1525                                        size_t group_index, bool nl_dc,
   1526                                        PassesEncoderState* enc_state,
   1527                                        bool jpeg_transcode) {
   1528  JxlMemoryManager* memory_manager = dc.memory_manager();
   1529  extra_dc_precision[group_index] = nl_dc ? 1 : 0;
   1530  float mul = 1 << extra_dc_precision[group_index];
   1531 
   1532  size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_);
   1533  stream_options_[stream_id].max_chan_size = 0xFFFFFF;
   1534  stream_options_[stream_id].predictor = Predictor::Weighted;
   1535  stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
   1536  if (cparams_.speed_tier >= SpeedTier::kSquirrel) {
   1537    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
   1538  }
   1539  if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) {
   1540    stream_options_[stream_id].predictor =
   1541        (cparams_.speed_tier < SpeedTier::kKitten ? Predictor::Variable
   1542                                                  : Predictor::Best);
   1543    stream_options_[stream_id].wp_tree_mode =
   1544        ModularOptions::TreeMode::kDefault;
   1545    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
   1546  }
   1547  if (cparams_.decoding_speed_tier >= 1) {
   1548    stream_options_[stream_id].tree_kind =
   1549        ModularOptions::TreeKind::kGradientFixedDC;
   1550  }
   1551  stream_options_[stream_id].histogram_params =
   1552      stream_options_[0].histogram_params;
   1553 
   1554  JXL_ASSIGN_OR_RETURN(
   1555      stream_images_[stream_id],
   1556      Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 3));
   1557  const ColorCorrelation& color_correlation = enc_state->shared.cmap.base();
   1558  if (nl_dc && stream_options_[stream_id].tree_kind ==
   1559                   ModularOptions::TreeKind::kGradientFixedDC) {
   1560    JXL_ENSURE(frame_header.chroma_subsampling.Is444());
   1561    for (size_t c : {1, 0, 2}) {
   1562      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
   1563      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
   1564      float cfl_factor = color_correlation.DCFactors()[c];
   1565      for (size_t y = 0; y < r.ysize(); y++) {
   1566        int32_t* quant_row =
   1567            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
   1568        size_t stride = stream_images_[stream_id]
   1569                            .channel[c < 2 ? c ^ 1 : c]
   1570                            .plane.PixelsPerRow();
   1571        const float* row = r.ConstPlaneRow(dc, c, y);
   1572        if (c == 1) {
   1573          for (size_t x = 0; x < r.xsize(); x++) {
   1574            quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y,
   1575                                            r.xsize(), row[x], inv_factor);
   1576          }
   1577        } else {
   1578          int32_t* quant_row_y =
   1579              stream_images_[stream_id].channel[0].plane.Row(y);
   1580          for (size_t x = 0; x < r.xsize(); x++) {
   1581            quant_row[x] = QuantizeGradient(
   1582                quant_row, stride, c, x, y, r.xsize(),
   1583                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
   1584          }
   1585        }
   1586      }
   1587    }
   1588  } else if (nl_dc) {
   1589    JXL_ENSURE(frame_header.chroma_subsampling.Is444());
   1590    for (size_t c : {1, 0, 2}) {
   1591      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
   1592      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
   1593      float cfl_factor = color_correlation.DCFactors()[c];
   1594      weighted::Header header;
   1595      weighted::State wp_state(header, r.xsize(), r.ysize());
   1596      for (size_t y = 0; y < r.ysize(); y++) {
   1597        int32_t* quant_row =
   1598            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
   1599        size_t stride = stream_images_[stream_id]
   1600                            .channel[c < 2 ? c ^ 1 : c]
   1601                            .plane.PixelsPerRow();
   1602        const float* row = r.ConstPlaneRow(dc, c, y);
   1603        if (c == 1) {
   1604          for (size_t x = 0; x < r.xsize(); x++) {
   1605            quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(),
   1606                                      &wp_state, row[x], inv_factor);
   1607            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
   1608          }
   1609        } else {
   1610          int32_t* quant_row_y =
   1611              stream_images_[stream_id].channel[0].plane.Row(y);
   1612          for (size_t x = 0; x < r.xsize(); x++) {
   1613            quant_row[x] = QuantizeWP(
   1614                quant_row, stride, c, x, y, r.xsize(), &wp_state,
   1615                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
   1616            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
   1617          }
   1618        }
   1619      }
   1620    }
   1621  } else if (frame_header.chroma_subsampling.Is444()) {
   1622    for (size_t c : {1, 0, 2}) {
   1623      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
   1624      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
   1625      float cfl_factor = color_correlation.DCFactors()[c];
   1626      for (size_t y = 0; y < r.ysize(); y++) {
   1627        int32_t* quant_row =
   1628            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
   1629        const float* row = r.ConstPlaneRow(dc, c, y);
   1630        if (c == 1) {
   1631          for (size_t x = 0; x < r.xsize(); x++) {
   1632            quant_row[x] = roundf(row[x] * inv_factor);
   1633          }
   1634        } else {
   1635          int32_t* quant_row_y =
   1636              stream_images_[stream_id].channel[0].plane.Row(y);
   1637          for (size_t x = 0; x < r.xsize(); x++) {
   1638            quant_row[x] =
   1639                roundf((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) *
   1640                       inv_factor);
   1641          }
   1642        }
   1643      }
   1644    }
   1645  } else {
   1646    for (size_t c : {1, 0, 2}) {
   1647      Rect rect(r.x0() >> frame_header.chroma_subsampling.HShift(c),
   1648                r.y0() >> frame_header.chroma_subsampling.VShift(c),
   1649                r.xsize() >> frame_header.chroma_subsampling.HShift(c),
   1650                r.ysize() >> frame_header.chroma_subsampling.VShift(c));
   1651      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
   1652      size_t ys = rect.ysize();
   1653      size_t xs = rect.xsize();
   1654      Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c];
   1655      ch.w = xs;
   1656      ch.h = ys;
   1657      JXL_RETURN_IF_ERROR(ch.shrink());
   1658      for (size_t y = 0; y < ys; y++) {
   1659        int32_t* quant_row = ch.plane.Row(y);
   1660        const float* row = rect.ConstPlaneRow(dc, c, y);
   1661        for (size_t x = 0; x < xs; x++) {
   1662          quant_row[x] = roundf(row[x] * inv_factor);
   1663        }
   1664      }
   1665    }
   1666  }
   1667 
   1668  DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc,
   1669            stream_images_[stream_id], enc_state->shared.quantizer.MulDC(),
   1670            1.0 / mul, color_correlation.DCFactors(),
   1671            frame_header.chroma_subsampling, enc_state->shared.block_ctx_map);
   1672  return true;
   1673 }
   1674 
   1675 Status ModularFrameEncoder::AddACMetadata(const Rect& r, size_t group_index,
   1676                                          bool jpeg_transcode,
   1677                                          PassesEncoderState* enc_state) {
   1678  JxlMemoryManager* memory_manager = enc_state->memory_manager();
   1679  size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_);
   1680  stream_options_[stream_id].max_chan_size = 0xFFFFFF;
   1681  if (stream_options_[stream_id].predictor != Predictor::Weighted) {
   1682    stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP;
   1683  }
   1684  if (jpeg_transcode) {
   1685    stream_options_[stream_id].tree_kind =
   1686        ModularOptions::TreeKind::kJpegTranscodeACMeta;
   1687  } else if (cparams_.speed_tier >= SpeedTier::kFalcon) {
   1688    stream_options_[stream_id].tree_kind =
   1689        ModularOptions::TreeKind::kFalconACMeta;
   1690  } else if (cparams_.speed_tier > SpeedTier::kKitten) {
   1691    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta;
   1692  }
   1693  // If we are using a non-constant CfL field, and are in a slow enough mode,
   1694  // re-enable tree computation for it.
   1695  if (cparams_.speed_tier < SpeedTier::kSquirrel &&
   1696      cparams_.force_cfl_jpeg_recompression) {
   1697    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
   1698  }
   1699  stream_options_[stream_id].histogram_params =
   1700      stream_options_[0].histogram_params;
   1701  // YToX, YToB, ACS + QF, EPF
   1702  Image& image = stream_images_[stream_id];
   1703  JXL_ASSIGN_OR_RETURN(
   1704      image, Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 4));
   1705  static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
   1706  Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
   1707  JXL_ASSIGN_OR_RETURN(
   1708      image.channel[0],
   1709      Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3));
   1710  JXL_ASSIGN_OR_RETURN(
   1711      image.channel[1],
   1712      Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3));
   1713  JXL_ASSIGN_OR_RETURN(
   1714      image.channel[2],
   1715      Channel::Create(memory_manager, r.xsize() * r.ysize(), 2, 0, 0));
   1716  JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map,
   1717                                           Rect(image.channel[0].plane),
   1718                                           &image.channel[0].plane));
   1719  JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map,
   1720                                           Rect(image.channel[1].plane),
   1721                                           &image.channel[1].plane));
   1722  size_t num = 0;
   1723  for (size_t y = 0; y < r.ysize(); y++) {
   1724    AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y);
   1725    const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y);
   1726    const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y);
   1727    int32_t* out_acs = image.channel[2].plane.Row(0);
   1728    int32_t* out_qf = image.channel[2].plane.Row(1);
   1729    int32_t* row_out_epf = image.channel[3].plane.Row(y);
   1730    for (size_t x = 0; x < r.xsize(); x++) {
   1731      row_out_epf[x] = row_epf[x];
   1732      if (!row_acs[x].IsFirstBlock()) continue;
   1733      out_acs[num] = row_acs[x].RawStrategy();
   1734      out_qf[num] = row_qf[x] - 1;
   1735      num++;
   1736    }
   1737  }
   1738  image.channel[2].w = num;
   1739  ac_metadata_size[group_index] = num;
   1740  return true;
   1741 }
   1742 
   1743 Status ModularFrameEncoder::EncodeQuantTable(
   1744    JxlMemoryManager* memory_manager, size_t size_x, size_t size_y,
   1745    BitWriter* writer, const QuantEncoding& encoding, size_t idx,
   1746    ModularFrameEncoder* modular_frame_encoder) {
   1747  JXL_ENSURE(encoding.qraw.qtable);
   1748  JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size());
   1749  JXL_ENSURE(idx < kNumQuantTables);
   1750  int* qtable = encoding.qraw.qtable->data();
   1751  JXL_RETURN_IF_ERROR(F16Coder::Write(encoding.qraw.qtable_den, writer));
   1752  if (modular_frame_encoder) {
   1753    JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx));
   1754    JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream(
   1755        writer, nullptr, LayerType::Header, qt));
   1756    return true;
   1757  }
   1758  JXL_ASSIGN_OR_RETURN(Image image,
   1759                       Image::Create(memory_manager, size_x, size_y, 8, 3));
   1760  for (size_t c = 0; c < 3; c++) {
   1761    for (size_t y = 0; y < size_y; y++) {
   1762      int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
   1763      for (size_t x = 0; x < size_x; x++) {
   1764        row[x] = qtable[c * size_x * size_y + y * size_x + x];
   1765      }
   1766    }
   1767  }
   1768  ModularOptions cfopts;
   1769  JXL_RETURN_IF_ERROR(ModularGenericCompress(image, cfopts, writer));
   1770  return true;
   1771 }
   1772 
   1773 Status ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y,
   1774                                          const QuantEncoding& encoding,
   1775                                          size_t idx) {
   1776  JXL_ENSURE(idx < kNumQuantTables);
   1777  JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx));
   1778  size_t stream_id = qt.ID(frame_dim_);
   1779  JXL_ENSURE(encoding.qraw.qtable);
   1780  JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size());
   1781  int* qtable = encoding.qraw.qtable->data();
   1782  Image& image = stream_images_[stream_id];
   1783  JxlMemoryManager* memory_manager = image.memory_manager();
   1784  JXL_ASSIGN_OR_RETURN(image,
   1785                       Image::Create(memory_manager, size_x, size_y, 8, 3));
   1786  for (size_t c = 0; c < 3; c++) {
   1787    for (size_t y = 0; y < size_y; y++) {
   1788      int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
   1789      for (size_t x = 0; x < size_x; x++) {
   1790        row[x] = qtable[c * size_x * size_y + y * size_x + x];
   1791      }
   1792    }
   1793  }
   1794  return true;
   1795 }
   1796 }  // namespace jxl