[ tor-browser ].git.dasho

enc_xyb.cc (15864B)
      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jxl/enc_xyb.h"
      7 
      8 #include <jxl/memory_manager.h>
      9 
     10 #include <algorithm>
     11 #include <cstdlib>
     12 
     13 #undef HWY_TARGET_INCLUDE
     14 #define HWY_TARGET_INCLUDE "lib/jxl/enc_xyb.cc"
     15 #include <hwy/foreach_target.h>
     16 #include <hwy/highway.h>
     17 
     18 #include "lib/jxl/base/compiler_specific.h"
     19 #include "lib/jxl/base/data_parallel.h"
     20 #include "lib/jxl/base/fast_math-inl.h"
     21 #include "lib/jxl/base/rect.h"
     22 #include "lib/jxl/base/status.h"
     23 #include "lib/jxl/cms/opsin_params.h"
     24 #include "lib/jxl/cms/transfer_functions-inl.h"
     25 #include "lib/jxl/color_encoding_internal.h"
     26 #include "lib/jxl/enc_image_bundle.h"
     27 #include "lib/jxl/image_bundle.h"
     28 #include "lib/jxl/image_ops.h"
     29 
     30 HWY_BEFORE_NAMESPACE();
     31 namespace jxl {
     32 namespace HWY_NAMESPACE {
     33 
     34 // These templates are not found via ADL.
     35 using hwy::HWY_NAMESPACE::Add;
     36 using hwy::HWY_NAMESPACE::Mul;
     37 using hwy::HWY_NAMESPACE::MulAdd;
     38 using hwy::HWY_NAMESPACE::Sub;
     39 using hwy::HWY_NAMESPACE::ZeroIfNegative;
     40 
     41 // 4x3 matrix * 3x1 SIMD vectors
     42 template <class V>
     43 JXL_INLINE void OpsinAbsorbance(const V r, const V g, const V b,
     44                                const float* JXL_RESTRICT premul_absorb,
     45                                V* JXL_RESTRICT mixed0, V* JXL_RESTRICT mixed1,
     46                                V* JXL_RESTRICT mixed2) {
     47  const float* bias = jxl::cms::kOpsinAbsorbanceBias.data();
     48  const HWY_FULL(float) d;
     49  const size_t N = Lanes(d);
     50  const auto m0 = Load(d, premul_absorb + 0 * N);
     51  const auto m1 = Load(d, premul_absorb + 1 * N);
     52  const auto m2 = Load(d, premul_absorb + 2 * N);
     53  const auto m3 = Load(d, premul_absorb + 3 * N);
     54  const auto m4 = Load(d, premul_absorb + 4 * N);
     55  const auto m5 = Load(d, premul_absorb + 5 * N);
     56  const auto m6 = Load(d, premul_absorb + 6 * N);
     57  const auto m7 = Load(d, premul_absorb + 7 * N);
     58  const auto m8 = Load(d, premul_absorb + 8 * N);
     59  *mixed0 = MulAdd(m0, r, MulAdd(m1, g, MulAdd(m2, b, Set(d, bias[0]))));
     60  *mixed1 = MulAdd(m3, r, MulAdd(m4, g, MulAdd(m5, b, Set(d, bias[1]))));
     61  *mixed2 = MulAdd(m6, r, MulAdd(m7, g, MulAdd(m8, b, Set(d, bias[2]))));
     62 }
     63 
     64 template <class V>
     65 void StoreXYB(const V r, V g, const V b, float* JXL_RESTRICT valx,
     66              float* JXL_RESTRICT valy, float* JXL_RESTRICT valz) {
     67  const HWY_FULL(float) d;
     68  const V half = Set(d, 0.5f);
     69  Store(Mul(half, Sub(r, g)), d, valx);
     70  Store(Mul(half, Add(r, g)), d, valy);
     71  Store(b, d, valz);
     72 }
     73 
     74 // Converts one RGB vector to XYB.
     75 template <class V>
     76 void LinearRGBToXYB(const V r, const V g, const V b,
     77                    const float* JXL_RESTRICT premul_absorb,
     78                    float* JXL_RESTRICT valx, float* JXL_RESTRICT valy,
     79                    float* JXL_RESTRICT valz) {
     80  V mixed0;
     81  V mixed1;
     82  V mixed2;
     83  OpsinAbsorbance(r, g, b, premul_absorb, &mixed0, &mixed1, &mixed2);
     84 
     85  // mixed* should be non-negative even for wide-gamut, so clamp to zero.
     86  mixed0 = ZeroIfNegative(mixed0);
     87  mixed1 = ZeroIfNegative(mixed1);
     88  mixed2 = ZeroIfNegative(mixed2);
     89 
     90  const HWY_FULL(float) d;
     91  const size_t N = Lanes(d);
     92  mixed0 = CubeRootAndAdd(mixed0, Load(d, premul_absorb + 9 * N));
     93  mixed1 = CubeRootAndAdd(mixed1, Load(d, premul_absorb + 10 * N));
     94  mixed2 = CubeRootAndAdd(mixed2, Load(d, premul_absorb + 11 * N));
     95  StoreXYB(mixed0, mixed1, mixed2, valx, valy, valz);
     96 
     97  // For wide-gamut inputs, r/g/b and valx (but not y/z) are often negative.
     98 }
     99 
    100 void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
    101                       float* JXL_RESTRICT row2,
    102                       const float* JXL_RESTRICT premul_absorb, size_t xsize) {
    103  const HWY_FULL(float) d;
    104  for (size_t x = 0; x < xsize; x += Lanes(d)) {
    105    const auto r = Load(d, row0 + x);
    106    const auto g = Load(d, row1 + x);
    107    const auto b = Load(d, row2 + x);
    108    LinearRGBToXYB(r, g, b, premul_absorb, row0 + x, row1 + x, row2 + x);
    109  }
    110 }
    111 
    112 // Input/output uses the codec.h scaling: nominally 0-1 if in-gamut.
    113 template <class V>
    114 V LinearFromSRGB(V encoded) {
    115  return TF_SRGB().DisplayFromEncoded(encoded);
    116 }
    117 
    118 Status LinearSRGBToXYB(const float* JXL_RESTRICT premul_absorb,
    119                       ThreadPool* pool, Image3F* JXL_RESTRICT image) {
    120  const size_t xsize = image->xsize();
    121 
    122  const HWY_FULL(float) d;
    123  const auto process_row = [&](const uint32_t task,
    124                               size_t /*thread*/) -> Status {
    125    const size_t y = static_cast<size_t>(task);
    126    float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
    127    float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
    128    float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
    129 
    130    for (size_t x = 0; x < xsize; x += Lanes(d)) {
    131      const auto in_r = Load(d, row0 + x);
    132      const auto in_g = Load(d, row1 + x);
    133      const auto in_b = Load(d, row2 + x);
    134      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
    135                     row2 + x);
    136    }
    137    return true;
    138  };
    139  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
    140                                ThreadPool::NoInit, process_row,
    141                                "LinearToXYB"));
    142  return true;
    143 }
    144 
    145 Status SRGBToXYB(const float* JXL_RESTRICT premul_absorb, ThreadPool* pool,
    146                 Image3F* JXL_RESTRICT image) {
    147  const size_t xsize = image->xsize();
    148 
    149  const HWY_FULL(float) d;
    150  const auto process_row = [&](const uint32_t task,
    151                               size_t /*thread*/) -> Status {
    152    const size_t y = static_cast<size_t>(task);
    153    float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
    154    float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
    155    float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
    156 
    157    for (size_t x = 0; x < xsize; x += Lanes(d)) {
    158      const auto in_r = LinearFromSRGB(Load(d, row0 + x));
    159      const auto in_g = LinearFromSRGB(Load(d, row1 + x));
    160      const auto in_b = LinearFromSRGB(Load(d, row2 + x));
    161      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
    162                     row2 + x);
    163    }
    164    return true;
    165  };
    166  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
    167                                ThreadPool::NoInit, process_row, "SRGBToXYB"));
    168  return true;
    169 }
    170 
    171 Status SRGBToXYBAndLinear(const float* JXL_RESTRICT premul_absorb,
    172                          ThreadPool* pool, Image3F* JXL_RESTRICT image,
    173                          Image3F* JXL_RESTRICT linear) {
    174  const size_t xsize = image->xsize();
    175 
    176  const HWY_FULL(float) d;
    177  const auto process_row = [&](const uint32_t task,
    178                               size_t /*thread*/) -> Status {
    179    const size_t y = static_cast<size_t>(task);
    180    float* JXL_RESTRICT row_image0 = image->PlaneRow(0, y);
    181    float* JXL_RESTRICT row_image1 = image->PlaneRow(1, y);
    182    float* JXL_RESTRICT row_image2 = image->PlaneRow(2, y);
    183    float* JXL_RESTRICT row_linear0 = linear->PlaneRow(0, y);
    184    float* JXL_RESTRICT row_linear1 = linear->PlaneRow(1, y);
    185    float* JXL_RESTRICT row_linear2 = linear->PlaneRow(2, y);
    186 
    187    for (size_t x = 0; x < xsize; x += Lanes(d)) {
    188      const auto in_r = LinearFromSRGB(Load(d, row_image0 + x));
    189      const auto in_g = LinearFromSRGB(Load(d, row_image1 + x));
    190      const auto in_b = LinearFromSRGB(Load(d, row_image2 + x));
    191 
    192      Store(in_r, d, row_linear0 + x);
    193      Store(in_g, d, row_linear1 + x);
    194      Store(in_b, d, row_linear2 + x);
    195 
    196      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_image0 + x,
    197                     row_image1 + x, row_image2 + x);
    198    }
    199    return true;
    200  };
    201  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
    202                                ThreadPool::NoInit, process_row,
    203                                "SRGBToXYBAndLinear"));
    204  return true;
    205 }
    206 
    207 void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
    208  const HWY_FULL(float) d;
    209  const size_t N = Lanes(d);
    210  const float mul = intensity_target / 255.0f;
    211  for (size_t j = 0; j < 3; ++j) {
    212    for (size_t i = 0; i < 3; ++i) {
    213      const auto absorb = Set(d, jxl::cms::kOpsinAbsorbanceMatrix[j][i] * mul);
    214      Store(absorb, d, premul_absorb + (j * 3 + i) * N);
    215    }
    216  }
    217  for (size_t i = 0; i < 3; ++i) {
    218    const auto neg_bias_cbrt =
    219        Set(d, -cbrtf(jxl::cms::kOpsinAbsorbanceBias[i]));
    220    Store(neg_bias_cbrt, d, premul_absorb + (9 + i) * N);
    221  }
    222 }
    223 
    224 // This is different from Butteraugli's OpsinDynamicsImage() in the sense that
    225 // it does not contain a sensitivity multiplier based on the blurred image.
    226 Status ToXYB(const ColorEncoding& c_current, float intensity_target,
    227             const ImageF* black, ThreadPool* pool, Image3F* JXL_RESTRICT image,
    228             const JxlCmsInterface& cms, Image3F* const JXL_RESTRICT linear) {
    229  if (black) JXL_ENSURE(SameSize(*image, *black));
    230  if (linear) JXL_ENSURE(SameSize(*image, *linear));
    231 
    232  const HWY_FULL(float) d;
    233  // Pre-broadcasted constants
    234  HWY_ALIGN float premul_absorb[MaxLanes(d) * 12];
    235  ComputePremulAbsorb(intensity_target, premul_absorb);
    236 
    237  const bool want_linear = linear != nullptr;
    238 
    239  const ColorEncoding& c_linear_srgb =
    240      ColorEncoding::LinearSRGB(c_current.IsGray());
    241  // Linear sRGB inputs are rare but can be useful for the fastest encoders, for
    242  // which undoing the sRGB transfer function would be a large part of the cost.
    243  if (c_linear_srgb.SameColorEncoding(c_current)) {
    244    // This only happens if kitten or slower, moving ImageBundle might be
    245    // possible but the encoder is much slower than this copy.
    246    if (want_linear) {
    247      JXL_RETURN_IF_ERROR(CopyImageTo(*image, linear));
    248    }
    249    JXL_RETURN_IF_ERROR(LinearSRGBToXYB(premul_absorb, pool, image));
    250    return true;
    251  }
    252 
    253  // Common case: already sRGB, can avoid the color transform
    254  if (c_current.IsSRGB()) {
    255    // Common case: can avoid allocating/copying
    256    if (want_linear) {
    257      // Slow encoder also wants linear sRGB.
    258      JXL_RETURN_IF_ERROR(
    259          SRGBToXYBAndLinear(premul_absorb, pool, image, linear));
    260    } else {
    261      JXL_RETURN_IF_ERROR(SRGBToXYB(premul_absorb, pool, image));
    262    }
    263    return true;
    264  }
    265 
    266  JXL_RETURN_IF_ERROR(ApplyColorTransform(
    267      c_current, intensity_target, *image, black, Rect(*image), c_linear_srgb,
    268      cms, pool, want_linear ? linear : image));
    269  if (want_linear) {
    270    JXL_RETURN_IF_ERROR(CopyImageTo(*linear, image));
    271  }
    272  JXL_RETURN_IF_ERROR(LinearSRGBToXYB(premul_absorb, pool, image));
    273  return true;
    274 }
    275 
    276 // Transform RGB to YCbCr.
    277 // Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B).
    278 Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
    279                  const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
    280                  ImageF* cr_plane, ThreadPool* pool) {
    281  const HWY_FULL(float) df;
    282  const size_t S = Lanes(df);  // Step.
    283 
    284  const size_t xsize = r_plane.xsize();
    285  const size_t ysize = r_plane.ysize();
    286  if ((xsize == 0) || (ysize == 0)) return true;
    287 
    288  // Full-range BT.601 as defined by JFIF Clause 7:
    289  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
    290  const auto k128 = Set(df, 128.0f / 255);
    291  const auto kR = Set(df, 0.299f);  // NTSC luma
    292  const auto kG = Set(df, 0.587f);
    293  const auto kB = Set(df, 0.114f);
    294  const auto kAmpR = Set(df, 0.701f);
    295  const auto kAmpB = Set(df, 0.886f);
    296  const auto kDiffR = Add(kAmpR, kR);
    297  const auto kDiffB = Add(kAmpB, kB);
    298  const auto kNormR = Div(Set(df, 1.0f), (Add(kAmpR, Add(kG, kB))));
    299  const auto kNormB = Div(Set(df, 1.0f), (Add(kR, Add(kG, kAmpB))));
    300 
    301  constexpr size_t kGroupArea = kGroupDim * kGroupDim;
    302  const size_t lines_per_group = DivCeil(kGroupArea, xsize);
    303  const size_t num_stripes = DivCeil(ysize, lines_per_group);
    304  const auto transform = [&](int idx, int /* thread*/) -> Status {
    305    const size_t y0 = idx * lines_per_group;
    306    const size_t y1 = std::min<size_t>(y0 + lines_per_group, ysize);
    307    for (size_t y = y0; y < y1; ++y) {
    308      const float* r_row = r_plane.ConstRow(y);
    309      const float* g_row = g_plane.ConstRow(y);
    310      const float* b_row = b_plane.ConstRow(y);
    311      float* y_row = y_plane->Row(y);
    312      float* cb_row = cb_plane->Row(y);
    313      float* cr_row = cr_plane->Row(y);
    314      for (size_t x = 0; x < xsize; x += S) {
    315        const auto r = Load(df, r_row + x);
    316        const auto g = Load(df, g_row + x);
    317        const auto b = Load(df, b_row + x);
    318        const auto r_base = Mul(r, kR);
    319        const auto r_diff = Mul(r, kDiffR);
    320        const auto g_base = Mul(g, kG);
    321        const auto b_base = Mul(b, kB);
    322        const auto b_diff = Mul(b, kDiffB);
    323        const auto y_base = Add(r_base, Add(g_base, b_base));
    324        const auto y_vec = Sub(y_base, k128);
    325        const auto cb_vec = Mul(Sub(b_diff, y_base), kNormB);
    326        const auto cr_vec = Mul(Sub(r_diff, y_base), kNormR);
    327        Store(y_vec, df, y_row + x);
    328        Store(cb_vec, df, cb_row + x);
    329        Store(cr_vec, df, cr_row + x);
    330      }
    331    }
    332    return true;
    333  };
    334  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<int>(num_stripes),
    335                                ThreadPool::NoInit, transform, "RgbToYcbCr"));
    336  return true;
    337 }
    338 
    339 // NOLINTNEXTLINE(google-readability-namespace-comments)
    340 }  // namespace HWY_NAMESPACE
    341 }  // namespace jxl
    342 HWY_AFTER_NAMESPACE();
    343 
    344 #if HWY_ONCE
    345 namespace jxl {
    346 HWY_EXPORT(ToXYB);
    347 Status ToXYB(const ColorEncoding& c_current, float intensity_target,
    348             const ImageF* black, ThreadPool* pool, Image3F* JXL_RESTRICT image,
    349             const JxlCmsInterface& cms, Image3F* const JXL_RESTRICT linear) {
    350  return HWY_DYNAMIC_DISPATCH(ToXYB)(c_current, intensity_target, black, pool,
    351                                     image, cms, linear);
    352 }
    353 
    354 Status ToXYB(const ImageBundle& in, ThreadPool* pool, Image3F* JXL_RESTRICT xyb,
    355             const JxlCmsInterface& cms, Image3F* JXL_RESTRICT linear) {
    356  JxlMemoryManager* memory_manager = in.memory_manager();
    357  JXL_ASSIGN_OR_RETURN(*xyb,
    358                       Image3F::Create(memory_manager, in.xsize(), in.ysize()));
    359  JXL_RETURN_IF_ERROR(CopyImageTo(in.color(), xyb));
    360  JXL_RETURN_IF_ERROR(ToXYB(in.c_current(), in.metadata()->IntensityTarget(),
    361                            in.black(), pool, xyb, cms, linear));
    362  return true;
    363 }
    364 
    365 HWY_EXPORT(LinearRGBRowToXYB);
    366 void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
    367                       float* JXL_RESTRICT row2,
    368                       const float* JXL_RESTRICT premul_absorb, size_t xsize) {
    369  HWY_DYNAMIC_DISPATCH(LinearRGBRowToXYB)
    370  (row0, row1, row2, premul_absorb, xsize);
    371 }
    372 
    373 HWY_EXPORT(ComputePremulAbsorb);
    374 void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
    375  HWY_DYNAMIC_DISPATCH(ComputePremulAbsorb)(intensity_target, premul_absorb);
    376 }
    377 
    378 void ScaleXYBRow(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
    379                 float* JXL_RESTRICT row2, size_t xsize) {
    380  for (size_t x = 0; x < xsize; x++) {
    381    row2[x] = (row2[x] - row1[x] + jxl::cms::kScaledXYBOffset[2]) *
    382              jxl::cms::kScaledXYBScale[2];
    383    row0[x] = (row0[x] + jxl::cms::kScaledXYBOffset[0]) *
    384              jxl::cms::kScaledXYBScale[0];
    385    row1[x] = (row1[x] + jxl::cms::kScaledXYBOffset[1]) *
    386              jxl::cms::kScaledXYBScale[1];
    387  }
    388 }
    389 
    390 void ScaleXYB(Image3F* opsin) {
    391  for (size_t y = 0; y < opsin->ysize(); y++) {
    392    float* row0 = opsin->PlaneRow(0, y);
    393    float* row1 = opsin->PlaneRow(1, y);
    394    float* row2 = opsin->PlaneRow(2, y);
    395    ScaleXYBRow(row0, row1, row2, opsin->xsize());
    396  }
    397 }
    398 
    399 HWY_EXPORT(RgbToYcbcr);
    400 Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
    401                  const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
    402                  ImageF* cr_plane, ThreadPool* pool) {
    403  return HWY_DYNAMIC_DISPATCH(RgbToYcbcr)(r_plane, g_plane, b_plane, y_plane,
    404                                          cb_plane, cr_plane, pool);
    405 }
    406 
    407 }  // namespace jxl
    408 #endif  // HWY_ONCE
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE