dec_xyb.cc (12656B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jxl/dec_xyb.h" 7 8 #include <cstring> 9 10 #undef HWY_TARGET_INCLUDE 11 #define HWY_TARGET_INCLUDE "lib/jxl/dec_xyb.cc" 12 #include <hwy/foreach_target.h> 13 #include <hwy/highway.h> 14 15 #include "lib/jxl/base/compiler_specific.h" 16 #include "lib/jxl/base/matrix_ops.h" 17 #include "lib/jxl/base/rect.h" 18 #include "lib/jxl/base/sanitizers.h" 19 #include "lib/jxl/base/status.h" 20 #include "lib/jxl/cms/jxl_cms_internal.h" 21 #include "lib/jxl/cms/opsin_params.h" 22 #include "lib/jxl/color_encoding_internal.h" 23 #include "lib/jxl/dec_xyb-inl.h" 24 #include "lib/jxl/image.h" 25 #include "lib/jxl/opsin_params.h" 26 #include "lib/jxl/quantizer.h" 27 HWY_BEFORE_NAMESPACE(); 28 namespace jxl { 29 namespace HWY_NAMESPACE { 30 31 // These templates are not found via ADL. 32 using hwy::HWY_NAMESPACE::MulAdd; 33 34 Status OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool, 35 const OpsinParams& opsin_params) { 36 JXL_CHECK_IMAGE_INITIALIZED(*inout, Rect(*inout)); 37 38 const size_t xsize = inout->xsize(); // not padded 39 const auto process_row = [&](const uint32_t task, 40 size_t /* thread */) -> Status { 41 const size_t y = task; 42 43 // Faster than adding via ByteOffset at end of loop. 44 float* JXL_RESTRICT row0 = inout->PlaneRow(0, y); 45 float* JXL_RESTRICT row1 = inout->PlaneRow(1, y); 46 float* JXL_RESTRICT row2 = inout->PlaneRow(2, y); 47 48 const HWY_FULL(float) d; 49 50 for (size_t x = 0; x < xsize; x += Lanes(d)) { 51 const auto in_opsin_x = Load(d, row0 + x); 52 const auto in_opsin_y = Load(d, row1 + x); 53 const auto in_opsin_b = Load(d, row2 + x); 54 auto linear_r = Undefined(d); 55 auto linear_g = Undefined(d); 56 auto linear_b = Undefined(d); 57 XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params, &linear_r, 58 &linear_g, &linear_b); 59 60 Store(linear_r, d, row0 + x); 61 Store(linear_g, d, row1 + x); 62 Store(linear_b, d, row2 + x); 63 } 64 return true; 65 }; 66 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, inout->ysize(), ThreadPool::NoInit, 67 process_row, "OpsinToLinear")); 68 return true; 69 } 70 71 // Same, but not in-place. 72 Status OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool, 73 Image3F* JXL_RESTRICT linear, 74 const OpsinParams& opsin_params) { 75 JXL_ENSURE(SameSize(rect, *linear)); 76 JXL_CHECK_IMAGE_INITIALIZED(opsin, rect); 77 78 const auto process_row = [&](const uint32_t task, 79 size_t /*thread*/) -> Status { 80 const size_t y = static_cast<size_t>(task); 81 82 // Faster than adding via ByteOffset at end of loop. 83 const float* JXL_RESTRICT row_opsin_0 = rect.ConstPlaneRow(opsin, 0, y); 84 const float* JXL_RESTRICT row_opsin_1 = rect.ConstPlaneRow(opsin, 1, y); 85 const float* JXL_RESTRICT row_opsin_2 = rect.ConstPlaneRow(opsin, 2, y); 86 float* JXL_RESTRICT row_linear_0 = linear->PlaneRow(0, y); 87 float* JXL_RESTRICT row_linear_1 = linear->PlaneRow(1, y); 88 float* JXL_RESTRICT row_linear_2 = linear->PlaneRow(2, y); 89 90 const HWY_FULL(float) d; 91 92 for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) { 93 const auto in_opsin_x = Load(d, row_opsin_0 + x); 94 const auto in_opsin_y = Load(d, row_opsin_1 + x); 95 const auto in_opsin_b = Load(d, row_opsin_2 + x); 96 auto linear_r = Undefined(d); 97 auto linear_g = Undefined(d); 98 auto linear_b = Undefined(d); 99 XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params, &linear_r, 100 &linear_g, &linear_b); 101 102 Store(linear_r, d, row_linear_0 + x); 103 Store(linear_g, d, row_linear_1 + x); 104 Store(linear_b, d, row_linear_2 + x); 105 } 106 return true; 107 }; 108 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<int>(rect.ysize()), 109 ThreadPool::NoInit, process_row, 110 "OpsinToLinear(Rect)")); 111 JXL_CHECK_IMAGE_INITIALIZED(*linear, rect); 112 return true; 113 } 114 115 // Transform YCbCr to RGB. 116 // Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B). 117 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) { 118 JXL_CHECK_IMAGE_INITIALIZED(ycbcr, rect); 119 const HWY_CAPPED(float, kBlockDim) df; 120 const size_t S = Lanes(df); // Step. 121 122 const size_t xsize = rect.xsize(); 123 const size_t ysize = rect.ysize(); 124 if ((xsize == 0) || (ysize == 0)) return; 125 126 // Full-range BT.601 as defined by JFIF Clause 7: 127 // https://www.itu.int/rec/T-REC-T.871-201105-I/en 128 const auto c128 = Set(df, 128.0f / 255); 129 const auto crcr = Set(df, 1.402f); 130 const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f); 131 const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f); 132 const auto cbcb = Set(df, 1.772f); 133 134 for (size_t y = 0; y < ysize; y++) { 135 const float* y_row = rect.ConstPlaneRow(ycbcr, 1, y); 136 const float* cb_row = rect.ConstPlaneRow(ycbcr, 0, y); 137 const float* cr_row = rect.ConstPlaneRow(ycbcr, 2, y); 138 float* r_row = rect.PlaneRow(rgb, 0, y); 139 float* g_row = rect.PlaneRow(rgb, 1, y); 140 float* b_row = rect.PlaneRow(rgb, 2, y); 141 for (size_t x = 0; x < xsize; x += S) { 142 const auto y_vec = Add(Load(df, y_row + x), c128); 143 const auto cb_vec = Load(df, cb_row + x); 144 const auto cr_vec = Load(df, cr_row + x); 145 const auto r_vec = MulAdd(crcr, cr_vec, y_vec); 146 const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec)); 147 const auto b_vec = MulAdd(cbcb, cb_vec, y_vec); 148 Store(r_vec, df, r_row + x); 149 Store(g_vec, df, g_row + x); 150 Store(b_vec, df, b_row + x); 151 } 152 } 153 JXL_CHECK_IMAGE_INITIALIZED(*rgb, rect); 154 } 155 156 // NOLINTNEXTLINE(google-readability-namespace-comments) 157 } // namespace HWY_NAMESPACE 158 } // namespace jxl 159 HWY_AFTER_NAMESPACE(); 160 161 #if HWY_ONCE 162 namespace jxl { 163 164 HWY_EXPORT(OpsinToLinearInplace); 165 Status OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool, 166 const OpsinParams& opsin_params) { 167 return HWY_DYNAMIC_DISPATCH(OpsinToLinearInplace)(inout, pool, opsin_params); 168 } 169 170 HWY_EXPORT(OpsinToLinear); 171 Status OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool, 172 Image3F* JXL_RESTRICT linear, 173 const OpsinParams& opsin_params) { 174 return HWY_DYNAMIC_DISPATCH(OpsinToLinear)(opsin, rect, pool, linear, 175 opsin_params); 176 } 177 178 HWY_EXPORT(YcbcrToRgb); 179 void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) { 180 HWY_DYNAMIC_DISPATCH(YcbcrToRgb)(ycbcr, rgb, rect); 181 } 182 183 HWY_EXPORT(HasFastXYBTosRGB8); 184 bool HasFastXYBTosRGB8() { return HWY_DYNAMIC_DISPATCH(HasFastXYBTosRGB8)(); } 185 186 HWY_EXPORT(FastXYBTosRGB8); 187 Status FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba, 188 size_t xsize) { 189 return HWY_DYNAMIC_DISPATCH(FastXYBTosRGB8)(input, output, is_rgba, xsize); 190 } 191 192 void OpsinParams::Init(float intensity_target) { 193 InitSIMDInverseMatrix(GetOpsinAbsorbanceInverseMatrix(), inverse_opsin_matrix, 194 intensity_target); 195 memcpy(opsin_biases, jxl::cms::kNegOpsinAbsorbanceBiasRGB.data(), 196 sizeof(jxl::cms::kNegOpsinAbsorbanceBiasRGB)); 197 memcpy(quant_biases, kDefaultQuantBias, sizeof(kDefaultQuantBias)); 198 for (size_t c = 0; c < 4; c++) { 199 opsin_biases_cbrt[c] = cbrtf(opsin_biases[c]); 200 } 201 } 202 203 bool CanOutputToColorEncoding(const ColorEncoding& c_desired) { 204 if (!c_desired.HaveFields()) { 205 return false; 206 } 207 // TODO(veluca): keep in sync with dec_reconstruct.cc 208 const auto& tf = c_desired.Tf(); 209 if (!tf.IsPQ() && !tf.IsSRGB() && !tf.have_gamma && !tf.IsLinear() && 210 !tf.IsHLG() && !tf.IsDCI() && !tf.Is709()) { 211 return false; 212 } 213 if (c_desired.IsGray() && c_desired.GetWhitePointType() != WhitePoint::kD65) { 214 // TODO(veluca): figure out what should happen here. 215 return false; 216 } 217 return true; 218 } 219 220 Status OutputEncodingInfo::SetFromMetadata(const CodecMetadata& metadata) { 221 orig_color_encoding = metadata.m.color_encoding; 222 orig_intensity_target = metadata.m.IntensityTarget(); 223 desired_intensity_target = orig_intensity_target; 224 const auto& im = metadata.transform_data.opsin_inverse_matrix; 225 orig_inverse_matrix = im.inverse_matrix; 226 default_transform = im.all_default; 227 xyb_encoded = metadata.m.xyb_encoded; 228 std::copy(std::begin(im.opsin_biases), std::end(im.opsin_biases), 229 opsin_params.opsin_biases); 230 for (int i = 0; i < 3; ++i) { 231 opsin_params.opsin_biases_cbrt[i] = cbrtf(opsin_params.opsin_biases[i]); 232 } 233 opsin_params.opsin_biases_cbrt[3] = opsin_params.opsin_biases[3] = 1; 234 std::copy(std::begin(im.quant_biases), std::end(im.quant_biases), 235 opsin_params.quant_biases); 236 bool orig_ok = CanOutputToColorEncoding(orig_color_encoding); 237 bool orig_grey = orig_color_encoding.IsGray(); 238 return SetColorEncoding(!xyb_encoded || orig_ok 239 ? orig_color_encoding 240 : ColorEncoding::LinearSRGB(orig_grey)); 241 } 242 243 Status OutputEncodingInfo::MaybeSetColorEncoding( 244 const ColorEncoding& c_desired) { 245 if (c_desired.GetColorSpace() == ColorSpace::kXYB && 246 ((color_encoding.GetColorSpace() == ColorSpace::kRGB && 247 color_encoding.GetPrimariesType() != Primaries::kSRGB) || 248 color_encoding.Tf().IsPQ())) { 249 return false; 250 } 251 if (!xyb_encoded && !CanOutputToColorEncoding(c_desired)) { 252 return false; 253 } 254 return SetColorEncoding(c_desired); 255 } 256 257 Status OutputEncodingInfo::SetColorEncoding(const ColorEncoding& c_desired) { 258 color_encoding = c_desired; 259 linear_color_encoding = color_encoding; 260 linear_color_encoding.Tf().SetTransferFunction(TransferFunction::kLinear); 261 color_encoding_is_original = orig_color_encoding.SameColorEncoding(c_desired); 262 263 // Compute the opsin inverse matrix and luminances based on primaries and 264 // white point. 265 Matrix3x3 inverse_matrix; 266 bool inverse_matrix_is_default = default_transform; 267 inverse_matrix = orig_inverse_matrix; 268 constexpr Vector3 kSRGBLuminances{0.2126, 0.7152, 0.0722}; 269 luminances = kSRGBLuminances; 270 if ((c_desired.GetPrimariesType() != Primaries::kSRGB || 271 c_desired.GetWhitePointType() != WhitePoint::kD65) && 272 !c_desired.IsGray()) { 273 Matrix3x3 srgb_to_xyzd50; 274 const auto& srgb = ColorEncoding::SRGB(/*is_gray=*/false); 275 PrimariesCIExy p; 276 JXL_RETURN_IF_ERROR(srgb.GetPrimaries(p)); 277 CIExy w = srgb.GetWhitePoint(); 278 JXL_RETURN_IF_ERROR(PrimariesToXYZD50(p.r.x, p.r.y, p.g.x, p.g.y, p.b.x, 279 p.b.y, w.x, w.y, srgb_to_xyzd50)); 280 Matrix3x3 original_to_xyz; 281 JXL_RETURN_IF_ERROR(c_desired.GetPrimaries(p)); 282 w = c_desired.GetWhitePoint(); 283 if (!PrimariesToXYZ(p.r.x, p.r.y, p.g.x, p.g.y, p.b.x, p.b.y, w.x, w.y, 284 original_to_xyz)) { 285 return JXL_FAILURE("PrimariesToXYZ failed"); 286 } 287 luminances = original_to_xyz[1]; 288 if (xyb_encoded) { 289 Matrix3x3 adapt_to_d50; 290 if (!AdaptToXYZD50(c_desired.GetWhitePoint().x, 291 c_desired.GetWhitePoint().y, adapt_to_d50)) { 292 return JXL_FAILURE("AdaptToXYZD50 failed"); 293 } 294 Matrix3x3 xyzd50_to_original; 295 Mul3x3Matrix(adapt_to_d50, original_to_xyz, xyzd50_to_original); 296 JXL_RETURN_IF_ERROR(Inv3x3Matrix(xyzd50_to_original)); 297 Matrix3x3 srgb_to_original; 298 Mul3x3Matrix(xyzd50_to_original, srgb_to_xyzd50, srgb_to_original); 299 Mul3x3Matrix(srgb_to_original, orig_inverse_matrix, inverse_matrix); 300 inverse_matrix_is_default = false; 301 } 302 } 303 304 if (c_desired.IsGray()) { 305 Matrix3x3 tmp_inv_matrix = inverse_matrix; 306 Matrix3x3 srgb_to_luma{luminances, luminances, luminances}; 307 Mul3x3Matrix(srgb_to_luma, tmp_inv_matrix, inverse_matrix); 308 } 309 310 // The internal XYB color space uses absolute luminance, so we scale back the 311 // opsin inverse matrix to relative luminance where 1.0 corresponds to the 312 // original intensity target. 313 if (xyb_encoded) { 314 InitSIMDInverseMatrix(inverse_matrix, opsin_params.inverse_opsin_matrix, 315 orig_intensity_target); 316 all_default_opsin = (std::abs(orig_intensity_target - 255.0) <= 0.1f && 317 inverse_matrix_is_default); 318 } 319 320 // Set the inverse gamma based on color space transfer function. 321 const auto& tf = c_desired.Tf(); 322 inverse_gamma = (tf.have_gamma ? tf.GetGamma() 323 : tf.IsDCI() ? 1.0f / 2.6f 324 : 1.0); 325 return true; 326 } 327 328 } // namespace jxl 329 #endif // HWY_ONCE