// dec_external_image.cc (20027B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "lib/jxl/dec_external_image.h" 7 8 #include <jxl/memory_manager.h> 9 #include <jxl/types.h> 10 11 #include <algorithm> 12 #include <cstring> 13 #include <utility> 14 #include <vector> 15 16 #include "lib/jxl/base/status.h" 17 #include "lib/jxl/image.h" 18 19 #undef HWY_TARGET_INCLUDE 20 #define HWY_TARGET_INCLUDE "lib/jxl/dec_external_image.cc" 21 #include <hwy/foreach_target.h> 22 #include <hwy/highway.h> 23 24 #include "lib/jxl/alpha.h" 25 #include "lib/jxl/base/byte_order.h" 26 #include "lib/jxl/base/common.h" 27 #include "lib/jxl/base/compiler_specific.h" 28 #include "lib/jxl/base/printf_macros.h" 29 #include "lib/jxl/base/sanitizers.h" 30 31 HWY_BEFORE_NAMESPACE(); 32 namespace jxl { 33 namespace HWY_NAMESPACE { 34 35 // These templates are not found via ADL. 36 using hwy::HWY_NAMESPACE::Clamp; 37 using hwy::HWY_NAMESPACE::Mul; 38 using hwy::HWY_NAMESPACE::NearestInt; 39 40 // TODO(jon): check if this can be replaced by a FloatToU16 function 41 void FloatToU32(const float* in, uint32_t* out, size_t num, float mul, 42 size_t bits_per_sample) { 43 const HWY_FULL(float) d; 44 const hwy::HWY_NAMESPACE::Rebind<uint32_t, decltype(d)> du; 45 46 // Unpoison accessing partially-uninitialized vectors with memory sanitizer. 47 // This is because we run NearestInt() on the vector, which triggers MSAN even 48 // it is safe to do so since the values are not mixed between lanes. 49 const size_t num_round_up = RoundUpTo(num, Lanes(d)); 50 msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num)); 51 52 const auto one = Set(d, 1.0f); 53 const auto scale = Set(d, mul); 54 for (size_t x = 0; x < num; x += Lanes(d)) { 55 auto v = Load(d, in + x); 56 // Clamp turns NaN to 'min'. 
57 v = Clamp(v, Zero(d), one); 58 auto i = NearestInt(Mul(v, scale)); 59 Store(BitCast(du, i), du, out + x); 60 } 61 62 // Poison back the output. 63 msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num)); 64 } 65 66 void FloatToF16(const float* in, hwy::float16_t* out, size_t num) { 67 const HWY_FULL(float) d; 68 const hwy::HWY_NAMESPACE::Rebind<hwy::float16_t, decltype(d)> du; 69 70 // Unpoison accessing partially-uninitialized vectors with memory sanitizer. 71 // This is because we run DemoteTo() on the vector which triggers msan. 72 const size_t num_round_up = RoundUpTo(num, Lanes(d)); 73 msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num)); 74 75 for (size_t x = 0; x < num; x += Lanes(d)) { 76 auto v = Load(d, in + x); 77 auto v16 = DemoteTo(du, v); 78 Store(v16, du, out + x); 79 } 80 81 // Poison back the output. 82 msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num)); 83 } 84 85 // NOLINTNEXTLINE(google-readability-namespace-comments) 86 } // namespace HWY_NAMESPACE 87 } // namespace jxl 88 HWY_AFTER_NAMESPACE(); 89 90 #if HWY_ONCE 91 92 namespace jxl { 93 namespace { 94 95 // Stores a float in big endian 96 void StoreBEFloat(float value, uint8_t* p) { 97 uint32_t u; 98 memcpy(&u, &value, 4); 99 StoreBE32(u, p); 100 } 101 102 // Stores a float in little endian 103 void StoreLEFloat(float value, uint8_t* p) { 104 uint32_t u; 105 memcpy(&u, &value, 4); 106 StoreLE32(u, p); 107 } 108 109 // The orientation may not be identity. 
110 // TODO(lode): SIMDify where possible 111 template <typename T> 112 Status UndoOrientation(jxl::Orientation undo_orientation, const Plane<T>& image, 113 Plane<T>& out, jxl::ThreadPool* pool) { 114 const size_t xsize = image.xsize(); 115 const size_t ysize = image.ysize(); 116 JxlMemoryManager* memory_manager = image.memory_manager(); 117 118 if (undo_orientation == Orientation::kFlipHorizontal) { 119 JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, xsize, ysize)); 120 const auto process_row = [&](const uint32_t task, 121 size_t /*thread*/) -> Status { 122 const int64_t y = task; 123 const T* JXL_RESTRICT row_in = image.Row(y); 124 T* JXL_RESTRICT row_out = out.Row(y); 125 for (size_t x = 0; x < xsize; ++x) { 126 row_out[xsize - x - 1] = row_in[x]; 127 } 128 return true; 129 }; 130 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 131 ThreadPool::NoInit, process_row, 132 "UndoOrientation")); 133 } else if (undo_orientation == Orientation::kRotate180) { 134 JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, xsize, ysize)); 135 const auto process_row = [&](const uint32_t task, 136 size_t /*thread*/) -> Status { 137 const int64_t y = task; 138 const T* JXL_RESTRICT row_in = image.Row(y); 139 T* JXL_RESTRICT row_out = out.Row(ysize - y - 1); 140 for (size_t x = 0; x < xsize; ++x) { 141 row_out[xsize - x - 1] = row_in[x]; 142 } 143 return true; 144 }; 145 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 146 ThreadPool::NoInit, process_row, 147 "UndoOrientation")); 148 } else if (undo_orientation == Orientation::kFlipVertical) { 149 JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, xsize, ysize)); 150 const auto process_row = [&](const uint32_t task, 151 size_t /*thread*/) -> Status { 152 const int64_t y = task; 153 const T* JXL_RESTRICT row_in = image.Row(y); 154 T* JXL_RESTRICT row_out = out.Row(ysize - y - 1); 155 for (size_t x = 0; x < xsize; ++x) { 156 row_out[x] = row_in[x]; 157 } 158 return true; 
159 }; 160 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 161 ThreadPool::NoInit, process_row, 162 "UndoOrientation")); 163 } else if (undo_orientation == Orientation::kTranspose) { 164 JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize)); 165 const auto process_row = [&](const uint32_t task, 166 size_t /*thread*/) -> Status { 167 const int64_t y = task; 168 const T* JXL_RESTRICT row_in = image.Row(y); 169 for (size_t x = 0; x < xsize; ++x) { 170 out.Row(x)[y] = row_in[x]; 171 } 172 return true; 173 }; 174 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 175 ThreadPool::NoInit, process_row, 176 "UndoOrientation")); 177 } else if (undo_orientation == Orientation::kRotate90) { 178 JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize)); 179 const auto process_row = [&](const uint32_t task, 180 size_t /*thread*/) -> Status { 181 const int64_t y = task; 182 const T* JXL_RESTRICT row_in = image.Row(y); 183 for (size_t x = 0; x < xsize; ++x) { 184 out.Row(x)[ysize - y - 1] = row_in[x]; 185 } 186 return true; 187 }; 188 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 189 ThreadPool::NoInit, process_row, 190 "UndoOrientation")); 191 } else if (undo_orientation == Orientation::kAntiTranspose) { 192 JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize)); 193 const auto process_row = [&](const uint32_t task, 194 size_t /*thread*/) -> Status { 195 const int64_t y = task; 196 const T* JXL_RESTRICT row_in = image.Row(y); 197 for (size_t x = 0; x < xsize; ++x) { 198 out.Row(xsize - x - 1)[ysize - y - 1] = row_in[x]; 199 } 200 return true; 201 }; 202 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 203 ThreadPool::NoInit, process_row, 204 "UndoOrientation")); 205 } else if (undo_orientation == Orientation::kRotate270) { 206 JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize)); 207 const auto process_row = [&](const 
uint32_t task, 208 size_t /*thread*/) -> Status { 209 const int64_t y = task; 210 const T* JXL_RESTRICT row_in = image.Row(y); 211 for (size_t x = 0; x < xsize; ++x) { 212 out.Row(xsize - x - 1)[y] = row_in[x]; 213 } 214 return true; 215 }; 216 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 217 ThreadPool::NoInit, process_row, 218 "UndoOrientation")); 219 } 220 return true; 221 } 222 } // namespace 223 224 HWY_EXPORT(FloatToU32); 225 HWY_EXPORT(FloatToF16); 226 227 namespace { 228 229 using StoreFuncType = void(uint32_t value, uint8_t* dest); 230 template <StoreFuncType StoreFunc> 231 void StoreUintRow(uint32_t* JXL_RESTRICT* rows_u32, size_t num_channels, 232 size_t xsize, size_t bytes_per_sample, 233 uint8_t* JXL_RESTRICT out) { 234 for (size_t x = 0; x < xsize; ++x) { 235 for (size_t c = 0; c < num_channels; c++) { 236 StoreFunc(rows_u32[c][x], 237 out + (num_channels * x + c) * bytes_per_sample); 238 } 239 } 240 } 241 242 template <void(StoreFunc)(float, uint8_t*)> 243 void StoreFloatRow(const float* JXL_RESTRICT* rows_in, size_t num_channels, 244 size_t xsize, uint8_t* JXL_RESTRICT out) { 245 for (size_t x = 0; x < xsize; ++x) { 246 for (size_t c = 0; c < num_channels; c++) { 247 StoreFunc(rows_in[c][x], out + (num_channels * x + c) * sizeof(float)); 248 } 249 } 250 } 251 252 void JXL_INLINE Store8(uint32_t value, uint8_t* dest) { *dest = value & 0xff; } 253 254 } // namespace 255 256 Status ConvertChannelsToExternal(const ImageF* in_channels[], 257 size_t num_channels, size_t bits_per_sample, 258 bool float_out, JxlEndianness endianness, 259 size_t stride, jxl::ThreadPool* pool, 260 void* out_image, size_t out_size, 261 const PixelCallback& out_callback, 262 jxl::Orientation undo_orientation) { 263 JXL_ENSURE(num_channels != 0 && num_channels <= kConvertMaxChannels); 264 JXL_ENSURE(in_channels[0] != nullptr); 265 JxlMemoryManager* memory_manager = in_channels[0]->memory_manager(); 266 JXL_ENSURE(float_out ? 
bits_per_sample == 16 || bits_per_sample == 32 267 : bits_per_sample > 0 && bits_per_sample <= 16); 268 const bool has_out_image = (out_image != nullptr); 269 if (has_out_image == out_callback.IsPresent()) { 270 return JXL_FAILURE( 271 "Must provide either an out_image or an out_callback, but not both."); 272 } 273 std::vector<const ImageF*> channels; 274 channels.assign(in_channels, in_channels + num_channels); 275 276 const size_t bytes_per_channel = DivCeil(bits_per_sample, jxl::kBitsPerByte); 277 const size_t bytes_per_pixel = num_channels * bytes_per_channel; 278 279 std::vector<std::vector<uint8_t>> row_out_callback; 280 const auto FreeCallbackOpaque = [&out_callback](void* p) { 281 out_callback.destroy(p); 282 }; 283 std::unique_ptr<void, decltype(FreeCallbackOpaque)> out_run_opaque( 284 nullptr, FreeCallbackOpaque); 285 auto InitOutCallback = [&](size_t num_threads) -> Status { 286 if (out_callback.IsPresent()) { 287 out_run_opaque.reset(out_callback.Init(num_threads, stride)); 288 JXL_RETURN_IF_ERROR(out_run_opaque != nullptr); 289 row_out_callback.resize(num_threads); 290 for (size_t i = 0; i < num_threads; ++i) { 291 row_out_callback[i].resize(stride); 292 } 293 } 294 return true; 295 }; 296 297 // Channels used to store the transformed original channels if needed. 298 ImageF temp_channels[kConvertMaxChannels]; 299 if (undo_orientation != Orientation::kIdentity) { 300 for (size_t c = 0; c < num_channels; ++c) { 301 if (channels[c]) { 302 JXL_RETURN_IF_ERROR(UndoOrientation(undo_orientation, *channels[c], 303 temp_channels[c], pool)); 304 channels[c] = &(temp_channels[c]); 305 } 306 } 307 } 308 309 // First channel may not be nullptr. 
310 size_t xsize = channels[0]->xsize(); 311 size_t ysize = channels[0]->ysize(); 312 if (stride < bytes_per_pixel * xsize) { 313 return JXL_FAILURE("stride is smaller than scanline width in bytes: %" PRIuS 314 " vs %" PRIuS, 315 stride, bytes_per_pixel * xsize); 316 } 317 if (!out_callback.IsPresent() && 318 out_size < (ysize - 1) * stride + bytes_per_pixel * xsize) { 319 return JXL_FAILURE("out_size is too small to store image"); 320 } 321 322 const bool little_endian = 323 endianness == JXL_LITTLE_ENDIAN || 324 (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian()); 325 326 // Handle the case where a channel is nullptr by creating a single row with 327 // ones to use instead. 328 ImageF ones; 329 for (size_t c = 0; c < num_channels; ++c) { 330 if (!channels[c]) { 331 JXL_ASSIGN_OR_RETURN(ones, ImageF::Create(memory_manager, xsize, 1)); 332 FillImage(1.0f, &ones); 333 break; 334 } 335 } 336 337 if (float_out) { 338 if (bits_per_sample == 16) { 339 bool swap_endianness = little_endian != IsLittleEndian(); 340 Plane<hwy::float16_t> f16_cache; 341 const auto init_cache = [&](size_t num_threads) -> Status { 342 JXL_ASSIGN_OR_RETURN( 343 f16_cache, Plane<hwy::float16_t>::Create( 344 memory_manager, xsize, num_channels * num_threads)); 345 JXL_RETURN_IF_ERROR(InitOutCallback(num_threads)); 346 return true; 347 }; 348 const auto process_row = [&](const uint32_t task, 349 const size_t thread) -> Status { 350 const int64_t y = task; 351 const float* JXL_RESTRICT row_in[kConvertMaxChannels]; 352 for (size_t c = 0; c < num_channels; c++) { 353 row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0); 354 } 355 hwy::float16_t* JXL_RESTRICT row_f16[kConvertMaxChannels]; 356 for (size_t c = 0; c < num_channels; c++) { 357 row_f16[c] = f16_cache.Row(c + thread * num_channels); 358 HWY_DYNAMIC_DISPATCH(FloatToF16) 359 (row_in[c], row_f16[c], xsize); 360 } 361 uint8_t* row_out = 362 out_callback.IsPresent() 363 ? 
row_out_callback[thread].data() 364 : &(reinterpret_cast<uint8_t*>(out_image))[stride * y]; 365 // interleave the one scanline 366 hwy::float16_t* row_f16_out = 367 reinterpret_cast<hwy::float16_t*>(row_out); 368 for (size_t x = 0; x < xsize; x++) { 369 for (size_t c = 0; c < num_channels; c++) { 370 row_f16_out[x * num_channels + c] = row_f16[c][x]; 371 } 372 } 373 if (swap_endianness) { 374 size_t size = xsize * num_channels * 2; 375 for (size_t i = 0; i < size; i += 2) { 376 std::swap(row_out[i + 0], row_out[i + 1]); 377 } 378 } 379 if (out_callback.IsPresent()) { 380 out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out); 381 } 382 return true; 383 }; 384 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 385 init_cache, process_row, "ConvertF16")); 386 } else if (bits_per_sample == 32) { 387 const auto init_cache = [&](size_t num_threads) -> Status { 388 JXL_RETURN_IF_ERROR(InitOutCallback(num_threads)); 389 return true; 390 }; 391 const auto process_row = [&](const uint32_t task, 392 const size_t thread) -> Status { 393 const int64_t y = task; 394 uint8_t* row_out = 395 out_callback.IsPresent() 396 ? row_out_callback[thread].data() 397 : &(reinterpret_cast<uint8_t*>(out_image))[stride * y]; 398 const float* JXL_RESTRICT row_in[kConvertMaxChannels]; 399 for (size_t c = 0; c < num_channels; c++) { 400 row_in[c] = channels[c] ? 
channels[c]->Row(y) : ones.Row(0); 401 } 402 if (little_endian) { 403 StoreFloatRow<StoreLEFloat>(row_in, num_channels, xsize, row_out); 404 } else { 405 StoreFloatRow<StoreBEFloat>(row_in, num_channels, xsize, row_out); 406 } 407 if (out_callback.IsPresent()) { 408 out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out); 409 } 410 return true; 411 }; 412 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 413 init_cache, process_row, "ConvertFloat")); 414 } else { 415 return JXL_FAILURE("float other than 16-bit and 32-bit not supported"); 416 } 417 } else { 418 // Multiplier to convert from floating point 0-1 range to the integer 419 // range. 420 float mul = (1ull << bits_per_sample) - 1; 421 Plane<uint32_t> u32_cache; 422 const auto init_cache = [&](size_t num_threads) -> Status { 423 JXL_ASSIGN_OR_RETURN(u32_cache, 424 Plane<uint32_t>::Create(memory_manager, xsize, 425 num_channels * num_threads)); 426 JXL_RETURN_IF_ERROR(InitOutCallback(num_threads)); 427 return true; 428 }; 429 const auto process_row = [&](const uint32_t task, 430 const size_t thread) -> Status { 431 const int64_t y = task; 432 uint8_t* row_out = 433 out_callback.IsPresent() 434 ? row_out_callback[thread].data() 435 : &(reinterpret_cast<uint8_t*>(out_image))[stride * y]; 436 const float* JXL_RESTRICT row_in[kConvertMaxChannels]; 437 for (size_t c = 0; c < num_channels; c++) { 438 row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0); 439 } 440 uint32_t* JXL_RESTRICT row_u32[kConvertMaxChannels]; 441 for (size_t c = 0; c < num_channels; c++) { 442 row_u32[c] = u32_cache.Row(c + thread * num_channels); 443 // row_u32[] is a per-thread temporary row storage, this isn't 444 // intended to be initialized on a previous run. 
445 msan::PoisonMemory(row_u32[c], xsize * sizeof(row_u32[c][0])); 446 HWY_DYNAMIC_DISPATCH(FloatToU32) 447 (row_in[c], row_u32[c], xsize, mul, bits_per_sample); 448 } 449 if (bits_per_sample <= 8) { 450 StoreUintRow<Store8>(row_u32, num_channels, xsize, 1, row_out); 451 } else { 452 if (little_endian) { 453 StoreUintRow<StoreLE16>(row_u32, num_channels, xsize, 2, row_out); 454 } else { 455 StoreUintRow<StoreBE16>(row_u32, num_channels, xsize, 2, row_out); 456 } 457 } 458 if (out_callback.IsPresent()) { 459 out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out); 460 } 461 return true; 462 }; 463 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 464 init_cache, process_row, "ConvertUint")); 465 } 466 return true; 467 } 468 469 Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample, 470 bool float_out, size_t num_channels, 471 JxlEndianness endianness, size_t stride, 472 jxl::ThreadPool* pool, void* out_image, 473 size_t out_size, const PixelCallback& out_callback, 474 jxl::Orientation undo_orientation, 475 bool unpremul_alpha) { 476 bool want_alpha = num_channels == 2 || num_channels == 4; 477 size_t color_channels = num_channels <= 2 ? 1 : 3; 478 479 const Image3F* color = &ib.color(); 480 JxlMemoryManager* memory_manager = color->memory_manager(); 481 // Undo premultiplied alpha. 
482 Image3F unpremul; 483 if (ib.AlphaIsPremultiplied() && ib.HasAlpha() && unpremul_alpha) { 484 JXL_ASSIGN_OR_RETURN( 485 unpremul, 486 Image3F::Create(memory_manager, color->xsize(), color->ysize())); 487 JXL_RETURN_IF_ERROR(CopyImageTo(*color, &unpremul)); 488 const ImageF* alpha = ib.alpha(); 489 for (size_t y = 0; y < unpremul.ysize(); y++) { 490 UnpremultiplyAlpha(unpremul.PlaneRow(0, y), unpremul.PlaneRow(1, y), 491 unpremul.PlaneRow(2, y), alpha->Row(y), 492 unpremul.xsize()); 493 } 494 color = &unpremul; 495 } 496 497 const ImageF* channels[kConvertMaxChannels]; 498 size_t c = 0; 499 for (; c < color_channels; c++) { 500 channels[c] = &color->Plane(c); 501 } 502 if (want_alpha) { 503 channels[c++] = ib.alpha(); 504 } 505 JXL_ENSURE(num_channels == c); 506 507 return ConvertChannelsToExternal( 508 channels, num_channels, bits_per_sample, float_out, endianness, stride, 509 pool, out_image, out_size, out_callback, undo_orientation); 510 } 511 512 } // namespace jxl 513 #endif // HWY_ONCE