// image_metadata.h (16094B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 // Main codestream header bundles, the metadata that applies to all frames. 7 // Enums must align with the C API definitions in codestream_header.h. 8 9 #ifndef LIB_JXL_IMAGE_METADATA_H_ 10 #define LIB_JXL_IMAGE_METADATA_H_ 11 12 #include <jxl/codestream_header.h> 13 14 #include <cstddef> 15 #include <cstdint> 16 #include <string> 17 #include <vector> 18 19 #include "lib/jxl/base/compiler_specific.h" 20 #include "lib/jxl/base/matrix_ops.h" 21 #include "lib/jxl/base/status.h" 22 #include "lib/jxl/color_encoding_internal.h" 23 #include "lib/jxl/dec_bit_reader.h" 24 #include "lib/jxl/field_encodings.h" 25 #include "lib/jxl/fields.h" 26 #include "lib/jxl/headers.h" 27 28 namespace jxl { 29 30 struct AuxOut; 31 enum class LayerType : uint8_t; 32 33 // EXIF orientation of the image. This field overrides any field present in 34 // actual EXIF metadata. The value tells which transformation the decoder must 35 // apply after decoding to display the image with the correct orientation. 36 enum class Orientation : uint32_t { 37 // Values 1..8 match the EXIF definitions. 38 kIdentity = JXL_ORIENT_IDENTITY, 39 kFlipHorizontal = JXL_ORIENT_FLIP_HORIZONTAL, 40 kRotate180 = JXL_ORIENT_ROTATE_180, 41 kFlipVertical = JXL_ORIENT_FLIP_VERTICAL, 42 kTranspose = JXL_ORIENT_TRANSPOSE, 43 kRotate90 = JXL_ORIENT_ROTATE_90_CW, 44 kAntiTranspose = JXL_ORIENT_ANTI_TRANSPOSE, 45 kRotate270 = JXL_ORIENT_ROTATE_90_CCW, 46 }; 47 // Don't need an EnumBits because Orientation is not read via Enum(). 
48 49 enum class ExtraChannel : uint32_t { 50 // First two enumerators (most common) are cheaper to encode 51 kAlpha = JXL_CHANNEL_ALPHA, 52 kDepth = JXL_CHANNEL_DEPTH, 53 54 kSpotColor = JXL_CHANNEL_SPOT_COLOR, 55 kSelectionMask = JXL_CHANNEL_SELECTION_MASK, 56 kBlack = JXL_CHANNEL_BLACK, // for CMYK 57 kCFA = JXL_CHANNEL_CFA, // Bayer channel 58 kThermal = JXL_CHANNEL_THERMAL, 59 kReserved0 = JXL_CHANNEL_RESERVED0, 60 kReserved1 = JXL_CHANNEL_RESERVED1, 61 kReserved2 = JXL_CHANNEL_RESERVED2, 62 kReserved3 = JXL_CHANNEL_RESERVED3, 63 kReserved4 = JXL_CHANNEL_RESERVED4, 64 kReserved5 = JXL_CHANNEL_RESERVED5, 65 kReserved6 = JXL_CHANNEL_RESERVED6, 66 kReserved7 = JXL_CHANNEL_RESERVED7, 67 // disambiguated via name string, raise warning if unsupported 68 kUnknown = JXL_CHANNEL_UNKNOWN, 69 // like kUnknown but can silently be ignored 70 kOptional = JXL_CHANNEL_OPTIONAL 71 }; 72 static inline const char* EnumName(ExtraChannel /*unused*/) { 73 return "ExtraChannel"; 74 } 75 static inline constexpr uint64_t EnumBits(ExtraChannel /*unused*/) { 76 using EC = ExtraChannel; 77 return MakeBit(EC::kAlpha) | MakeBit(EC::kDepth) | MakeBit(EC::kSpotColor) | 78 MakeBit(EC::kSelectionMask) | MakeBit(EC::kBlack) | MakeBit(EC::kCFA) | 79 MakeBit(EC::kThermal) | MakeBit(EC::kUnknown) | MakeBit(EC::kOptional); 80 } 81 82 // Used in ImageMetadata and ExtraChannelInfo. 83 struct BitDepth : public Fields { 84 BitDepth(); 85 JXL_FIELDS_NAME(BitDepth) 86 87 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 88 89 std::string DebugString() const; 90 91 // Whether the original (uncompressed) samples are floating point or 92 // unsigned integer. 93 bool floating_point_sample; 94 95 // Bit depth of the original (uncompressed) image samples. Must be in the 96 // range [1, 32]. 97 uint32_t bits_per_sample; 98 99 // Floating point exponent bits of the original (uncompressed) image samples, 100 // only used if floating_point_sample is true. 
101 // If used, the samples are floating point with: 102 // - 1 sign bit 103 // - exponent_bits_per_sample exponent bits 104 // - (bits_per_sample - exponent_bits_per_sample - 1) mantissa bits 105 // If used, exponent_bits_per_sample must be in the range 106 // [2, 8] and amount of mantissa bits must be in the range [2, 23]. 107 // NOTE: exponent_bits_per_sample is 8 for single precision binary32 108 // point, 5 for half precision binary16, 7 for fp24. 109 uint32_t exponent_bits_per_sample; 110 }; 111 112 // Describes one extra channel. 113 struct ExtraChannelInfo : public Fields { 114 ExtraChannelInfo(); 115 JXL_FIELDS_NAME(ExtraChannelInfo) 116 117 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 118 119 std::string DebugString() const; 120 121 mutable bool all_default; 122 123 ExtraChannel type; 124 BitDepth bit_depth; 125 uint32_t dim_shift; // downsampled by 2^dim_shift on each axis 126 127 std::string name; // UTF-8 128 129 // Conditional: 130 bool alpha_associated; // i.e. premultiplied 131 float spot_color[4]; // spot color in linear RGBA 132 uint32_t cfa_channel; 133 }; 134 135 struct OpsinInverseMatrix : public Fields { 136 OpsinInverseMatrix(); 137 JXL_FIELDS_NAME(OpsinInverseMatrix) 138 139 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 140 141 mutable bool all_default; 142 143 Matrix3x3 inverse_matrix; 144 float opsin_biases[3]; 145 float quant_biases[4]; 146 }; 147 148 // Information useful for mapping HDR images to lower dynamic range displays. 149 struct ToneMapping : public Fields { 150 ToneMapping(); 151 JXL_FIELDS_NAME(ToneMapping) 152 153 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 154 155 mutable bool all_default; 156 157 // Upper bound on the intensity level present in the image. For unsigned 158 // integer pixel encodings, this is the brightness of the largest 159 // representable value. The image does not necessarily contain a pixel 160 // actually this bright. 
An encoder is allowed to set 255 for SDR images 161 // without computing a histogram. 162 float intensity_target; // [nits] 163 164 // Lower bound on the intensity level present in the image. This may be 165 // loose, i.e. lower than the actual darkest pixel. When tone mapping, a 166 // decoder will map [min_nits, intensity_target] to the display range. 167 float min_nits; 168 169 bool relative_to_max_display; // see below 170 // The tone mapping will leave unchanged (linear mapping) any pixels whose 171 // brightness is strictly below this. The interpretation depends on 172 // relative_to_max_display. If true, this is a ratio [0, 1] of the maximum 173 // display brightness [nits], otherwise an absolute brightness [nits]. 174 float linear_below; 175 }; 176 177 // Contains weights to customize some transforms - in particular, XYB and 178 // upsampling. 179 struct CustomTransformData : public Fields { 180 CustomTransformData(); 181 JXL_FIELDS_NAME(CustomTransformData) 182 183 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 184 185 // Must be set before calling VisitFields. Must equal xyb_encoded of 186 // ImageMetadata, should be set by ImageMetadata during VisitFields. 187 bool nonserialized_xyb_encoded = false; 188 189 mutable bool all_default; 190 191 OpsinInverseMatrix opsin_inverse_matrix; 192 193 uint32_t custom_weights_mask; 194 float upsampling2_weights[15]; 195 float upsampling4_weights[55]; 196 float upsampling8_weights[210]; 197 }; 198 199 // Properties of the original image bundle. This enables Encode(Decode()) to 200 // re-create an equivalent image without user input. 201 struct ImageMetadata : public Fields { 202 ImageMetadata(); 203 JXL_FIELDS_NAME(ImageMetadata) 204 205 Status VisitFields(Visitor* JXL_RESTRICT visitor) override; 206 207 // Returns bit depth of the JPEG XL compressed alpha channel, or 0 if no alpha 208 // channel present. 
In the theoretical case that there are multiple alpha 209 // channels, returns the bit depth of the first. 210 uint32_t GetAlphaBits() const { 211 const ExtraChannelInfo* alpha = Find(ExtraChannel::kAlpha); 212 if (alpha == nullptr) return 0; 213 JXL_DASSERT(alpha->bit_depth.bits_per_sample != 0); 214 return alpha->bit_depth.bits_per_sample; 215 } 216 217 // Sets bit depth of alpha channel, adding extra channel if needed, or 218 // removing all alpha channels if bits is 0. 219 // Assumes integer alpha channel and not designed to support multiple 220 // alpha channels (it's possible to use those features by manipulating 221 // extra_channel_info directly). 222 // 223 // Callers must insert the actual channel image at the same index before any 224 // further modifications to extra_channel_info. 225 void SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied = false); 226 227 bool HasAlpha() const { return GetAlphaBits() != 0; } 228 229 // Sets the original bit depth fields to indicate unsigned integer of the 230 // given bit depth. 231 // TODO(lode): move function to BitDepth 232 void SetUintSamples(uint32_t bits) { 233 bit_depth.bits_per_sample = bits; 234 bit_depth.exponent_bits_per_sample = 0; 235 bit_depth.floating_point_sample = false; 236 // RCT / Squeeze may add one bit each, and this is about int16_t, 237 // so uint13 should still be OK but limiting it to 12 seems safer. 238 // TODO(jon): figure out a better way to set this header field. 239 // (in particular, if modular mode is not used it doesn't matter, 240 // and if transforms are restricted, up to 15-bit could be done) 241 if (bits > 12) modular_16_bit_buffer_sufficient = false; 242 } 243 // Sets the original bit depth fields to indicate single precision floating 244 // point. 
245 // TODO(lode): move function to BitDepth 246 void SetFloat32Samples() { 247 bit_depth.bits_per_sample = 32; 248 bit_depth.exponent_bits_per_sample = 8; 249 bit_depth.floating_point_sample = true; 250 modular_16_bit_buffer_sufficient = false; 251 } 252 253 void SetFloat16Samples() { 254 bit_depth.bits_per_sample = 16; 255 bit_depth.exponent_bits_per_sample = 5; 256 bit_depth.floating_point_sample = true; 257 modular_16_bit_buffer_sufficient = false; 258 } 259 260 void SetIntensityTarget(float intensity_target) { 261 tone_mapping.intensity_target = intensity_target; 262 } 263 float IntensityTarget() const { 264 JXL_DASSERT(tone_mapping.intensity_target != 0.0f); 265 return tone_mapping.intensity_target; 266 } 267 268 // Returns first ExtraChannelInfo of the given type, or nullptr if none. 269 const ExtraChannelInfo* Find(ExtraChannel type) const { 270 for (const ExtraChannelInfo& eci : extra_channel_info) { 271 if (eci.type == type) return &eci; 272 } 273 return nullptr; 274 } 275 276 // Returns first ExtraChannelInfo of the given type, or nullptr if none. 277 ExtraChannelInfo* Find(ExtraChannel type) { 278 for (ExtraChannelInfo& eci : extra_channel_info) { 279 if (eci.type == type) return &eci; 280 } 281 return nullptr; 282 } 283 284 Orientation GetOrientation() const { 285 return static_cast<Orientation>(orientation); 286 } 287 288 bool ExtraFieldsDefault() const; 289 290 std::string DebugString() const; 291 292 mutable bool all_default; 293 294 BitDepth bit_depth; 295 bool modular_16_bit_buffer_sufficient; // otherwise 32 is. 296 297 // Whether the colors values of the pixels of frames are encoded in the 298 // codestream using the absolute XYB color space, or the using values that 299 // follow the color space defined by the ColorEncoding or ICC profile. This 300 // determines when or whether a CMS (Color Management System) is needed to get 301 // the pixels in a desired color space. 
In one case, the pixels have one known 302 // color space and a CMS is needed to convert them to the original image's 303 // color space, in the other case the pixels have the color space of the 304 // original image and a CMS is required if a different display space, or a 305 // single known consistent color space for multiple decoded images, is 306 // desired. In all cases, the color space of all frames from a single image is 307 // the same, both VarDCT and modular frames. 308 // 309 // If true: then frames can be decoded to XYB (which can also be converted to 310 // linear and non-linear sRGB with the built in conversion without CMS). The 311 // attached ColorEncoding or ICC profile has no effect on the meaning of the 312 // pixel's color values, but instead indicates what the color profile of the 313 // original image was, and what color profile one should convert to when 314 // decoding to integers to prevent clipping and precision loss. To do that 315 // conversion requires a CMS. 316 // 317 // If false: then the color values of decoded frames are in the space defined 318 // by the attached ColorEncoding or ICC profile. To instead get the pixels in 319 // a chosen known color space, such as sRGB, requires a CMS, since the 320 // attached ColorEncoding or ICC profile could be any arbitrary color space. 321 // This mode is typically used for lossless images encoded as integers. 322 // Frames can also use YCbCr encoding, some frames may and some may not, but 323 // this is not a different color space but a certain encoding of the RGB 324 // values. 
325 // 326 // Note: if !xyb_encoded, but the attached color profile indicates XYB (which 327 // can happen either if it's a ColorEncoding with color_space_ == 328 // ColorSpace::kXYB, or if it's an ICC Profile that has been crafted to 329 // represent XYB), then the frames still may not use ColorEncoding kXYB, they 330 // must still use kNone (or kYCbCr, which would mean applying the YCbCr 331 // transform to the 3-channel XYB data), since with !xyb_encoded, the 3 332 // channels are stored as-is, no matter what meaning the color profile assigns 333 // to them. To use ColorSpace::kXYB, xyb_encoded must be true. 334 // 335 // This value is defined in image metadata because this is the global 336 // codestream header. This value does not affect the image itself, so is not 337 // image metadata per se, it only affects the encoding, and what color space 338 // the decoder can receive the pixels in without needing a CMS. 339 bool xyb_encoded; 340 341 ColorEncoding color_encoding; 342 343 // These values are initialized to defaults such that the 'extra_fields' 344 // condition in VisitFields uses correctly initialized values. 345 uint32_t orientation = 1; 346 bool have_preview = false; 347 bool have_animation = false; 348 bool have_intrinsic_size = false; 349 350 // If present, the stored image has the dimensions of the first SizeHeader, 351 // but decoders are advised to resample or display per `intrinsic_size`. 352 SizeHeader intrinsic_size; // only if have_intrinsic_size 353 354 ToneMapping tone_mapping; 355 356 // When reading: deserialized. When writing: automatically set from vector. 357 uint32_t num_extra_channels; 358 std::vector<ExtraChannelInfo> extra_channel_info; 359 360 // Only present if m.have_preview. 361 PreviewHeader preview_size; 362 // Only present if m.have_animation. 363 AnimationHeader animation; 364 365 uint64_t extensions; 366 367 // Option to stop parsing after basic info, and treat as if the later 368 // fields do not participate. 
Use to parse only basic image information 369 // excluding the final larger or variable sized data. 370 bool nonserialized_only_parse_basic_info = false; 371 }; 372 373 Status ReadImageMetadata(BitReader* JXL_RESTRICT reader, 374 ImageMetadata* JXL_RESTRICT metadata); 375 376 Status WriteImageMetadata(const ImageMetadata& metadata, 377 BitWriter* JXL_RESTRICT writer, LayerType layer, 378 AuxOut* aux_out); 379 380 // All metadata applicable to the entire codestream (dimensions, extra channels, 381 // ...) 382 struct CodecMetadata { 383 // TODO(lode): use the preview and animation fields too, in place of the 384 // nonserialized_ ones in ImageMetadata. 385 ImageMetadata m; 386 // The size of the codestream: this is the nominal size applicable to all 387 // frames, although some frames can have a different effective size through 388 // crop, dc_level or representing a the preview. 389 SizeHeader size; 390 // Often default. 391 CustomTransformData transform_data; 392 393 size_t xsize() const { return size.xsize(); } 394 size_t ysize() const { return size.ysize(); } 395 size_t oriented_xsize(bool keep_orientation) const { 396 if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) { 397 return ysize(); 398 } else { 399 return xsize(); 400 } 401 } 402 size_t oriented_preview_xsize(bool keep_orientation) const { 403 if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) { 404 return m.preview_size.ysize(); 405 } else { 406 return m.preview_size.xsize(); 407 } 408 } 409 size_t oriented_ysize(bool keep_orientation) const { 410 if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) { 411 return xsize(); 412 } else { 413 return ysize(); 414 } 415 } 416 size_t oriented_preview_ysize(bool keep_orientation) const { 417 if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) { 418 return m.preview_size.xsize(); 419 } else { 420 return m.preview_size.ysize(); 421 } 422 } 423 424 std::string DebugString() const; 425 
}; 426 427 } // namespace jxl 428 429 #endif // LIB_JXL_IMAGE_METADATA_H_