entropy_coding.cc (30139B)
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jpegli/entropy_coding.h"

#include <vector>

#include "lib/jpegli/encode_internal.h"
#include "lib/jpegli/error.h"
#include "lib/jpegli/huffman.h"
#include "lib/jxl/base/bits.h"

// Highway dynamic-dispatch boilerplate: this translation unit is re-included
// once per SIMD target via foreach_target.h, so the exact order of the
// #undef/#define/#include lines below must not change.
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jpegli/entropy_coding.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

#include "lib/jpegli/entropy_coding-inl.h"

HWY_BEFORE_NAMESPACE();
namespace jpegli {
namespace HWY_NAMESPACE {

// Per-target entry point: tokenizes one 8x8 coefficient block of a
// sequential (non-progressive) scan. Thin wrapper around the shared template
// in entropy_coding-inl.h so that HWY_EXPORT below can dispatch to the best
// compiled SIMD target at runtime.
void ComputeTokensSequential(const coeff_t* block, int last_dc, int dc_ctx,
                             int ac_ctx, Token** tokens_ptr) {
  ComputeTokensForBlock<coeff_t, true>(block, last_dc, dc_ctx, ac_ctx,
                                       tokens_ptr);
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace jpegli
HWY_AFTER_NAMESPACE();

#if HWY_ONCE
namespace jpegli {

// Upper bound on the number of tokens one MCU row can produce in a
// sequential scan: each of the kDCTBlockSize coefficients of every block in
// the row can emit at most one token.
size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo) {
  int MCUs_per_row = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
  size_t blocks_per_mcu = 0;
  for (int c = 0; c < cinfo->num_components; ++c) {
    jpeg_component_info* comp = &cinfo->comp_info[c];
    blocks_per_mcu += comp->h_samp_factor * comp->v_samp_factor;
  }
  return kDCTBlockSize * blocks_per_mcu * MCUs_per_row;
}

// Estimates how many tokens to allocate for the rest of the scan, given that
// `num_tokens` tokens were emitted while processing the first `mcu_y` of
// `ysize_mcus` MCU rows. The result is clamped to at least one row's worth
// (`max_per_row`) and at most the worst case for the remaining rows.
size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
                         size_t num_tokens, size_t max_per_row) {
  size_t estimate;
  if (mcu_y == 0) {
    // No statistics yet: start with a generous fixed guess.
    estimate = 16 * max_per_row;
  } else {
    // Extrapolate the density observed so far, with a 4/3 safety margin.
    estimate = (4 * ysize_mcus * num_tokens) / (3 * mcu_y);
  }
  size_t mcus_left = ysize_mcus - mcu_y;
  // NOTE: if num_tokens > estimate the unsigned subtraction wraps around,
  // but the surrounding max/min still clamps the result into
  // [max_per_row, mcus_left * max_per_row], so the wrap is harmless (it only
  // selects the upper clamp).
  return std::min(mcus_left * max_per_row,
                  std::max(max_per_row, estimate - num_tokens));
}

namespace {
HWY_EXPORT(ComputeTokensSequential);
// Emits the DC token for one block in the first (Ah == 0) pass of a
// progressive DC scan: applies the Al point transform, DPCM-codes the value
// against the previous block's DC, and appends one (context, size, bits)
// token at *next_token.
void TokenizeProgressiveDC(const coeff_t* coeffs, int context, int Al,
                           coeff_t* last_dc_coeff, Token** next_token) {
  coeff_t temp2;
  coeff_t temp;
  // Point transform, then difference against the running DC predictor.
  temp2 = coeffs[0] >> Al;
  temp = temp2 - *last_dc_coeff;
  *last_dc_coeff = temp2;
  temp2 = temp;
  if (temp < 0) {
    temp = -temp;
    // Negative values store (value - 1) in the low `nbits` extra bits, i.e.
    // the one's complement of the magnitude, per the JPEG spec.
    temp2--;
  }
  int nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero<uint32_t>(temp) + 1);
  int bits = temp2 & ((1 << nbits) - 1);
  *(*next_token)++ = Token(context, nbits, bits);
}

// Tokenizes the first (Ah == 0) pass of a progressive AC scan for the single
// component of scan `scan_index`. Emits run-length/size tokens, ZRL (0xf0)
// tokens for runs longer than 15, and EOBn run tokens, growing the master's
// token arrays on demand. Fills in sti's token_offset/num_tokens/restarts
// and the nonzero statistics used later to size refinement-scan buffers.
void TokenizeACProgressiveScan(j_compress_ptr cinfo, int scan_index,
                               int context, ScanTokenInfo* sti) {
  jpeg_comp_master* m = cinfo->master;
  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
  const int comp_idx = scan_info->component_index[0];
  const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
  const int Al = scan_info->Al;
  const int Ss = scan_info->Ss;
  const int Se = scan_info->Se;
  const size_t restart_interval = sti->restart_interval;
  int restarts_to_go = restart_interval;
  size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
  size_t num_restarts =
      restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;
  size_t restart_idx = 0;
  // Number of consecutive end-of-block blocks not yet coded (EOBn run).
  int eob_run = 0;
  TokenArray* ta = &m->token_arrays[m->cur_token_array];
  sti->token_offset = m->total_num_tokens + ta->num_tokens;
  sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
  // Flushes the pending EOB run as one EOBn symbol (run length class in the
  // high nibble, remaining bits as extra bits).
  const auto emit_eob_run = [&]() {
    int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
    int symbol = nbits << 4u;
    *m->next_token++ = Token(context, symbol, eob_run & ((1 << nbits) - 1));
    eob_run = 0;
  };
  for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
    JBLOCKARRAY blocks = (*cinfo->mem->access_virt_barray)(
        reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
        1, FALSE);
    // Each coefficient can appear in at most one token, but we have to reserve
    // one extra EOBrun token that was rolled over from the previous block-row
    // and has to be flushed at the end.
    int max_tokens_per_row = 1 + comp->width_in_blocks * (Se - Ss + 1);
    if (ta->num_tokens + max_tokens_per_row > m->num_tokens) {
      // Current token array may be too small for this row: seal it and start
      // a new one sized from the statistics so far.
      if (ta->tokens) {
        m->total_num_tokens += ta->num_tokens;
        ++m->cur_token_array;
        ta = &m->token_arrays[m->cur_token_array];
      }
      m->num_tokens =
          EstimateNumTokens(cinfo, by, comp->height_in_blocks,
                            m->total_num_tokens, max_tokens_per_row);
      ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
      m->next_token = ta->tokens;
    }
    for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
      if (restart_interval > 0 && restarts_to_go == 0) {
        // EOB runs must not cross a restart boundary.
        if (eob_run > 0) emit_eob_run();
        ta->num_tokens = m->next_token - ta->tokens;
        sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;
        restarts_to_go = restart_interval;
      }
      const coeff_t* block = &blocks[0][bx][0];
      coeff_t temp2;
      coeff_t temp;
      int r = 0;                    // current zero run length
      int num_nzeros = 0;           // nonzeros emitted in this scan
      int num_future_nzeros = 0;    // nonzeros deferred to refinement scans
      for (int k = Ss; k <= Se; ++k) {
        temp = block[k];
        if (temp == 0) {
          r++;
          continue;
        }
        if (temp < 0) {
          temp = -temp;
          temp >>= Al;
          // One's complement encodes the negative sign in the extra bits.
          temp2 = ~temp;
        } else {
          temp >>= Al;
          temp2 = temp;
        }
        if (temp == 0) {
          // Nonzero before the point transform, zero after: it will become
          // visible only in a later (refinement) scan.
          r++;
          num_future_nzeros++;
          continue;
        }
        if (eob_run > 0) emit_eob_run();
        while (r > 15) {
          *m->next_token++ = Token(context, 0xf0, 0);  // ZRL: run of 16 zeros
          r -= 16;
        }
        int nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
        int symbol = (r << 4u) + nbits;
        *m->next_token++ = Token(context, symbol, temp2 & ((1 << nbits) - 1));
        ++num_nzeros;
        r = 0;
      }
      if (r > 0) {
        // Trailing zeros: fold this block into the pending EOB run.
        ++eob_run;
        // 0x7FFF is the largest EOB run the 14-bit-class symbol can express.
        if (eob_run == 0x7FFF) emit_eob_run();
      }
      sti->num_nonzeros += num_nzeros;
      sti->num_future_nonzeros += num_future_nzeros;
      --restarts_to_go;
    }
    ta->num_tokens = m->next_token - ta->tokens;
  }
  if (eob_run > 0) {
    emit_eob_run();
    ++ta->num_tokens;
  }
  sti->num_tokens = m->total_num_tokens + ta->num_tokens - sti->token_offset;
  // Final sentinel entry so consumers can treat restarts as interval ends.
  sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;
}
// Tokenizes a refinement (Ah > 0) pass of a progressive AC scan. Emits
// RefTokens plus a separate stream of refinement bits (sti->refbits) for
// coefficients that were already nonzero, and per-EOB-run correction-bit
// counts (sti->eobruns). EOB runs are built by patching the token at
// next_eob_token in place, which is why the statement order below matters.
void TokenizeACRefinementScan(j_compress_ptr cinfo, int scan_index,
                              ScanTokenInfo* sti) {
  jpeg_comp_master* m = cinfo->master;
  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
  const int comp_idx = scan_info->component_index[0];
  const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
  const int Al = scan_info->Al;
  const int Ss = scan_info->Ss;
  const int Se = scan_info->Se;
  const size_t restart_interval = sti->restart_interval;
  int restarts_to_go = restart_interval;
  RefToken token;
  int eob_run = 0;      // blocks folded into the current EOB run
  int eob_refbits = 0;  // refinement bits carried by the current EOB run
  size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
  size_t num_restarts =
      restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;
  sti->tokens = m->next_refinement_token;
  sti->refbits = m->next_refinement_bit;
  sti->eobruns = Allocate<uint16_t>(cinfo, num_blocks / 2, JPOOL_IMAGE);
  sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
  RefToken* next_token = sti->tokens;
  // Position of the token representing the open EOB run (patched in place).
  RefToken* next_eob_token = next_token;
  uint8_t* next_ref_bit = sti->refbits;
  uint16_t* next_eobrun = sti->eobruns;
  size_t restart_idx = 0;
  for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
    JBLOCKARRAY blocks = (*cinfo->mem->access_virt_barray)(
        reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
        1, FALSE);
    for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
      if (restart_interval > 0 && restarts_to_go == 0) {
        // EOB runs must end at a restart boundary.
        sti->restarts[restart_idx++] = next_token - sti->tokens;
        restarts_to_go = restart_interval;
        next_eob_token = next_token;
        eob_run = eob_refbits = 0;
      }
      const coeff_t* block = &blocks[0][bx][0];
      int num_eob_refinement_bits = 0;
      int num_refinement_bits = 0;
      int num_nzeros = 0;
      int r = 0;  // zero run of still-zero coefficients
      for (int k = Ss; k <= Se; ++k) {
        int absval = block[k];
        if (absval == 0) {
          r++;
          continue;
        }
        // Branchless |absval|; `mask` keeps the sign for the symbol below.
        const int mask = absval >> (8 * sizeof(int) - 1);
        absval += mask;
        absval ^= mask;
        absval >>= Al;
        if (absval == 0) {
          // Still invisible at this bit plane.
          r++;
          continue;
        }
        while (r > 15) {
          token.symbol = 0xf0;  // ZRL
          token.refbits = num_refinement_bits;
          *next_token++ = token;
          r -= 16;
          num_eob_refinement_bits += num_refinement_bits;
          num_refinement_bits = 0;
        }
        if (absval > 1) {
          // Already nonzero in an earlier scan: emit one correction bit.
          *next_ref_bit++ = absval & 1u;
          ++num_refinement_bits;
          continue;
        }
        // Newly nonzero coefficient: symbol encodes run, magnitude 1 and
        // sign (bit 1 set for positive values via (mask + 1) << 1).
        int symbol = (r << 4u) + 1 + ((mask + 1) << 1);
        token.symbol = symbol;
        token.refbits = num_refinement_bits;
        *next_token++ = token;
        ++num_nzeros;
        num_refinement_bits = 0;
        num_eob_refinement_bits = 0;
        r = 0;
        // A real token closes any open EOB run.
        next_eob_token = next_token;
        eob_run = eob_refbits = 0;
      }
      if (r > 0 || num_eob_refinement_bits + num_refinement_bits > 0) {
        // Block ends in (refined) zeros: extend the open EOB run in place.
        ++eob_run;
        eob_refbits += num_eob_refinement_bits + num_refinement_bits;
        if (eob_refbits > 255) {
          // refbits is a byte; start a fresh EOB token when it would overflow.
          ++next_eob_token;
          eob_refbits = num_eob_refinement_bits + num_refinement_bits;
          eob_run = 1;
        }
        next_token = next_eob_token;
        next_token->refbits = eob_refbits;
        if (eob_run == 1) {
          next_token->symbol = 0;  // plain EOB
        } else if (eob_run == 2) {
          next_token->symbol = 16;  // first EOBn class with extra bits
          *next_eobrun++ = 0;
        } else if ((eob_run & (eob_run - 1)) == 0) {
          // Run length crossed a power of two: bump the EOBn class.
          next_token->symbol += 16;
          next_eobrun[-1] = 0;
        } else {
          ++next_eobrun[-1];
        }
        ++next_token;
        if (eob_run == 0x7fff) {
          // Maximum representable run: force the next block to start anew.
          next_eob_token = next_token;
          eob_run = eob_refbits = 0;
        }
      }
      sti->num_nonzeros += num_nzeros;
      --restarts_to_go;
    }
  }
  sti->num_tokens = next_token - sti->tokens;
  sti->restarts[restart_idx++] = sti->num_tokens;
  // Hand the bump allocators back to the master for the next scan.
  m->next_refinement_token = next_token;
  m->next_refinement_bit = next_ref_bit;
}
// Tokenizes one scan. AC scans (Ss > 0) are delegated to the progressive AC
// tokenizers above; this body handles sequential scans and progressive DC
// scans (both first pass and refinement), walking the scan MCU by MCU.
void TokenizeScan(j_compress_ptr cinfo, size_t scan_index, int ac_ctx_offset,
                  ScanTokenInfo* sti) {
  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
  if (scan_info->Ss > 0) {
    if (scan_info->Ah == 0) {
      TokenizeACProgressiveScan(cinfo, scan_index, ac_ctx_offset, sti);
    } else {
      TokenizeACRefinementScan(cinfo, scan_index, sti);
    }
    return;
  }

  jpeg_comp_master* m = cinfo->master;
  size_t restart_interval = sti->restart_interval;
  int restarts_to_go = restart_interval;
  // Per-component DC predictors, reset at restart markers.
  coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};

  // "Non-interleaved" means color data comes in separate scans, in other words
  // each scan can contain only one color component.
  const bool is_interleaved = (scan_info->comps_in_scan > 1);
  const bool is_progressive = FROM_JXL_BOOL(cinfo->progressive_mode);
  const int Ah = scan_info->Ah;
  const int Al = scan_info->Al;
  // All-zero block used for MCU padding outside the component's real blocks.
  HWY_ALIGN constexpr coeff_t kSinkBlock[DCTSIZE2] = {0};

  size_t restart_idx = 0;
  TokenArray* ta = &m->token_arrays[m->cur_token_array];
  // DC refinement scans emit raw bits instead of tokens.
  sti->token_offset = Ah > 0 ? 0 : m->total_num_tokens + ta->num_tokens;

  if (Ah > 0) {
    sti->refbits = Allocate<uint8_t>(cinfo, sti->num_blocks, JPOOL_IMAGE);
  } else if (cinfo->progressive_mode) {
    // Progressive DC first pass: exactly one token per block, so the needed
    // capacity is known up front.
    if (ta->num_tokens + sti->num_blocks > m->num_tokens) {
      if (ta->tokens) {
        m->total_num_tokens += ta->num_tokens;
        ++m->cur_token_array;
        ta = &m->token_arrays[m->cur_token_array];
      }
      m->num_tokens = sti->num_blocks;
      ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
      m->next_token = ta->tokens;
    }
  }

  JBLOCKARRAY blocks[MAX_COMPS_IN_SCAN];
  size_t block_idx = 0;
  for (size_t mcu_y = 0; mcu_y < sti->MCU_rows_in_scan; ++mcu_y) {
    // Fetch the block rows touched by this MCU row for every component.
    for (int i = 0; i < scan_info->comps_in_scan; ++i) {
      int comp_idx = scan_info->component_index[i];
      jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
      int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
      int by0 = mcu_y * n_blocks_y;
      int block_rows_left = comp->height_in_blocks - by0;
      int max_block_rows = std::min(n_blocks_y, block_rows_left);
      blocks[i] = (*cinfo->mem->access_virt_barray)(
          reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx],
          by0, max_block_rows, FALSE);
    }
    if (!cinfo->progressive_mode) {
      // Sequential mode: grow the token array for the worst case of this row.
      int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
      if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) {
        if (ta->tokens) {
          m->total_num_tokens += ta->num_tokens;
          ++m->cur_token_array;
          ta = &m->token_arrays[m->cur_token_array];
        }
        m->num_tokens =
            EstimateNumTokens(cinfo, mcu_y, sti->MCU_rows_in_scan,
                              m->total_num_tokens, max_tokens_per_mcu_row);
        ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
        m->next_token = ta->tokens;
      }
    }
    for (size_t mcu_x = 0; mcu_x < sti->MCUs_per_row; ++mcu_x) {
      // Possibly emit a restart marker.
      if (restart_interval > 0 && restarts_to_go == 0) {
        restarts_to_go = restart_interval;
        // Restart markers reset the DC prediction.
        memset(last_dc_coeff, 0, sizeof(last_dc_coeff));
        ta->num_tokens = m->next_token - ta->tokens;
        sti->restarts[restart_idx++] =
            Ah > 0 ? block_idx : m->total_num_tokens + ta->num_tokens;
      }
      // Encode one MCU
      for (int i = 0; i < scan_info->comps_in_scan; ++i) {
        int comp_idx = scan_info->component_index[i];
        jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
        int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
        int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1;
        for (int iy = 0; iy < n_blocks_y; ++iy) {
          for (int ix = 0; ix < n_blocks_x; ++ix) {
            size_t block_y = mcu_y * n_blocks_y + iy;
            size_t block_x = mcu_x * n_blocks_x + ix;
            const coeff_t* block;
            if (block_x >= comp->width_in_blocks ||
                block_y >= comp->height_in_blocks) {
              // Padding block outside the image: feed zeros.
              block = kSinkBlock;
            } else {
              block = &blocks[i][iy][block_x][0];
            }
            if (!is_progressive) {
              // SIMD-dispatched sequential tokenizer (DC + all AC).
              HWY_DYNAMIC_DISPATCH(ComputeTokensSequential)
              (block, last_dc_coeff[i], comp_idx, ac_ctx_offset + i,
               &m->next_token);
              last_dc_coeff[i] = block[0];
            } else {
              if (Ah == 0) {
                TokenizeProgressiveDC(block, comp_idx, Al, last_dc_coeff + i,
                                      &m->next_token);
              } else {
                // DC refinement: one raw bit per block.
                sti->refbits[block_idx] = (block[0] >> Al) & 1;
              }
            }
            ++block_idx;
          }
        }
      }
      --restarts_to_go;
    }
    ta->num_tokens = m->next_token - ta->tokens;
  }
  JXL_DASSERT(block_idx == sti->num_blocks);
  sti->num_tokens =
      Ah > 0 ? sti->num_blocks
             : m->total_num_tokens + ta->num_tokens - sti->token_offset;
  // Final sentinel restart entry.
  sti->restarts[restart_idx++] =
      Ah > 0 ? sti->num_blocks : m->total_num_tokens + ta->num_tokens;
  if (Ah == 0 && cinfo->progressive_mode) {
    // Progressive DC first pass emits exactly one token per block.
    JXL_DASSERT(sti->num_blocks == sti->num_tokens);
  }
}
}  // namespace

// Tokenizes every scan of the image. First-pass AC scans with successive
// approximation (Al > 0) are processed first so their statistics can size
// the refinement-token and refinement-bit buffers; their matching refinement
// scans are then processed bit plane by bit plane; all remaining scans
// (sequential, DC, and non-approximated AC) come last.
void TokenizeJpeg(j_compress_ptr cinfo) {
  jpeg_comp_master* m = cinfo->master;
  std::vector<int> processed(cinfo->num_scans);
  size_t max_refinement_tokens = 0;
  size_t num_refinement_bits = 0;
  // num_refinement_scans[c][k]: Al of the first-pass scan covering
  // coefficient k of component c, i.e. how many refinement passes follow.
  int num_refinement_scans[kMaxComponents][DCTSIZE2] = {};
  int max_num_refinement_scans = 0;
  for (int i = 0; i < cinfo->num_scans; ++i) {
    const jpeg_scan_info* si = &cinfo->scan_info[i];
    ScanTokenInfo* sti = &m->scan_token_info[i];
    if (si->Ss > 0 && si->Ah == 0 && si->Al > 0) {
      // First pass of a successive-approximation AC scan.
      int offset = m->ac_ctx_offset[i];
      int comp_idx = si->component_index[0];
      TokenizeScan(cinfo, i, offset, sti);
      processed[i] = 1;
      max_refinement_tokens += sti->num_future_nonzeros;
      for (int k = si->Ss; k <= si->Se; ++k) {
        num_refinement_scans[comp_idx][k] = si->Al;
      }
      max_num_refinement_scans = std::max(max_num_refinement_scans, si->Al);
      num_refinement_bits += sti->num_nonzeros;
    }
    if (si->Ss > 0 && si->Ah > 0) {
      // Refinement scan: worst case is one ZRL token per 16 coefficients
      // plus one token per block.
      int comp_idx = si->component_index[0];
      const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
      size_t num_blocks = comp->width_in_blocks * comp->height_in_blocks;
      max_refinement_tokens += (1 + (si->Se - si->Ss) / 16) * num_blocks;
    }
  }
  if (max_refinement_tokens > 0) {
    m->next_refinement_token =
        Allocate<RefToken>(cinfo, max_refinement_tokens, JPOOL_IMAGE);
  }
  // Process refinement scans from the most significant remaining bit plane
  // down, so each pass knows how many correction bits the previous produced.
  for (int j = 0; j < max_num_refinement_scans; ++j) {
    uint8_t* refinement_bits =
        Allocate<uint8_t>(cinfo, num_refinement_bits, JPOOL_IMAGE);
    m->next_refinement_bit = refinement_bits;
    size_t new_refinement_bits = 0;
    for (int i = 0; i < cinfo->num_scans; ++i) {
      const jpeg_scan_info* si = &cinfo->scan_info[i];
      int comp_idx = si->component_index[0];
      ScanTokenInfo* sti = &m->scan_token_info[i];
      if (si->Ss > 0 && si->Ah > 0 &&
          si->Ah == num_refinement_scans[comp_idx][si->Ss] - j) {
        int offset = m->ac_ctx_offset[i];
        TokenizeScan(cinfo, i, offset, sti);
        processed[i] = 1;
        new_refinement_bits += sti->num_nonzeros;
      }
    }
    JXL_DASSERT(m->next_refinement_bit <=
                refinement_bits + num_refinement_bits);
    // Coefficients that became nonzero in this plane need correction bits in
    // the next one.
    num_refinement_bits += new_refinement_bits;
  }
  // Everything not handled above (sequential, DC, Al == 0 AC scans).
  for (int i = 0; i < cinfo->num_scans; ++i) {
    if (processed[i]) {
      continue;
    }
    int offset = m->ac_ctx_offset[i];
    TokenizeScan(cinfo, i, offset, &m->scan_token_info[i]);
    processed[i] = 1;
  }
}
namespace {

// Symbol frequency counts for one Huffman context.
struct Histogram {
  int count[kJpegHuffmanAlphabetSize];
  Histogram() { memset(count, 0, sizeof(count)); }
};

// Accumulates per-context symbol histograms over all emitted tokens: first
// the regular token arrays, then the RefTokens of AC refinement scans.
void BuildHistograms(j_compress_ptr cinfo, Histogram* histograms) {
  jpeg_comp_master* m = cinfo->master;
  size_t num_token_arrays = m->cur_token_array + 1;
  for (size_t i = 0; i < num_token_arrays; ++i) {
    Token* tokens = m->token_arrays[i].tokens;
    size_t num_tokens = m->token_arrays[i].num_tokens;
    for (size_t j = 0; j < num_tokens; ++j) {
      Token t = tokens[j];
      ++histograms[t.context].count[t.symbol];
    }
  }
  for (int i = 0; i < cinfo->num_scans; ++i) {
    const jpeg_scan_info& si = cinfo->scan_info[i];
    const ScanTokenInfo& sti = m->scan_token_info[i];
    if (si.Ss > 0 && si.Ah > 0) {
      int context = m->ac_ctx_offset[i];
      int* ac_histo = &histograms[context].count[0];
      for (size_t j = 0; j < sti.num_tokens; ++j) {
        // & 253 clears bit 1, which in RefToken symbols carries the sign of
        // newly-nonzero coefficients — the sign is sent as a raw extra bit,
        // not as part of the Huffman symbol, so both signs share one count.
        ++ac_histo[sti.tokens[j].symbol & 253];
      }
    }
  }
}

// Result of clustering per-context histograms into a small set of shared
// Huffman tables: one entry in histogram_indexes per input context, and one
// slot id (0-3, the standard JPEG table slots) per clustered histogram.
struct JpegClusteredHistograms {
  std::vector<Histogram> histograms;
  std::vector<uint32_t> histogram_indexes;
  std::vector<uint32_t> slot_ids;
};

// Estimated cost in bits of coding `histo` with its own optimal Huffman
// table: DHT segment header + code lengths + symbol bytes + payload bits.
// The extra count[256] = 1 reserves a code for the spec's all-ones guard.
float HistogramCost(const Histogram& histo) {
  std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
  std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
    counts[i] = histo.count[i];
  }
  counts[kJpegHuffmanAlphabetSize] = 1;
  CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
                    depths.data());
  size_t header_bits = (1 + kJpegHuffmanMaxBitLength) * 8;
  size_t data_bits = 0;
  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
    if (depths[i] > 0) {
      header_bits += 8;  // one symbol byte in the DHT segment
      data_bits += counts[i] * depths[i];
    }
  }
  return header_bits + data_bits;
}
// c = a + b, element-wise sum of symbol counts.
void AddHistograms(const Histogram& a, const Histogram& b, Histogram* c) {
  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
    c->count[i] = a.count[i] + b.count[i];
  }
}

// True if no symbol was ever counted (context unused by any scan).
bool IsEmptyHistogram(const Histogram& histo) {
  for (int count : histo.count) {
    if (count) return false;
  }
  return true;
}

// Greedily clusters `num` context histograms into shared Huffman tables,
// constrained to the 4 table slots a JPEG scan header can reference. Each
// histogram is either merged into the slot whose marginal bit cost is lowest
// or becomes a new table (evicting a slot once all 4 are taken).
void ClusterJpegHistograms(j_compress_ptr cinfo, const Histogram* histograms,
                           size_t num, JpegClusteredHistograms* clusters) {
  clusters->histogram_indexes.resize(num);
  std::vector<uint32_t> slot_histograms;  // histogram index per active slot
  std::vector<float> slot_costs;          // current cost per active slot
  for (size_t i = 0; i < num; ++i) {
    const Histogram& cur = histograms[i];
    if (IsEmptyHistogram(cur)) {
      continue;
    }
    // Cost of giving this histogram its own table...
    float best_cost = HistogramCost(cur);
    size_t best_slot = slot_histograms.size();
    // ...vs. the marginal cost of merging it into each existing slot.
    for (size_t j = 0; j < slot_histograms.size(); ++j) {
      size_t prev_idx = slot_histograms[j];
      const Histogram& prev = clusters->histograms[prev_idx];
      Histogram combined;
      AddHistograms(prev, cur, &combined);
      float combined_cost = HistogramCost(combined);
      float cost = combined_cost - slot_costs[j];
      if (cost < best_cost) {
        best_cost = cost;
        best_slot = j;
      }
    }
    if (best_slot == slot_histograms.size()) {
      // Create new histogram.
      size_t histogram_index = clusters->histograms.size();
      clusters->histograms.push_back(cur);
      clusters->histogram_indexes[i] = histogram_index;
      if (best_slot < 4) {
        // We have a free slot, so we put the new histogram there.
        slot_histograms.push_back(histogram_index);
        slot_costs.push_back(best_cost);
      } else {
        // All 4 slots taken: evict round-robin after the last assigned slot.
        // TODO(szabadka) Find the best histogram to replace.
        best_slot = (clusters->slot_ids.back() + 1) % 4;
      }
      slot_histograms[best_slot] = histogram_index;
      slot_costs[best_slot] = best_cost;
      clusters->slot_ids.push_back(best_slot);
    } else {
      // Merge this histogram with a previous one.
      size_t histogram_index = slot_histograms[best_slot];
      const Histogram& prev = clusters->histograms[histogram_index];
      AddHistograms(prev, cur, &clusters->histograms[histogram_index]);
      clusters->histogram_indexes[i] = histogram_index;
      JPEGLI_CHECK(clusters->slot_ids[histogram_index] == best_slot);
      slot_costs[best_slot] += best_cost;
    }
  }
}
// Copies one caller-supplied Huffman table (used when the application
// provides tables instead of letting jpegli optimize them) into the
// encoder's flat table list, deduplicating via inv_slot_map.
// inv_slot_map maps the 8 standard slots (4 DC + 4 AC) to positions in
// huffman_tables, or -1 if not yet copied; slot_id_map records the DHT
// class/slot byte (0x00-0x03 DC, 0x10-0x13 AC) for each copied table.
void CopyHuffmanTable(j_compress_ptr cinfo, int index, bool is_dc,
                      int* inv_slot_map, uint8_t* slot_id_map,
                      JHUFF_TBL* huffman_tables, size_t* num_huffman_tables) {
  const char* type = is_dc ? "DC" : "AC";
  if (index < 0 || index >= NUM_HUFF_TBLS) {
    JPEGLI_ERROR("Invalid %s Huffman table index %d", type, index);
  }
  // Check if we have already copied this Huffman table.
  int slot_idx = index + (is_dc ? 0 : NUM_HUFF_TBLS);
  if (inv_slot_map[slot_idx] != -1) {
    return;
  }
  inv_slot_map[slot_idx] = *num_huffman_tables;
  // Look up and validate Huffman table.
  JHUFF_TBL* table =
      is_dc ? cinfo->dc_huff_tbl_ptrs[index] : cinfo->ac_huff_tbl_ptrs[index];
  if (table == nullptr) {
    JPEGLI_ERROR("Missing %s Huffman table %d", type, index);
  }
  ValidateHuffmanTable(reinterpret_cast<j_common_ptr>(cinfo), table, is_dc);
  // Copy Huffman table to the end of the list and save slot id.
  slot_id_map[*num_huffman_tables] = index + (is_dc ? 0 : 0x10);
  memcpy(&huffman_tables[*num_huffman_tables], table, sizeof(JHUFF_TBL));
  ++(*num_huffman_tables);
}
// Converts a symbol histogram into marker-segment form (JHUFF_TBL): code
// length counts in table->bits[] and symbols in canonical (length, value)
// order in table->huffval[]. The extra count at index 256 reserves one code
// so no real symbol gets the all-ones codeword, as the JPEG spec requires.
void BuildJpegHuffmanTable(const Histogram& histo, JHUFF_TBL* table) {
  std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
  std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
  for (size_t j = 0; j < kJpegHuffmanAlphabetSize; ++j) {
    counts[j] = histo.count[j];
  }
  counts[kJpegHuffmanAlphabetSize] = 1;
  CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
                    depths.data());
  memset(table, 0, sizeof(JHUFF_TBL));
  // Count how many symbols got each code length.
  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
    if (depths[i] > 0) {
      ++table->bits[depths[i]];
    }
  }
  // offset[l]: position in huffval of the next symbol with code length l.
  int offset[kJpegHuffmanMaxBitLength + 1] = {0};
  for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
    offset[i] = offset[i - 1] + table->bits[i - 1];
  }
  for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
    if (depths[i] > 0) {
      table->huffval[offset[depths[i]]++] = i;
    }
  }
}

}  // namespace

// Non-optimizing path: copies the application-supplied (or default) Huffman
// tables referenced by the components and scans, and builds the context map
// (contexts 0-3 are DC per component, 4+ are AC contexts in scan order).
void CopyHuffmanTables(j_compress_ptr cinfo) {
  jpeg_comp_master* m = cinfo->master;
  size_t max_huff_tables = 2 * cinfo->num_components;
  // Copy Huffman tables and save slot ids.
  m->huffman_tables = Allocate<JHUFF_TBL>(cinfo, max_huff_tables, JPOOL_IMAGE);
  m->slot_id_map = Allocate<uint8_t>(cinfo, max_huff_tables, JPOOL_IMAGE);
  m->num_huffman_tables = 0;
  int inv_slot_map[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
  for (int c = 0; c < cinfo->num_components; ++c) {
    jpeg_component_info* comp = &cinfo->comp_info[c];
    CopyHuffmanTable(cinfo, comp->dc_tbl_no, /*is_dc=*/true, &inv_slot_map[0],
                     m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
    CopyHuffmanTable(cinfo, comp->ac_tbl_no, /*is_dc=*/false, &inv_slot_map[0],
                     m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
  }
  // Compute context map.
  m->context_map = Allocate<uint8_t>(cinfo, 8, JPOOL_IMAGE);
  memset(m->context_map, 0, 8);
  for (int c = 0; c < cinfo->num_components; ++c) {
    m->context_map[c] = inv_slot_map[cinfo->comp_info[c].dc_tbl_no];
  }
  int ac_ctx = 4;
  for (int i = 0; i < cinfo->num_scans; ++i) {
    const jpeg_scan_info* si = &cinfo->scan_info[i];
    if (si->Se > 0) {
      // Scan contains AC coefficients: one AC context per component in it.
      for (int j = 0; j < si->comps_in_scan; ++j) {
        int c = si->component_index[j];
        jpeg_component_info* comp = &cinfo->comp_info[c];
        m->context_map[ac_ctx++] = inv_slot_map[comp->ac_tbl_no + 4];
      }
    }
  }
}
// Optimizing path: builds histograms from the emitted tokens, clusters them
// into at most 4 DC and 4 AC Huffman tables, generates the tables, and fills
// in the context map. Contexts 0-3 are the per-component DC contexts; AC
// contexts start at 4.
void OptimizeHuffmanCodes(j_compress_ptr cinfo) {
  jpeg_comp_master* m = cinfo->master;
  // Build DC and AC histograms.
  std::vector<Histogram> histograms(m->num_contexts);
  BuildHistograms(cinfo, histograms.data());

  // Cluster DC histograms.
  JpegClusteredHistograms dc_clusters;
  ClusterJpegHistograms(cinfo, histograms.data(), cinfo->num_components,
                        &dc_clusters);

  // Cluster AC histograms.
  JpegClusteredHistograms ac_clusters;
  ClusterJpegHistograms(cinfo, histograms.data() + 4, m->num_contexts - 4,
                        &ac_clusters);

  // Create Huffman tables and slot ids clusters.
  size_t num_dc_huff = dc_clusters.histograms.size();
  m->num_huffman_tables = num_dc_huff + ac_clusters.histograms.size();
  m->huffman_tables =
      Allocate<JHUFF_TBL>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
  m->slot_id_map = Allocate<uint8_t>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
  for (size_t i = 0; i < m->num_huffman_tables; ++i) {
    JHUFF_TBL huff_table = {};
    if (i < dc_clusters.histograms.size()) {
      // DC tables use slot ids 0x00..; AC tables get 0x10 | slot.
      m->slot_id_map[i] = i;
      BuildJpegHuffmanTable(dc_clusters.histograms[i], &huff_table);
    } else {
      m->slot_id_map[i] = 16 + ac_clusters.slot_ids[i - num_dc_huff];
      BuildJpegHuffmanTable(ac_clusters.histograms[i - num_dc_huff],
                            &huff_table);
    }
    memcpy(&m->huffman_tables[i], &huff_table, sizeof(huff_table));
  }

  // Create context map from clustered histogram indexes.
  m->context_map = Allocate<uint8_t>(cinfo, m->num_contexts, JPOOL_IMAGE);
  memset(m->context_map, 0, m->num_contexts);
  for (size_t i = 0; i < m->num_contexts; ++i) {
    if (i < static_cast<size_t>(cinfo->num_components)) {
      m->context_map[i] = dc_clusters.histogram_indexes[i];
    } else if (i >= 4) {
      // AC tables are stored after the DC tables in m->huffman_tables.
      m->context_map[i] = num_dc_huff + ac_clusters.histogram_indexes[i - 4];
    }
  }
}
namespace {

// Number of raw (extra) bits that follow each Huffman symbol in the bit
// stream, indexed by symbol value (run << 4 | size). For most symbols this
// is the low nibble (the magnitude size). Column 0 is special: symbol
// (n << 4) is an EOBn run-length symbol carrying n extra bits for
// n = 0..14, while 0xF0 (ZRL, last row) carries none.
constexpr uint8_t kNumExtraBits[256] = {
    0,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    1,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    2,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    3,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    4,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    5,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    6,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    7,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    8,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    9,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    0,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
};
11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 775 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 776 13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 777 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 778 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 779 }; 780 781 void BuildHuffmanCodeTable(const JHUFF_TBL& table, HuffmanCodeTable* code) { 782 int huff_code[kJpegHuffmanAlphabetSize]; 783 // +1 for a sentinel element. 784 uint32_t huff_size[kJpegHuffmanAlphabetSize + 1]; 785 int p = 0; 786 for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) { 787 int i = table.bits[l]; 788 while (i--) huff_size[p++] = l; 789 } 790 791 // Reuse sentinel element. 792 int last_p = p; 793 huff_size[last_p] = 0; 794 795 int next_code = 0; 796 uint32_t si = huff_size[0]; 797 p = 0; 798 while (huff_size[p]) { 799 while ((huff_size[p]) == si) { 800 huff_code[p++] = next_code; 801 next_code++; 802 } 803 next_code <<= 1; 804 si++; 805 } 806 for (p = 0; p < last_p; p++) { 807 int i = table.huffval[p]; 808 int nbits = kNumExtraBits[i]; 809 code->depth[i] = huff_size[p] + nbits; 810 code->code[i] = huff_code[p] << nbits; 811 } 812 } 813 814 } // namespace 815 816 void InitEntropyCoder(j_compress_ptr cinfo) { 817 jpeg_comp_master* m = cinfo->master; 818 m->coding_tables = 819 Allocate<HuffmanCodeTable>(cinfo, m->num_huffman_tables, JPOOL_IMAGE); 820 for (size_t i = 0; i < m->num_huffman_tables; ++i) { 821 BuildHuffmanCodeTable(m->huffman_tables[i], &m->coding_tables[i]); 822 } 823 } 824 825 } // namespace jpegli 826 #endif // HWY_ONCE