iterator_enc.c (15231B)
1 // Copyright 2011 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // VP8Iterator: block iterator 11 // 12 // Author: Skal (pascal.massimino@gmail.com) 13 14 #include <string.h> 15 16 #include "src/dsp/cpu.h" 17 #include "src/dsp/dsp.h" 18 #include "src/enc/vp8i_enc.h" 19 #include "src/utils/utils.h" 20 #include "src/webp/types.h" 21 22 //------------------------------------------------------------------------------ 23 // VP8Iterator 24 //------------------------------------------------------------------------------ 25 26 static void InitLeft(VP8EncIterator* const it) { 27 it->y_left[-1] = it->u_left[-1] = it->v_left[-1] = 28 (it->y > 0) ? 
129 : 127; 29 memset(it->y_left, 129, 16); 30 memset(it->u_left, 129, 8); 31 memset(it->v_left, 129, 8); 32 it->left_nz[8] = 0; 33 if (it->top_derr != NULL) { 34 memset(&it->left_derr, 0, sizeof(it->left_derr)); 35 } 36 } 37 38 static void InitTop(VP8EncIterator* const it) { 39 const VP8Encoder* const enc = it->enc; 40 const size_t top_size = enc->mb_w * 16; 41 memset(enc->y_top, 127, 2 * top_size); 42 memset(enc->nz, 0, enc->mb_w * sizeof(*enc->nz)); 43 if (enc->top_derr != NULL) { 44 memset(enc->top_derr, 0, enc->mb_w * sizeof(*enc->top_derr)); 45 } 46 } 47 48 void VP8IteratorSetRow(VP8EncIterator* const it, int y) { 49 VP8Encoder* const enc = it->enc; 50 it->x = 0; 51 it->y = y; 52 it->bw = &enc->parts[y & (enc->num_parts - 1)]; 53 it->preds = enc->preds + y * 4 * enc->preds_w; 54 it->nz = enc->nz; 55 it->mb = enc->mb_info + y * enc->mb_w; 56 it->y_top = enc->y_top; 57 it->uv_top = enc->uv_top; 58 InitLeft(it); 59 } 60 61 // restart a scan 62 static void VP8IteratorReset(VP8EncIterator* const it) { 63 VP8Encoder* const enc = it->enc; 64 VP8IteratorSetRow(it, 0); 65 VP8IteratorSetCountDown(it, enc->mb_w * enc->mb_h); // default 66 InitTop(it); 67 memset(it->bit_count, 0, sizeof(it->bit_count)); 68 it->do_trellis = 0; 69 } 70 71 void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) { 72 it->count_down = it->count_down0 = count_down; 73 } 74 75 int VP8IteratorIsDone(const VP8EncIterator* const it) { 76 return (it->count_down <= 0); 77 } 78 79 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { 80 it->enc = enc; 81 it->yuv_in = (uint8_t*)WEBP_ALIGN(it->yuv_mem); 82 it->yuv_out = it->yuv_in + YUV_SIZE_ENC; 83 it->yuv_out2 = it->yuv_out + YUV_SIZE_ENC; 84 it->yuv_p = it->yuv_out2 + YUV_SIZE_ENC; 85 it->lf_stats = enc->lf_stats; 86 it->percent0 = enc->percent; 87 it->y_left = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem + 1); 88 it->u_left = it->y_left + 16 + 16; 89 it->v_left = it->u_left + 16; 90 it->top_derr = enc->top_derr; 91 
VP8IteratorReset(it); 92 } 93 94 int VP8IteratorProgress(const VP8EncIterator* const it, int delta) { 95 VP8Encoder* const enc = it->enc; 96 if (delta && enc->pic->progress_hook != NULL) { 97 const int done = it->count_down0 - it->count_down; 98 const int percent = (it->count_down0 <= 0) 99 ? it->percent0 100 : it->percent0 + delta * done / it->count_down0; 101 return WebPReportProgress(enc->pic, percent, &enc->percent); 102 } 103 return 1; 104 } 105 106 //------------------------------------------------------------------------------ 107 // Import the source samples into the cache. Takes care of replicating 108 // boundary pixels if necessary. 109 110 static WEBP_INLINE int MinSize(int a, int b) { return (a < b) ? a : b; } 111 112 static void ImportBlock(const uint8_t* src, int src_stride, 113 uint8_t* dst, int w, int h, int size) { 114 int i; 115 for (i = 0; i < h; ++i) { 116 memcpy(dst, src, w); 117 if (w < size) { 118 memset(dst + w, dst[w - 1], size - w); 119 } 120 dst += BPS; 121 src += src_stride; 122 } 123 for (i = h; i < size; ++i) { 124 memcpy(dst, dst - BPS, size); 125 dst += BPS; 126 } 127 } 128 129 static void ImportLine(const uint8_t* src, int src_stride, 130 uint8_t* dst, int len, int total_len) { 131 int i; 132 for (i = 0; i < len; ++i, src += src_stride) dst[i] = *src; 133 for (; i < total_len; ++i) dst[i] = dst[len - 1]; 134 } 135 136 void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) { 137 const VP8Encoder* const enc = it->enc; 138 const int x = it->x, y = it->y; 139 const WebPPicture* const pic = enc->pic; 140 const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16; 141 const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8; 142 const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8; 143 const int w = MinSize(pic->width - x * 16, 16); 144 const int h = MinSize(pic->height - y * 16, 16); 145 const int uv_w = (w + 1) >> 1; 146 const int uv_h = (h + 1) >> 1; 147 148 ImportBlock(ysrc, 
pic->y_stride, it->yuv_in + Y_OFF_ENC, w, h, 16); 149 ImportBlock(usrc, pic->uv_stride, it->yuv_in + U_OFF_ENC, uv_w, uv_h, 8); 150 ImportBlock(vsrc, pic->uv_stride, it->yuv_in + V_OFF_ENC, uv_w, uv_h, 8); 151 152 if (tmp_32 == NULL) return; 153 154 // Import source (uncompressed) samples into boundary. 155 if (x == 0) { 156 InitLeft(it); 157 } else { 158 if (y == 0) { 159 it->y_left[-1] = it->u_left[-1] = it->v_left[-1] = 127; 160 } else { 161 it->y_left[-1] = ysrc[- 1 - pic->y_stride]; 162 it->u_left[-1] = usrc[- 1 - pic->uv_stride]; 163 it->v_left[-1] = vsrc[- 1 - pic->uv_stride]; 164 } 165 ImportLine(ysrc - 1, pic->y_stride, it->y_left, h, 16); 166 ImportLine(usrc - 1, pic->uv_stride, it->u_left, uv_h, 8); 167 ImportLine(vsrc - 1, pic->uv_stride, it->v_left, uv_h, 8); 168 } 169 170 it->y_top = tmp_32 + 0; 171 it->uv_top = tmp_32 + 16; 172 if (y == 0) { 173 memset(tmp_32, 127, 32 * sizeof(*tmp_32)); 174 } else { 175 ImportLine(ysrc - pic->y_stride, 1, tmp_32, w, 16); 176 ImportLine(usrc - pic->uv_stride, 1, tmp_32 + 16, uv_w, 8); 177 ImportLine(vsrc - pic->uv_stride, 1, tmp_32 + 16 + 8, uv_w, 8); 178 } 179 } 180 181 //------------------------------------------------------------------------------ 182 // Copy back the compressed samples into user space if requested. 
183 184 static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride, 185 int w, int h) { 186 while (h-- > 0) { 187 memcpy(dst, src, w); 188 dst += dst_stride; 189 src += BPS; 190 } 191 } 192 193 void VP8IteratorExport(const VP8EncIterator* const it) { 194 const VP8Encoder* const enc = it->enc; 195 if (enc->config->show_compressed) { 196 const int x = it->x, y = it->y; 197 const uint8_t* const ysrc = it->yuv_out + Y_OFF_ENC; 198 const uint8_t* const usrc = it->yuv_out + U_OFF_ENC; 199 const uint8_t* const vsrc = it->yuv_out + V_OFF_ENC; 200 const WebPPicture* const pic = enc->pic; 201 uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16; 202 uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8; 203 uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8; 204 int w = (pic->width - x * 16); 205 int h = (pic->height - y * 16); 206 207 if (w > 16) w = 16; 208 if (h > 16) h = 16; 209 210 // Luma plane 211 ExportBlock(ysrc, ydst, pic->y_stride, w, h); 212 213 { // U/V planes 214 const int uv_w = (w + 1) >> 1; 215 const int uv_h = (h + 1) >> 1; 216 ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h); 217 ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h); 218 } 219 } 220 } 221 222 //------------------------------------------------------------------------------ 223 // Non-zero contexts setup/teardown 224 225 // Nz bits: 226 // 0 1 2 3 Y 227 // 4 5 6 7 228 // 8 9 10 11 229 // 12 13 14 15 230 // 16 17 U 231 // 18 19 232 // 20 21 V 233 // 22 23 234 // 24 DC-intra16 235 236 // Convert packed context to byte array 237 #define BIT(nz, n) (!!((nz) & (1 << (n)))) 238 239 void VP8IteratorNzToBytes(VP8EncIterator* const it) { 240 const int tnz = it->nz[0], lnz = it->nz[-1]; 241 int* const top_nz = it->top_nz; 242 int* const left_nz = it->left_nz; 243 244 // Top-Y 245 top_nz[0] = BIT(tnz, 12); 246 top_nz[1] = BIT(tnz, 13); 247 top_nz[2] = BIT(tnz, 14); 248 top_nz[3] = BIT(tnz, 15); 249 // Top-U 250 top_nz[4] = BIT(tnz, 18); 251 top_nz[5] = BIT(tnz, 19); 
252 // Top-V 253 top_nz[6] = BIT(tnz, 22); 254 top_nz[7] = BIT(tnz, 23); 255 // DC 256 top_nz[8] = BIT(tnz, 24); 257 258 // left-Y 259 left_nz[0] = BIT(lnz, 3); 260 left_nz[1] = BIT(lnz, 7); 261 left_nz[2] = BIT(lnz, 11); 262 left_nz[3] = BIT(lnz, 15); 263 // left-U 264 left_nz[4] = BIT(lnz, 17); 265 left_nz[5] = BIT(lnz, 19); 266 // left-V 267 left_nz[6] = BIT(lnz, 21); 268 left_nz[7] = BIT(lnz, 23); 269 // left-DC is special, iterated separately 270 } 271 272 void VP8IteratorBytesToNz(VP8EncIterator* const it) { 273 uint32_t nz = 0; 274 const int* const top_nz = it->top_nz; 275 const int* const left_nz = it->left_nz; 276 // top 277 nz |= (top_nz[0] << 12) | (top_nz[1] << 13); 278 nz |= (top_nz[2] << 14) | (top_nz[3] << 15); 279 nz |= (top_nz[4] << 18) | (top_nz[5] << 19); 280 nz |= (top_nz[6] << 22) | (top_nz[7] << 23); 281 nz |= (top_nz[8] << 24); // we propagate the _top_ bit, esp. for intra4 282 // left 283 nz |= (left_nz[0] << 3) | (left_nz[1] << 7); 284 nz |= (left_nz[2] << 11); 285 nz |= (left_nz[4] << 17) | (left_nz[6] << 21); 286 287 *it->nz = nz; 288 } 289 290 #undef BIT 291 292 //------------------------------------------------------------------------------ 293 // Advance to the next position, doing the bookkeeping. 294 295 void VP8IteratorSaveBoundary(VP8EncIterator* const it) { 296 VP8Encoder* const enc = it->enc; 297 const int x = it->x, y = it->y; 298 const uint8_t* const ysrc = it->yuv_out + Y_OFF_ENC; 299 const uint8_t* const uvsrc = it->yuv_out + U_OFF_ENC; 300 if (x < enc->mb_w - 1) { // left 301 int i; 302 for (i = 0; i < 16; ++i) { 303 it->y_left[i] = ysrc[15 + i * BPS]; 304 } 305 for (i = 0; i < 8; ++i) { 306 it->u_left[i] = uvsrc[7 + i * BPS]; 307 it->v_left[i] = uvsrc[15 + i * BPS]; 308 } 309 // top-left (before 'top'!) 
310 it->y_left[-1] = it->y_top[15]; 311 it->u_left[-1] = it->uv_top[0 + 7]; 312 it->v_left[-1] = it->uv_top[8 + 7]; 313 } 314 if (y < enc->mb_h - 1) { // top 315 memcpy(it->y_top, ysrc + 15 * BPS, 16); 316 memcpy(it->uv_top, uvsrc + 7 * BPS, 8 + 8); 317 } 318 } 319 320 int VP8IteratorNext(VP8EncIterator* const it) { 321 if (++it->x == it->enc->mb_w) { 322 VP8IteratorSetRow(it, ++it->y); 323 } else { 324 it->preds += 4; 325 it->mb += 1; 326 it->nz += 1; 327 it->y_top += 16; 328 it->uv_top += 16; 329 } 330 return (0 < --it->count_down); 331 } 332 333 //------------------------------------------------------------------------------ 334 // Helper function to set mode properties 335 336 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) { 337 uint8_t* preds = it->preds; 338 int y; 339 for (y = 0; y < 4; ++y) { 340 memset(preds, mode, 4); 341 preds += it->enc->preds_w; 342 } 343 it->mb->type = 1; 344 } 345 346 void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) { 347 uint8_t* preds = it->preds; 348 int y; 349 for (y = 4; y > 0; --y) { 350 memcpy(preds, modes, 4 * sizeof(*modes)); 351 preds += it->enc->preds_w; 352 modes += 4; 353 } 354 it->mb->type = 0; 355 } 356 357 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) { 358 it->mb->uv_mode = mode; 359 } 360 361 void VP8SetSkip(const VP8EncIterator* const it, int skip) { 362 it->mb->skip = skip; 363 } 364 365 void VP8SetSegment(const VP8EncIterator* const it, int segment) { 366 it->mb->segment = segment; 367 } 368 369 //------------------------------------------------------------------------------ 370 // Intra4x4 sub-blocks iteration 371 // 372 // We store and update the boundary samples into an array of 37 pixels. They 373 // are updated as we iterate and reconstructs each intra4x4 blocks in turn. 
// The position of the samples has the following snake pattern:
//
// 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36  <- Top-right
// --+-----------+-----------+-----------+-----------+
// 15|         19|         23|         27|         31|
// 14|         18|         22|         26|         30|
// 13|         17|         21|         25|         29|
// 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28|
// --+-----------+-----------+-----------+-----------+
// 11|         15|         19|         23|         27|
// 10|         14|         18|         22|         26|
//  9|         13|         17|         21|         25|
//  8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24|
// --+-----------+-----------+-----------+-----------+
//  7|         11|         15|         19|         23|
//  6|         10|         14|         18|         22|
//  5|          9|         13|         17|         21|
//  4| 5  6  7  8| 9 10 11 12|13 14 15 16|17 18 19 20|
// --+-----------+-----------+-----------+-----------+
//  3|          7|         11|         15|         19|
//  2|          6|         10|         14|         18|
//  1|          5|          9|         13|         17|
//  0| 1  2  3  4| 5  6  7  8| 9 10 11 12|13 14 15 16|
// --+-----------+-----------+-----------+-----------+

// Array to record the position of the top sample to pass to the prediction
// functions in dsp.c.
401 static const uint8_t VP8TopLeftI4[16] = { 402 17, 21, 25, 29, 403 13, 17, 21, 25, 404 9, 13, 17, 21, 405 5, 9, 13, 17 406 }; 407 408 void VP8IteratorStartI4(VP8EncIterator* const it) { 409 const VP8Encoder* const enc = it->enc; 410 int i; 411 412 it->i4 = 0; // first 4x4 sub-block 413 it->i4_top = it->i4_boundary + VP8TopLeftI4[0]; 414 415 // Import the boundary samples 416 for (i = 0; i < 17; ++i) { // left 417 it->i4_boundary[i] = it->y_left[15 - i]; 418 } 419 for (i = 0; i < 16; ++i) { // top 420 it->i4_boundary[17 + i] = it->y_top[i]; 421 } 422 // top-right samples have a special case on the far right of the picture 423 if (it->x < enc->mb_w - 1) { 424 for (i = 16; i < 16 + 4; ++i) { 425 it->i4_boundary[17 + i] = it->y_top[i]; 426 } 427 } else { // else, replicate the last valid pixel four times 428 for (i = 16; i < 16 + 4; ++i) { 429 it->i4_boundary[17 + i] = it->i4_boundary[17 + 15]; 430 } 431 } 432 #if WEBP_AARCH64 && BPS == 32 && defined(WEBP_MSAN) 433 // Intra4Preds_NEON() reads 3 uninitialized bytes from 'i4_boundary' when top 434 // is positioned at offset 29 (VP8TopLeftI4[3]). The values are not used 435 // meaningfully, but due to limitations in MemorySanitizer related to 436 // modeling of tbl instructions, a warning will be issued. This can be 437 // removed if MSan is updated to support the instructions. See 438 // https://issues.webmproject.org/372109644. 
439 memset(it->i4_boundary + sizeof(it->i4_boundary) - 3, 0xaa, 3); 440 #endif 441 VP8IteratorNzToBytes(it); // import the non-zero context 442 } 443 444 int VP8IteratorRotateI4(VP8EncIterator* const it, 445 const uint8_t* const yuv_out) { 446 const uint8_t* const blk = yuv_out + VP8Scan[it->i4]; 447 uint8_t* const top = it->i4_top; 448 int i; 449 450 // Update the cache with 7 fresh samples 451 for (i = 0; i <= 3; ++i) { 452 top[-4 + i] = blk[i + 3 * BPS]; // store future top samples 453 } 454 if ((it->i4 & 3) != 3) { // if not on the right sub-blocks #3, #7, #11, #15 455 for (i = 0; i <= 2; ++i) { // store future left samples 456 top[i] = blk[3 + (2 - i) * BPS]; 457 } 458 } else { // else replicate top-right samples, as says the specs. 459 for (i = 0; i <= 3; ++i) { 460 top[i] = top[i + 4]; 461 } 462 } 463 // move pointers to next sub-block 464 ++it->i4; 465 if (it->i4 == 16) { // we're done 466 return 0; 467 } 468 469 it->i4_top = it->i4_boundary + VP8TopLeftI4[it->i4]; 470 return 1; 471 } 472 473 //------------------------------------------------------------------------------