cdef_apply_tmpl.c (13979B)
1 /* 2 * Copyright © 2018, VideoLAN and dav1d authors 3 * Copyright © 2018, Two Orioles, LLC 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, this 10 * list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "config.h" 29 30 #include <string.h> 31 32 #include "common/intops.h" 33 34 #include "src/cdef_apply.h" 35 36 enum Backup2x8Flags { 37 BACKUP_2X8_Y = 1 << 0, 38 BACKUP_2X8_UV = 1 << 1, 39 }; 40 41 static void backup2lines(pixel *const dst[3], /*const*/ pixel *const src[3], 42 const ptrdiff_t stride[2], 43 const enum Dav1dPixelLayout layout) 44 { 45 const ptrdiff_t y_stride = PXSTRIDE(stride[0]); 46 if (y_stride < 0) 47 pixel_copy(dst[0] + y_stride, src[0] + 7 * y_stride, -2 * y_stride); 48 else 49 pixel_copy(dst[0], src[0] + 6 * y_stride, 2 * y_stride); 50 51 if (layout != DAV1D_PIXEL_LAYOUT_I400) { 52 const ptrdiff_t uv_stride = PXSTRIDE(stride[1]); 53 if (uv_stride < 0) { 54 const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 3 : 7; 55 pixel_copy(dst[1] + uv_stride, src[1] + uv_off * uv_stride, -2 * uv_stride); 56 pixel_copy(dst[2] + uv_stride, src[2] + uv_off * uv_stride, -2 * uv_stride); 57 } else { 58 const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 2 : 6; 59 pixel_copy(dst[1], src[1] + uv_off * uv_stride, 2 * uv_stride); 60 pixel_copy(dst[2], src[2] + uv_off * uv_stride, 2 * uv_stride); 61 } 62 } 63 } 64 65 static void backup2x8(pixel dst[3][8][2], 66 /*const*/ pixel *const src[3], 67 const ptrdiff_t src_stride[2], int x_off, 68 const enum Dav1dPixelLayout layout, 69 const enum Backup2x8Flags flag) 70 { 71 ptrdiff_t y_off = 0; 72 if (flag & BACKUP_2X8_Y) { 73 for (int y = 0; y < 8; y++, y_off += PXSTRIDE(src_stride[0])) 74 pixel_copy(dst[0][y], &src[0][y_off + x_off - 2], 2); 75 } 76 77 if (layout == DAV1D_PIXEL_LAYOUT_I400 || !(flag & BACKUP_2X8_UV)) 78 return; 79 80 const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; 81 const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; 82 83 x_off >>= ss_hor; 84 y_off = 0; 85 for (int y = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(src_stride[1])) { 86 pixel_copy(dst[1][y], &src[1][y_off + x_off - 2], 2); 87 pixel_copy(dst[2][y], &src[2][y_off + x_off - 2], 2); 88 } 89 } 90 91 static int adjust_strength(const int strength, const unsigned var) { 92 if (!var) return 0; 93 const int i = var >> 6 ? imin(ulog2(var >> 6), 12) : 0; 94 return (strength * (4 + i) + 8) >> 4; 95 } 96 97 void bytefn(dav1d_cdef_brow)(Dav1dTaskContext *const tc, 98 pixel *const p[3], 99 const Av1Filter *const lflvl, 100 const int by_start, const int by_end, 101 const int sbrow_start, const int sby) 102 { 103 Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f; 104 const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8; 105 const Dav1dDSPContext *const dsp = f->dsp; 106 enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0); 107 pixel *ptrs[3] = { p[0], p[1], p[2] }; 108 const int sbsz = 16; 109 const int sb64w = f->sb128w << 1; 110 const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8; 111 const enum Dav1dPixelLayout layout = f->cur.p.layout; 112 const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout; 113 const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; 114 const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; 115 static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 }, 116 { 7, 0, 2, 4, 5, 6, 6, 6 } }; 117 const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422]; 118 const int have_tt = f->c->n_tc > 1; 119 const int sb128 = f->seq_hdr->sb128; 120 const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1]; 121 const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]); 122 const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]); 123 124 for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) { 125 const int tf = tc->top_pre_cdef_toggle; 126 const int by_idx = (by & 30) >> 1; 127 if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM; 128 129 if ((!have_tt || sbrow_start || by + 2 < by_end) && 130 edges & CDEF_HAVE_BOTTOM) 131 { 132 // backup pre-filter data for next iteration 133 pixel *const cdef_top_bak[3] = { 134 f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride, 135 f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride, 136 f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride 137 }; 138 backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout); 139 } 140 141 ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]); 142 pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] }; 143 edges &= ~CDEF_HAVE_LEFT; 144 edges |= CDEF_HAVE_RIGHT; 145 enum Backup2x8Flags prev_flag = 0; 146 for (int sbx = 0, last_skip = 1; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) { 147 const int sb128x = sbx >> 1; 148 const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1); 149 const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx]; 150 if (cdef_idx == -1 || 151 (!f->frame_hdr->cdef.y_strength[cdef_idx] && 152 !f->frame_hdr->cdef.uv_strength[cdef_idx])) 153 { 154 last_skip = 1; 155 goto next_sb; 156 } 157 158 // Create a complete 32-bit mask for the sb row ahead of time. 159 const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx]; 160 const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 | 161 noskip_row[0][0]; 162 163 const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx]; 164 const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx]; 165 const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1); 166 167 const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8; 168 int y_sec_lvl = y_lvl & 3; 169 y_sec_lvl += y_sec_lvl == 3; 170 y_sec_lvl <<= bitdepth_min_8; 171 172 const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8; 173 int uv_sec_lvl = uv_lvl & 3; 174 uv_sec_lvl += uv_sec_lvl == 3; 175 uv_sec_lvl <<= bitdepth_min_8; 176 177 pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] }; 178 for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw); 179 bx += 2, edges |= CDEF_HAVE_LEFT) 180 { 181 if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT; 182 183 // check if this 8x8 block had any coded coefficients; if not, 184 // go to the next block 185 const uint32_t bx_mask = 3U << (bx & 30); 186 if (!(noskip_mask & bx_mask)) { 187 last_skip = 1; 188 goto next_b; 189 } 190 const int do_left = last_skip ? flag : (prev_flag ^ flag) & flag; 191 prev_flag = flag; 192 if (do_left && edges & CDEF_HAVE_LEFT) { 193 // we didn't backup the prefilter data because it wasn't 194 // there, so do it here instead 195 backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left); 196 } 197 if (edges & CDEF_HAVE_RIGHT) { 198 // backup pre-filter data for next iteration 199 backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag); 200 } 201 202 int dir; 203 unsigned variance; 204 if (y_pri_lvl || uv_pri_lvl) 205 dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0], 206 &variance HIGHBD_CALL_SUFFIX); 207 208 const pixel *top, *bot; 209 ptrdiff_t offset; 210 211 if (!have_tt) goto st_y; 212 if (sbrow_start && by == by_start) { 213 if (resize) { 214 offset = (sby - 1) * 4 * y_stride + bx * 4; 215 top = &f->lf.cdef_lpf_line[0][offset]; 216 } else { 217 offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4; 218 top = &f->lf.lr_lpf_line[0][offset]; 219 } 220 bot = bptrs[0] + 8 * y_stride; 221 } else if (!sbrow_start && by + 2 >= by_end) { 222 top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4]; 223 if (resize) { 224 offset = (sby * 4 + 2) * y_stride + bx * 4; 225 bot = &f->lf.cdef_lpf_line[0][offset]; 226 } else { 227 const int line = sby * (4 << sb128) + 4 * sb128 + 2; 228 offset = line * y_stride + bx * 4; 229 bot = &f->lf.lr_lpf_line[0][offset]; 230 } 231 } else { 232 st_y:; 233 offset = sby * 4 * y_stride; 234 top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4]; 235 bot = bptrs[0] + 8 * y_stride; 236 } 237 if (y_pri_lvl) { 238 const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance); 239 if (adj_y_pri_lvl || y_sec_lvl) 240 dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], 241 top, bot, adj_y_pri_lvl, y_sec_lvl, 242 dir, damping, edges HIGHBD_CALL_SUFFIX); 243 } else if (y_sec_lvl) 244 dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], 245 top, bot, 0, y_sec_lvl, 0, damping, 246 edges HIGHBD_CALL_SUFFIX); 247 248 if (!uv_lvl) goto skip_uv; 249 assert(layout != DAV1D_PIXEL_LAYOUT_I400); 250 251 const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0; 252 for (int pl = 1; pl <= 2; pl++) { 253 if (!have_tt) goto st_uv; 254 if (sbrow_start && by == by_start) { 255 if (resize) { 256 offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor); 257 top = &f->lf.cdef_lpf_line[pl][offset]; 258 } else { 259 const int line = sby * (4 << sb128) - 4; 260 offset = line * uv_stride + (bx * 4 >> ss_hor); 261 top = &f->lf.lr_lpf_line[pl][offset]; 262 } 263 bot = bptrs[pl] + (8 >> ss_ver) * uv_stride; 264 } else if (!sbrow_start && by + 2 >= by_end) { 265 const ptrdiff_t top_offset = sby * 8 * uv_stride + 266 (bx * 4 >> ss_hor); 267 top = &f->lf.cdef_line[tf][pl][top_offset]; 268 if (resize) { 269 offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor); 270 bot = &f->lf.cdef_lpf_line[pl][offset]; 271 } else { 272 const int line = sby * (4 << sb128) + 4 * sb128 + 2; 273 offset = line * uv_stride + (bx * 4 >> ss_hor); 274 bot = &f->lf.lr_lpf_line[pl][offset]; 275 } 276 } else { 277 st_uv:; 278 const ptrdiff_t offset = sby * 8 * uv_stride; 279 top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)]; 280 bot = bptrs[pl] + (8 >> ss_ver) * uv_stride; 281 } 282 dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1], 283 lr_bak[bit][pl], top, bot, 284 uv_pri_lvl, uv_sec_lvl, uvdir, 285 damping - 1, edges HIGHBD_CALL_SUFFIX); 286 } 287 288 skip_uv: 289 bit ^= 1; 290 last_skip = 0; 291 292 next_b: 293 bptrs[0] += 8; 294 bptrs[1] += 8 >> ss_hor; 295 bptrs[2] += 8 >> ss_hor; 296 } 297 298 next_sb: 299 iptrs[0] += sbsz * 4; 300 iptrs[1] += sbsz * 4 >> ss_hor; 301 iptrs[2] += sbsz * 4 >> ss_hor; 302 } 303 304 ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]); 305 ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver; 306 ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver; 307 tc->top_pre_cdef_toggle ^= 1; 308 } 309 }