fg_apply_tmpl.c (10068B)
1 /* 2 * Copyright © 2018, Niklas Haas 3 * Copyright © 2018, VideoLAN and dav1d authors 4 * Copyright © 2018, Two Orioles, LLC 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, this 11 * list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "config.h" 30 31 #include <stdint.h> 32 33 #include "dav1d/common.h" 34 #include "dav1d/picture.h" 35 36 #include "common/intops.h" 37 #include "common/bitdepth.h" 38 39 #include "src/fg_apply.h" 40 41 static void generate_scaling(const int bitdepth, 42 const uint8_t points[][2], const int num, 43 uint8_t scaling[SCALING_SIZE]) 44 { 45 #if BITDEPTH == 8 46 const int shift_x = 0; 47 const int scaling_size = SCALING_SIZE; 48 #else 49 assert(bitdepth > 8); 50 const int shift_x = bitdepth - 8; 51 const int scaling_size = 1 << bitdepth; 52 #endif 53 54 if (num == 0) { 55 memset(scaling, 0, scaling_size); 56 return; 57 } 58 59 // Fill up the preceding entries with the initial value 60 memset(scaling, points[0][1], points[0][0] << shift_x); 61 62 // Linearly interpolate the values in the middle 63 for (int i = 0; i < num - 1; i++) { 64 const int bx = points[i][0]; 65 const int by = points[i][1]; 66 const int ex = points[i+1][0]; 67 const int ey = points[i+1][1]; 68 const int dx = ex - bx; 69 const int dy = ey - by; 70 assert(dx > 0); 71 const int delta = dy * ((0x10000 + (dx >> 1)) / dx); 72 for (int x = 0, d = 0x8000; x < dx; x++) { 73 scaling[(bx + x) << shift_x] = by + (d >> 16); 74 d += delta; 75 } 76 } 77 78 // Fill up the remaining entries with the final value 79 const int n = points[num - 1][0] << shift_x; 80 memset(&scaling[n], points[num - 1][1], scaling_size - n); 81 82 #if BITDEPTH != 8 83 const int pad = 1 << shift_x, rnd = pad >> 1; 84 for (int i = 0; i < num - 1; i++) { 85 const int bx = points[i][0] << shift_x; 86 const int ex = points[i+1][0] << shift_x; 87 const int dx = ex - bx; 88 for (int x = 0; x < dx; x += pad) { 89 const int range = scaling[bx + x + pad] - scaling[bx + x]; 90 for (int n = 1, r = rnd; n < pad; n++) { 91 r += range; 92 scaling[bx + x + n] = scaling[bx + x] + (r >> shift_x); 93 } 94 } 95 } 96 #endif 97 } 98 99 #ifndef UNIT_TEST 100 void bitfn(dav1d_prep_grain)(const Dav1dFilmGrainDSPContext *const dsp, 101 Dav1dPicture *const out, 102 const Dav1dPicture *const in, 103 uint8_t scaling[3][SCALING_SIZE], 104 entry grain_lut[3][GRAIN_HEIGHT+1][GRAIN_WIDTH]) 105 { 106 const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; 107 #if BITDEPTH != 8 108 const int bitdepth_max = (1 << out->p.bpc) - 1; 109 #endif 110 111 // Generate grain LUTs as needed 112 dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed 113 if (data->num_uv_points[0] || data->chroma_scaling_from_luma) 114 dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0], 115 data, 0 HIGHBD_TAIL_SUFFIX); 116 if (data->num_uv_points[1] || data->chroma_scaling_from_luma) 117 dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0], 118 data, 1 HIGHBD_TAIL_SUFFIX); 119 120 // Generate scaling LUTs as needed 121 if (data->num_y_points || data->chroma_scaling_from_luma) 122 generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]); 123 if (data->num_uv_points[0]) 124 generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]); 125 if (data->num_uv_points[1]) 126 generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]); 127 128 // Copy over the non-modified planes 129 assert(out->stride[0] == in->stride[0]); 130 if (!data->num_y_points) { 131 const ptrdiff_t stride = out->stride[0]; 132 const ptrdiff_t sz = out->p.h * stride; 133 if (sz < 0) 134 memcpy((uint8_t*) out->data[0] + sz - stride, 135 (uint8_t*) in->data[0] + sz - stride, -sz); 136 else 137 memcpy(out->data[0], in->data[0], sz); 138 } 139 140 if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) { 141 assert(out->stride[1] == in->stride[1]); 142 const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420; 143 const ptrdiff_t stride = out->stride[1]; 144 const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride; 145 if (sz < 0) { 146 if (!data->num_uv_points[0]) 147 memcpy((uint8_t*) out->data[1] + sz - stride, 148 (uint8_t*) in->data[1] + sz - stride, -sz); 149 if (!data->num_uv_points[1]) 150 memcpy((uint8_t*) out->data[2] + sz - stride, 151 (uint8_t*) in->data[2] + sz - stride, -sz); 152 } else { 153 if (!data->num_uv_points[0]) 154 memcpy(out->data[1], in->data[1], sz); 155 if (!data->num_uv_points[1]) 156 memcpy(out->data[2], in->data[2], sz); 157 } 158 } 159 } 160 161 void bitfn(dav1d_apply_grain_row)(const Dav1dFilmGrainDSPContext *const dsp, 162 Dav1dPicture *const out, 163 const Dav1dPicture *const in, 164 const uint8_t scaling[3][SCALING_SIZE], 165 const entry grain_lut[3][GRAIN_HEIGHT+1][GRAIN_WIDTH], 166 const int row) 167 { 168 // Synthesize grain for the affected planes 169 const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data; 170 const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420; 171 const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444; 172 const int cpw = (out->p.w + ss_x) >> ss_x; 173 const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY; 174 pixel *const luma_src = 175 ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]); 176 #if BITDEPTH != 8 177 const int bitdepth_max = (1 << out->p.bpc) - 1; 178 #endif 179 180 if (data->num_y_points) { 181 const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE); 182 dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]), 183 luma_src, out->stride[0], data, 184 out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX); 185 } 186 187 if (!data->num_uv_points[0] && !data->num_uv_points[1] && 188 !data->chroma_scaling_from_luma) 189 { 190 return; 191 } 192 193 const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y; 194 195 // extend padding pixels 196 if (out->p.w & ss_x) { 197 pixel *ptr = luma_src; 198 for (int y = 0; y < bh; y++) { 199 ptr[out->p.w] = ptr[out->p.w - 1]; 200 ptr += PXSTRIDE(in->stride[0]) << ss_y; 201 } 202 } 203 204 const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y; 205 if (data->chroma_scaling_from_luma) { 206 for (int pl = 0; pl < 2; pl++) 207 dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off, 208 ((const pixel *) in->data[1 + pl]) + uv_off, 209 in->stride[1], data, cpw, 210 scaling[0], grain_lut[1 + pl], 211 bh, row, luma_src, in->stride[0], 212 pl, is_id HIGHBD_TAIL_SUFFIX); 213 } else { 214 for (int pl = 0; pl < 2; pl++) 215 if (data->num_uv_points[pl]) 216 dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off, 217 ((const pixel *) in->data[1 + pl]) + uv_off, 218 in->stride[1], data, cpw, 219 scaling[1 + pl], grain_lut[1 + pl], 220 bh, row, luma_src, in->stride[0], 221 pl, is_id HIGHBD_TAIL_SUFFIX); 222 } 223 } 224 225 void bitfn(dav1d_apply_grain)(const Dav1dFilmGrainDSPContext *const dsp, 226 Dav1dPicture *const out, 227 const Dav1dPicture *const in) 228 { 229 ALIGN_STK_16(entry, grain_lut, 3,[GRAIN_HEIGHT + 1][GRAIN_WIDTH]); 230 #if ARCH_X86_64 && BITDEPTH == 8 231 ALIGN_STK_64(uint8_t, scaling, 3,[SCALING_SIZE]); 232 #else 233 uint8_t scaling[3][SCALING_SIZE]; 234 #endif 235 const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE; 236 237 bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut); 238 for (int row = 0; row < rows; row++) 239 bitfn(dav1d_apply_grain_row)(dsp, out, in, scaling, grain_lut, row); 240 } 241 #endif