wedge.c (12139B)
1 /* 2 * Copyright © 2018, VideoLAN and dav1d authors 3 * Copyright © 2018, Two Orioles, LLC 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, this 10 * list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "config.h" 29 30 #include <stdint.h> 31 #include <string.h> 32 33 #include "common/intops.h" 34 35 #include "src/wedge.h" 36 37 enum WedgeDirectionType { 38 WEDGE_HORIZONTAL = 0, 39 WEDGE_VERTICAL = 1, 40 WEDGE_OBLIQUE27 = 2, 41 WEDGE_OBLIQUE63 = 3, 42 WEDGE_OBLIQUE117 = 4, 43 WEDGE_OBLIQUE153 = 5, 44 N_WEDGE_DIRECTIONS 45 }; 46 47 typedef struct { 48 uint8_t /* enum WedgeDirectionType */ direction; 49 uint8_t x_offset; 50 uint8_t y_offset; 51 } wedge_code_type; 52 53 static const wedge_code_type wedge_codebook_16_hgtw[16] = { 54 { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, 55 { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, 56 { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 }, 57 { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 }, 58 { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, 59 { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, 60 { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, 61 { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, 62 }; 63 64 static const wedge_code_type wedge_codebook_16_hltw[16] = { 65 { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, 66 { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, 67 { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 }, 68 { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 }, 69 { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, 70 { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, 71 { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, 72 { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, 73 }; 74 75 static const wedge_code_type wedge_codebook_16_heqw[16] = { 76 { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, 77 { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, 78 { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, 79 { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, 80 { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, 81 { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, 82 { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, 83 { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, 84 }; 85 86 Dav1dMasks dav1d_masks; 87 88 static void insert_border(uint8_t *const dst, const uint8_t *const src, 89 const int ctr) 90 { 91 if (ctr > 4) memset(dst, 0, ctr - 4); 92 memcpy(dst + imax(ctr, 4) - 4, src + imax(4 - ctr, 0), imin(64 - ctr, 8)); 93 if (ctr < 64 - 4) 94 memset(dst + ctr + 4, 64, 64 - 4 - ctr); 95 } 96 97 static void transpose(uint8_t *const dst, const uint8_t *const src) { 98 for (int y = 0, y_off = 0; y < 64; y++, y_off += 64) 99 for (int x = 0, x_off = 0; x < 64; x++, x_off += 64) 100 dst[x_off + y] = src[y_off + x]; 101 } 102 103 static void hflip(uint8_t *const dst, const uint8_t *const src) { 104 for (int y = 0, y_off = 0; y < 64; y++, y_off += 64) 105 for (int x = 0; x < 64; x++) 106 dst[y_off + 64 - 1 - x] = src[y_off + x]; 107 } 108 109 static void copy2d(uint8_t *dst, const uint8_t *src, int sign, 110 const int w, const int h, const int x_off, const int y_off) 111 { 112 src += y_off * 64 + x_off; 113 if (sign) { 114 for (int y = 0; y < h; y++) { 115 for (int x = 0; x < w; x++) 116 dst[x] = 64 - src[x]; 117 src += 64; 118 dst += w; 119 } 120 } else { 121 for (int y = 0; y < h; y++) { 122 memcpy(dst, src, w); 123 src += 64; 124 dst += w; 125 } 126 } 127 } 128 129 #define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3)) 130 131 static COLD uint16_t init_chroma(uint8_t *chroma, const uint8_t *luma, 132 const int sign, const int w, const int h, 133 const int ss_ver) 134 { 135 const uint16_t offset = MASK_OFFSET(chroma); 136 for (int y = 0; y < h; y += 1 + ss_ver) { 137 for (int x = 0; x < w; x += 2) { 138 int sum = luma[x] + luma[x + 1] + 1; 139 if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1; 140 chroma[x >> 1] = (sum - sign) >> (1 + ss_ver); 141 } 142 luma += w << ss_ver; 143 chroma += w >> 1; 144 } 145 return offset; 146 } 147 148 static COLD void fill2d_16x2(const int w, const int h, const enum BlockSize bs, 149 const uint8_t (*const master)[64 * 64], 150 const wedge_code_type *const cb, 151 uint8_t *masks_444, uint8_t *masks_422, 152 uint8_t *masks_420, unsigned signs) 153 { 154 const int n_stride_444 = (w * h); 155 const int n_stride_422 = n_stride_444 >> 1; 156 const int n_stride_420 = n_stride_444 >> 2; 157 const int sign_stride_422 = 16 * n_stride_422; 158 const int sign_stride_420 = 16 * n_stride_420; 159 160 // assign pointer offsets in lookup table 161 for (int n = 0; n < 16; n++) { 162 const int sign = signs & 1; 163 164 copy2d(masks_444, master[cb[n].direction], sign, w, h, 165 32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3)); 166 167 // not using !sign is intentional here, since 444 does not require 168 // any rounding since no chroma subsampling is applied. 169 dav1d_masks.offsets[0][bs].wedge[0][n] = 170 dav1d_masks.offsets[0][bs].wedge[1][n] = MASK_OFFSET(masks_444); 171 172 dav1d_masks.offsets[1][bs].wedge[0][n] = 173 init_chroma(&masks_422[ sign * sign_stride_422], masks_444, 0, w, h, 0); 174 dav1d_masks.offsets[1][bs].wedge[1][n] = 175 init_chroma(&masks_422[!sign * sign_stride_422], masks_444, 1, w, h, 0); 176 dav1d_masks.offsets[2][bs].wedge[0][n] = 177 init_chroma(&masks_420[ sign * sign_stride_420], masks_444, 0, w, h, 1); 178 dav1d_masks.offsets[2][bs].wedge[1][n] = 179 init_chroma(&masks_420[!sign * sign_stride_420], masks_444, 1, w, h, 1); 180 181 signs >>= 1; 182 masks_444 += n_stride_444; 183 masks_422 += n_stride_422; 184 masks_420 += n_stride_420; 185 } 186 } 187 188 static COLD void build_nondc_ii_masks(uint8_t *const mask_v, const int w, 189 const int h, const int step) 190 { 191 static const uint8_t ii_weights_1d[32] = { 192 60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10, 8, 7, 193 6, 6, 5, 4, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 194 }; 195 196 uint8_t *const mask_h = &mask_v[w * h]; 197 uint8_t *const mask_sm = &mask_h[w * h]; 198 for (int y = 0, off = 0; y < h; y++, off += w) { 199 memset(&mask_v[off], ii_weights_1d[y * step], w); 200 for (int x = 0; x < w; x++) { 201 mask_sm[off + x] = ii_weights_1d[imin(x, y) * step]; 202 mask_h[off + x] = ii_weights_1d[x * step]; 203 } 204 } 205 } 206 207 COLD void dav1d_init_ii_wedge_masks(void) { 208 // This function is guaranteed to be called only once 209 210 enum WedgeMasterLineType { 211 WEDGE_MASTER_LINE_ODD, 212 WEDGE_MASTER_LINE_EVEN, 213 WEDGE_MASTER_LINE_VERT, 214 N_WEDGE_MASTER_LINES, 215 }; 216 static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = { 217 [WEDGE_MASTER_LINE_ODD] = { 1, 2, 6, 18, 37, 53, 60, 63 }, 218 [WEDGE_MASTER_LINE_EVEN] = { 1, 4, 11, 27, 46, 58, 62, 63 }, 219 [WEDGE_MASTER_LINE_VERT] = { 0, 2, 7, 21, 43, 57, 62, 64 }, 220 }; 221 uint8_t master[6][64 * 64]; 222 223 // create master templates 224 for (int y = 0, off = 0; y < 64; y++, off += 64) 225 insert_border(&master[WEDGE_VERTICAL][off], 226 wedge_master_border[WEDGE_MASTER_LINE_VERT], 32); 227 for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--) 228 { 229 insert_border(&master[WEDGE_OBLIQUE63][off], 230 wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr); 231 insert_border(&master[WEDGE_OBLIQUE63][off + 64], 232 wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1); 233 } 234 235 transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]); 236 transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]); 237 hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]); 238 hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]); 239 240 #define fill(w, h, sz_422, sz_420, hvsw, signs) \ 241 fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \ 242 master, wedge_codebook_16_##hvsw, \ 243 dav1d_masks.wedge_444_##w##x##h, \ 244 dav1d_masks.wedge_422_##sz_422, \ 245 dav1d_masks.wedge_420_##sz_420, signs) 246 247 fill(32, 32, 16x32, 16x16, heqw, 0x7bfb); 248 fill(32, 16, 16x16, 16x8, hltw, 0x7beb); 249 fill(32, 8, 16x8, 16x4, hltw, 0x6beb); 250 fill(16, 32, 8x32, 8x16, hgtw, 0x7beb); 251 fill(16, 16, 8x16, 8x8, heqw, 0x7bfb); 252 fill(16, 8, 8x8, 8x4, hltw, 0x7beb); 253 fill( 8, 32, 4x32, 4x16, hgtw, 0x7aeb); 254 fill( 8, 16, 4x16, 4x8, hgtw, 0x7beb); 255 fill( 8, 8, 4x8, 4x4, heqw, 0x7bfb); 256 #undef fill 257 258 memset(dav1d_masks.ii_dc, 32, 32 * 32); 259 for (int c = 0; c < 3; c++) { 260 dav1d_masks.offsets[c][BS_32x32-BS_32x32].ii[II_DC_PRED] = 261 dav1d_masks.offsets[c][BS_32x16-BS_32x32].ii[II_DC_PRED] = 262 dav1d_masks.offsets[c][BS_16x32-BS_32x32].ii[II_DC_PRED] = 263 dav1d_masks.offsets[c][BS_16x16-BS_32x32].ii[II_DC_PRED] = 264 dav1d_masks.offsets[c][BS_16x8 -BS_32x32].ii[II_DC_PRED] = 265 dav1d_masks.offsets[c][BS_8x16 -BS_32x32].ii[II_DC_PRED] = 266 dav1d_masks.offsets[c][BS_8x8 -BS_32x32].ii[II_DC_PRED] = 267 MASK_OFFSET(dav1d_masks.ii_dc); 268 } 269 270 #define BUILD_NONDC_II_MASKS(w, h, step) \ 271 build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step) 272 273 #define ASSIGN_NONDC_II_OFFSET(bs, w444, h444, w422, h422, w420, h420) \ 274 dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \ 275 MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \ 276 dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \ 277 MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \ 278 dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \ 279 MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420]) 280 281 BUILD_NONDC_II_MASKS(32, 32, 1); 282 BUILD_NONDC_II_MASKS(16, 32, 1); 283 BUILD_NONDC_II_MASKS(16, 16, 2); 284 BUILD_NONDC_II_MASKS( 8, 32, 1); 285 BUILD_NONDC_II_MASKS( 8, 16, 2); 286 BUILD_NONDC_II_MASKS( 8, 8, 4); 287 BUILD_NONDC_II_MASKS( 4, 16, 2); 288 BUILD_NONDC_II_MASKS( 4, 8, 4); 289 BUILD_NONDC_II_MASKS( 4, 4, 8); 290 for (int p = 0; p < 3; p++) { 291 ASSIGN_NONDC_II_OFFSET(BS_32x32, 32, 32, 16, 32, 16, 16); 292 ASSIGN_NONDC_II_OFFSET(BS_32x16, 32, 32, 16, 16, 16, 16); 293 ASSIGN_NONDC_II_OFFSET(BS_16x32, 16, 32, 8, 32, 8, 16); 294 ASSIGN_NONDC_II_OFFSET(BS_16x16, 16, 16, 8, 16, 8, 8); 295 ASSIGN_NONDC_II_OFFSET(BS_16x8, 16, 16, 8, 8, 8, 8); 296 ASSIGN_NONDC_II_OFFSET(BS_8x16, 8, 16, 4, 16, 4, 8); 297 ASSIGN_NONDC_II_OFFSET(BS_8x8, 8, 8, 4, 8, 4, 4); 298 } 299 }