blend_a64_mask.c (13442B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <assert.h> 13 14 #include "aom/aom_integer.h" 15 #include "aom_ports/mem.h" 16 #include "aom_dsp/blend.h" 17 #include "aom_dsp/aom_dsp_common.h" 18 19 #include "config/aom_dsp_rtcd.h" 20 21 // Blending with alpha mask. Mask values come from the range [0, 64], 22 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can 23 // be the same as dst, or dst can be different from both sources. 24 25 // NOTE(rachelbarker): The input and output of aom_blend_a64_d16_mask_c() are 26 // in a higher intermediate precision, and will later be rounded down to pixel 27 // precision. 28 // Thus, in order to avoid double-rounding, we want to use normal right shifts 29 // within this function, not ROUND_POWER_OF_TWO. 30 // This works because of the identity: 31 // ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z) 32 // 33 // In contrast, the output of the non-d16 functions will not be further rounded, 34 // so we *should* use ROUND_POWER_OF_TWO there. 35 36 void aom_lowbd_blend_a64_d16_mask_c( 37 uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, 38 uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, 39 const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, 40 ConvolveParams *conv_params) { 41 int i, j; 42 const int bd = 8; 43 const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; 44 const int round_offset = (1 << (offset_bits - conv_params->round_1)) + 45 (1 << (offset_bits - conv_params->round_1 - 1)); 46 const int round_bits = 47 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; 48 49 assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride)); 50 assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride)); 51 52 assert(h >= 4); 53 assert(w >= 4); 54 assert(IS_POWER_OF_TWO(h)); 55 assert(IS_POWER_OF_TWO(w)); 56 57 if (subw == 0 && subh == 0) { 58 for (i = 0; i < h; ++i) { 59 for (j = 0; j < w; ++j) { 60 int32_t res; 61 const int m = mask[i * mask_stride + j]; 62 res = ((m * (int32_t)src0[i * src0_stride + j] + 63 (AOM_BLEND_A64_MAX_ALPHA - m) * 64 (int32_t)src1[i * src1_stride + j]) >> 65 AOM_BLEND_A64_ROUND_BITS); 66 res -= round_offset; 67 dst[i * dst_stride + j] = 68 clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); 69 } 70 } 71 } else if (subw == 1 && subh == 1) { 72 for (i = 0; i < h; ++i) { 73 for (j = 0; j < w; ++j) { 74 int32_t res; 75 const int m = ROUND_POWER_OF_TWO( 76 mask[(2 * i) * mask_stride + (2 * j)] + 77 mask[(2 * i + 1) * mask_stride + (2 * j)] + 78 mask[(2 * i) * mask_stride + (2 * j + 1)] + 79 mask[(2 * i + 1) * mask_stride + (2 * j + 1)], 80 2); 81 res = ((m * (int32_t)src0[i * src0_stride + j] + 82 (AOM_BLEND_A64_MAX_ALPHA - m) * 83 (int32_t)src1[i * src1_stride + j]) >> 84 AOM_BLEND_A64_ROUND_BITS); 85 res -= round_offset; 86 dst[i * dst_stride + j] = 87 clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); 88 } 89 } 90 } else if (subw == 1 && subh == 0) { 91 for (i = 0; i < h; ++i) { 92 for (j = 0; j < w; ++j) { 93 int32_t res; 94 const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)], 95 mask[i * mask_stride + (2 * j + 1)]); 96 res = ((m * (int32_t)src0[i * src0_stride + j] + 97 (AOM_BLEND_A64_MAX_ALPHA - m) * 98 (int32_t)src1[i * src1_stride + j]) >> 99 AOM_BLEND_A64_ROUND_BITS); 100 res -= round_offset; 101 dst[i * dst_stride + j] = 102 clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); 103 } 104 } 105 } else { 106 for (i = 0; i < h; ++i) { 107 for (j = 0; j < w; ++j) { 108 int32_t res; 109 const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j], 110 mask[(2 * i + 1) * mask_stride + j]); 111 res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] + 112 (AOM_BLEND_A64_MAX_ALPHA - m) * 113 (int32_t)src1[i * src1_stride + j]) >> 114 AOM_BLEND_A64_ROUND_BITS); 115 res -= round_offset; 116 dst[i * dst_stride + j] = 117 clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); 118 } 119 } 120 } 121 } 122 123 #if CONFIG_AV1_HIGHBITDEPTH 124 void aom_highbd_blend_a64_d16_mask_c( 125 uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0, 126 uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, 127 const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, 128 ConvolveParams *conv_params, const int bd) { 129 const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; 130 const int round_offset = (1 << (offset_bits - conv_params->round_1)) + 131 (1 << (offset_bits - conv_params->round_1 - 1)); 132 const int round_bits = 133 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; 134 uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); 135 136 assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); 137 assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); 138 139 assert(h >= 1); 140 assert(w >= 1); 141 assert(IS_POWER_OF_TWO(h)); 142 assert(IS_POWER_OF_TWO(w)); 143 144 // excerpt from clip_pixel_highbd() 145 // set saturation_value to (1 << bd) - 1 146 unsigned int saturation_value; 147 switch (bd) { 148 case 8: 149 default: saturation_value = 255; break; 150 case 10: saturation_value = 1023; break; 151 case 12: saturation_value = 4095; break; 152 } 153 154 if (subw == 0 && subh == 0) { 155 for (int i = 0; i < h; ++i) { 156 for (int j = 0; j < w; ++j) { 157 int32_t res; 158 const int m = mask[j]; 159 res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> 160 AOM_BLEND_A64_ROUND_BITS); 161 res -= round_offset; 162 unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); 163 dst[j] = AOMMIN(v, saturation_value); 164 } 165 mask += mask_stride; 166 src0 += src0_stride; 167 src1 += src1_stride; 168 dst += dst_stride; 169 } 170 } else if (subw == 1 && subh == 1) { 171 for (int i = 0; i < h; ++i) { 172 for (int j = 0; j < w; ++j) { 173 int32_t res; 174 const int m = ROUND_POWER_OF_TWO( 175 mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] + 176 mask[mask_stride + 2 * j + 1], 177 2); 178 res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> 179 AOM_BLEND_A64_ROUND_BITS; 180 res -= round_offset; 181 unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); 182 dst[j] = AOMMIN(v, saturation_value); 183 } 184 mask += 2 * mask_stride; 185 src0 += src0_stride; 186 src1 += src1_stride; 187 dst += dst_stride; 188 } 189 } else if (subw == 1 && subh == 0) { 190 for (int i = 0; i < h; ++i) { 191 for (int j = 0; j < w; ++j) { 192 int32_t res; 193 const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]); 194 res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> 195 AOM_BLEND_A64_ROUND_BITS; 196 res -= round_offset; 197 unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); 198 dst[j] = AOMMIN(v, saturation_value); 199 } 200 mask += mask_stride; 201 src0 += src0_stride; 202 src1 += src1_stride; 203 dst += dst_stride; 204 } 205 } else { 206 for (int i = 0; i < h; ++i) { 207 for (int j = 0; j < w; ++j) { 208 int32_t res; 209 const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]); 210 res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> 211 AOM_BLEND_A64_ROUND_BITS; 212 res -= round_offset; 213 unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); 214 dst[j] = AOMMIN(v, saturation_value); 215 } 216 mask += 2 * mask_stride; 217 src0 += src0_stride; 218 src1 += src1_stride; 219 dst += dst_stride; 220 } 221 } 222 } 223 #endif // CONFIG_AV1_HIGHBITDEPTH 224 225 // Blending with alpha mask. Mask values come from the range [0, 64], 226 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can 227 // be the same as dst, or dst can be different from both sources. 228 229 void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, 230 const uint8_t *src0, uint32_t src0_stride, 231 const uint8_t *src1, uint32_t src1_stride, 232 const uint8_t *mask, uint32_t mask_stride, int w, 233 int h, int subw, int subh) { 234 int i, j; 235 236 assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); 237 assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); 238 239 assert(h >= 1); 240 assert(w >= 1); 241 assert(IS_POWER_OF_TWO(h)); 242 assert(IS_POWER_OF_TWO(w)); 243 244 if (subw == 0 && subh == 0) { 245 for (i = 0; i < h; ++i) { 246 for (j = 0; j < w; ++j) { 247 const int m = mask[i * mask_stride + j]; 248 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 249 src1[i * src1_stride + j]); 250 } 251 } 252 } else if (subw == 1 && subh == 1) { 253 for (i = 0; i < h; ++i) { 254 for (j = 0; j < w; ++j) { 255 const int m = ROUND_POWER_OF_TWO( 256 mask[(2 * i) * mask_stride + (2 * j)] + 257 mask[(2 * i + 1) * mask_stride + (2 * j)] + 258 mask[(2 * i) * mask_stride + (2 * j + 1)] + 259 mask[(2 * i + 1) * mask_stride + (2 * j + 1)], 260 2); 261 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 262 src1[i * src1_stride + j]); 263 } 264 } 265 } else if (subw == 1 && subh == 0) { 266 for (i = 0; i < h; ++i) { 267 for (j = 0; j < w; ++j) { 268 const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)], 269 mask[i * mask_stride + (2 * j + 1)]); 270 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 271 src1[i * src1_stride + j]); 272 } 273 } 274 } else { 275 for (i = 0; i < h; ++i) { 276 for (j = 0; j < w; ++j) { 277 const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j], 278 mask[(2 * i + 1) * mask_stride + j]); 279 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 280 src1[i * src1_stride + j]); 281 } 282 } 283 } 284 } 285 286 #if CONFIG_AV1_HIGHBITDEPTH 287 void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride, 288 const uint8_t *src0_8, uint32_t src0_stride, 289 const uint8_t *src1_8, uint32_t src1_stride, 290 const uint8_t *mask, uint32_t mask_stride, 291 int w, int h, int subw, int subh, int bd) { 292 int i, j; 293 uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); 294 const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8); 295 const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8); 296 (void)bd; 297 298 assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); 299 assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); 300 301 assert(h >= 1); 302 assert(w >= 1); 303 assert(IS_POWER_OF_TWO(h)); 304 assert(IS_POWER_OF_TWO(w)); 305 306 assert(bd == 8 || bd == 10 || bd == 12); 307 308 if (subw == 0 && subh == 0) { 309 for (i = 0; i < h; ++i) { 310 for (j = 0; j < w; ++j) { 311 const int m = mask[i * mask_stride + j]; 312 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 313 src1[i * src1_stride + j]); 314 } 315 } 316 } else if (subw == 1 && subh == 1) { 317 for (i = 0; i < h; ++i) { 318 for (j = 0; j < w; ++j) { 319 const int m = ROUND_POWER_OF_TWO( 320 mask[(2 * i) * mask_stride + (2 * j)] + 321 mask[(2 * i + 1) * mask_stride + (2 * j)] + 322 mask[(2 * i) * mask_stride + (2 * j + 1)] + 323 mask[(2 * i + 1) * mask_stride + (2 * j + 1)], 324 2); 325 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 326 src1[i * src1_stride + j]); 327 } 328 } 329 } else if (subw == 1 && subh == 0) { 330 for (i = 0; i < h; ++i) { 331 for (j = 0; j < w; ++j) { 332 const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)], 333 mask[i * mask_stride + (2 * j + 1)]); 334 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 335 src1[i * src1_stride + j]); 336 } 337 } 338 } else { 339 for (i = 0; i < h; ++i) { 340 for (j = 0; j < w; ++j) { 341 const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j], 342 mask[(2 * i + 1) * mask_stride + j]); 343 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 344 src1[i * src1_stride + j]); 345 } 346 } 347 } 348 } 349 #endif // CONFIG_AV1_HIGHBITDEPTH