saliency_map.c (46890B)
1 /* 2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 #include <assert.h> 12 #include <float.h> 13 #include <string.h> 14 15 #include "av1/encoder/encoder.h" 16 #include "av1/encoder/encoder_utils.h" 17 #include "av1/encoder/firstpass.h" 18 #include "av1/encoder/rdopt.h" 19 #include "av1/encoder/saliency_map.h" 20 21 // The Gabor filter is generated by setting the parameters as: 22 // ksize = 9 23 // sigma = 1 24 // theta = y*np.pi/4, where y /in {0, 1, 2, 3}, i.e., 0, 45, 90, 135 degree 25 // lambda1 = 1 26 // gamma=0.8 27 // phi =0 28 static const double kGaborFilter[4][9][9] = { // [angle: 0, 45, 90, 135 29 // degree][ksize][ksize] 30 { { 2.0047323e-06, 6.6387620e-05, 8.0876675e-04, 3.6246411e-03, 5.9760227e-03, 31 3.6246411e-03, 8.0876675e-04, 6.6387620e-05, 2.0047323e-06 }, 32 { 1.8831115e-05, 6.2360091e-04, 7.5970138e-03, 3.4047455e-02, 5.6134764e-02, 33 3.4047455e-02, 7.5970138e-03, 6.2360091e-04, 1.8831115e-05 }, 34 { 9.3271126e-05, 3.0887155e-03, 3.7628256e-02, 1.6863814e-01, 2.7803731e-01, 35 1.6863814e-01, 3.7628256e-02, 3.0887155e-03, 9.3271126e-05 }, 36 { 2.4359586e-04, 8.0667874e-03, 9.8273583e-02, 4.4043165e-01, 7.2614902e-01, 37 4.4043165e-01, 9.8273583e-02, 8.0667874e-03, 2.4359586e-04 }, 38 { 3.3546262e-04, 1.1108996e-02, 1.3533528e-01, 6.0653067e-01, 1.0000000e+00, 39 6.0653067e-01, 1.3533528e-01, 1.1108996e-02, 3.3546262e-04 }, 40 { 2.4359586e-04, 8.0667874e-03, 9.8273583e-02, 4.4043165e-01, 7.2614902e-01, 41 4.4043165e-01, 9.8273583e-02, 8.0667874e-03, 2.4359586e-04 }, 42 { 9.3271126e-05, 3.0887155e-03, 3.7628256e-02, 1.6863814e-01, 2.7803731e-01, 43 1.6863814e-01, 3.7628256e-02, 3.0887155e-03, 9.3271126e-05 }, 44 { 1.8831115e-05, 6.2360091e-04, 7.5970138e-03, 3.4047455e-02, 5.6134764e-02, 45 3.4047455e-02, 7.5970138e-03, 6.2360091e-04, 1.8831115e-05 }, 46 { 2.0047323e-06, 6.6387620e-05, 8.0876675e-04, 3.6246411e-03, 5.9760227e-03, 47 3.6246411e-03, 8.0876675e-04, 6.6387620e-05, 2.0047323e-06 } }, 48 49 { { -6.2165498e-08, 3.8760313e-06, 3.0079011e-06, -4.4602581e-04, 50 6.6981313e-04, 1.3962291e-03, -9.9486928e-04, -8.1631159e-05, 51 3.5712848e-05 }, 52 { 3.8760313e-06, 5.7044272e-06, -1.6041942e-03, 4.5687673e-03, 53 1.8061366e-02, -2.4406660e-02, -3.7979286e-03, 3.1511115e-03, 54 -8.1631159e-05 }, 55 { 3.0079011e-06, -1.6041942e-03, 8.6645801e-03, 6.4960226e-02, 56 -1.6647682e-01, -4.9129307e-02, 7.7304743e-02, -3.7979286e-03, 57 -9.9486928e-04 }, 58 { -4.4602581e-04, 4.5687673e-03, 6.4960226e-02, -3.1572008e-01, 59 -1.7670043e-01, 5.2729243e-01, -4.9129307e-02, -2.4406660e-02, 60 1.3962291e-03 }, 61 { 6.6981313e-04, 1.8061366e-02, -1.6647682e-01, -1.7670043e-01, 62 1.0000000e+00, -1.7670043e-01, -1.6647682e-01, 1.8061366e-02, 63 6.6981313e-04 }, 64 { 1.3962291e-03, -2.4406660e-02, -4.9129307e-02, 5.2729243e-01, 65 -1.7670043e-01, -3.1572008e-01, 6.4960226e-02, 4.5687673e-03, 66 -4.4602581e-04 }, 67 { -9.9486928e-04, -3.7979286e-03, 7.7304743e-02, -4.9129307e-02, 68 -1.6647682e-01, 6.4960226e-02, 8.6645801e-03, -1.6041942e-03, 69 3.0079011e-06 }, 70 { -8.1631159e-05, 3.1511115e-03, -3.7979286e-03, -2.4406660e-02, 71 1.8061366e-02, 4.5687673e-03, -1.6041942e-03, 5.7044272e-06, 72 3.8760313e-06 }, 73 { 3.5712848e-05, -8.1631159e-05, -9.9486928e-04, 1.3962291e-03, 74 6.6981313e-04, -4.4602581e-04, 3.0079011e-06, 3.8760313e-06, 75 -6.2165498e-08 } }, 76 77 { { 2.0047323e-06, 1.8831115e-05, 9.3271126e-05, 2.4359586e-04, 3.3546262e-04, 78 2.4359586e-04, 9.3271126e-05, 1.8831115e-05, 2.0047323e-06 }, 79 { 6.6387620e-05, 6.2360091e-04, 3.0887155e-03, 8.0667874e-03, 1.1108996e-02, 80 8.0667874e-03, 3.0887155e-03, 6.2360091e-04, 6.6387620e-05 }, 81 { 8.0876675e-04, 7.5970138e-03, 3.7628256e-02, 9.8273583e-02, 1.3533528e-01, 82 9.8273583e-02, 3.7628256e-02, 7.5970138e-03, 8.0876675e-04 }, 83 { 3.6246411e-03, 3.4047455e-02, 1.6863814e-01, 4.4043165e-01, 6.0653067e-01, 84 4.4043165e-01, 1.6863814e-01, 3.4047455e-02, 3.6246411e-03 }, 85 { 5.9760227e-03, 5.6134764e-02, 2.7803731e-01, 7.2614902e-01, 1.0000000e+00, 86 7.2614902e-01, 2.7803731e-01, 5.6134764e-02, 5.9760227e-03 }, 87 { 3.6246411e-03, 3.4047455e-02, 1.6863814e-01, 4.4043165e-01, 6.0653067e-01, 88 4.4043165e-01, 1.6863814e-01, 3.4047455e-02, 3.6246411e-03 }, 89 { 8.0876675e-04, 7.5970138e-03, 3.7628256e-02, 9.8273583e-02, 1.3533528e-01, 90 9.8273583e-02, 3.7628256e-02, 7.5970138e-03, 8.0876675e-04 }, 91 { 6.6387620e-05, 6.2360091e-04, 3.0887155e-03, 8.0667874e-03, 1.1108996e-02, 92 8.0667874e-03, 3.0887155e-03, 6.2360091e-04, 6.6387620e-05 }, 93 { 2.0047323e-06, 1.8831115e-05, 9.3271126e-05, 2.4359586e-04, 3.3546262e-04, 94 2.4359586e-04, 9.3271126e-05, 1.8831115e-05, 2.0047323e-06 } }, 95 96 { { 3.5712848e-05, -8.1631159e-05, -9.9486928e-04, 1.3962291e-03, 97 6.6981313e-04, -4.4602581e-04, 3.0079011e-06, 3.8760313e-06, 98 -6.2165498e-08 }, 99 { -8.1631159e-05, 3.1511115e-03, -3.7979286e-03, -2.4406660e-02, 100 1.8061366e-02, 4.5687673e-03, -1.6041942e-03, 5.7044272e-06, 101 3.8760313e-06 }, 102 { -9.9486928e-04, -3.7979286e-03, 7.7304743e-02, -4.9129307e-02, 103 -1.6647682e-01, 6.4960226e-02, 8.6645801e-03, -1.6041942e-03, 104 3.0079011e-06 }, 105 { 1.3962291e-03, -2.4406660e-02, -4.9129307e-02, 5.2729243e-01, 106 -1.7670043e-01, -3.1572008e-01, 6.4960226e-02, 4.5687673e-03, 107 -4.4602581e-04 }, 108 { 6.6981313e-04, 1.8061366e-02, -1.6647682e-01, -1.7670043e-01, 109 1.0000000e+00, -1.7670043e-01, -1.6647682e-01, 1.8061366e-02, 110 6.6981313e-04 }, 111 { -4.4602581e-04, 4.5687673e-03, 6.4960226e-02, -3.1572008e-01, 112 -1.7670043e-01, 5.2729243e-01, -4.9129307e-02, -2.4406660e-02, 113 1.3962291e-03 }, 114 { 3.0079011e-06, -1.6041942e-03, 8.6645801e-03, 6.4960226e-02, 115 -1.6647682e-01, -4.9129307e-02, 7.7304743e-02, -3.7979286e-03, 116 -9.9486928e-04 }, 117 { 3.8760313e-06, 5.7044272e-06, -1.6041942e-03, 4.5687673e-03, 118 1.8061366e-02, -2.4406660e-02, -3.7979286e-03, 3.1511115e-03, 119 -8.1631159e-05 }, 120 { -6.2165498e-08, 3.8760313e-06, 3.0079011e-06, -4.4602581e-04, 121 6.6981313e-04, 1.3962291e-03, -9.9486928e-04, -8.1631159e-05, 122 3.5712848e-05 } } 123 }; 124 125 // This function is to extract red/green/blue channels, and calculate intensity 126 // = (r+g+b)/3. Note that it only handles 8bits case now. 127 // TODO(linzhen): add high bitdepth support. 128 static void get_color_intensity(const YV12_BUFFER_CONFIG *src, 129 int subsampling_x, int subsampling_y, 130 double *cr, double *cg, double *cb, 131 double *intensity) { 132 const uint8_t *y = src->buffers[0]; 133 const uint8_t *u = src->buffers[1]; 134 const uint8_t *v = src->buffers[2]; 135 136 const int y_height = src->crop_heights[0]; 137 const int y_width = src->crop_widths[0]; 138 const int y_stride = src->strides[0]; 139 const int c_stride = src->strides[1]; 140 141 for (int i = 0; i < y_height; ++i) { 142 for (int j = 0; j < y_width; ++j) { 143 cr[i * y_width + j] = 144 fclamp((double)y[i * y_stride + j] + 145 1.370 * (double)(v[(i >> subsampling_y) * c_stride + 146 (j >> subsampling_x)] - 147 128), 148 0, 255); 149 cg[i * y_width + j] = 150 fclamp((double)y[i * y_stride + j] - 151 0.698 * (double)(u[(i >> subsampling_y) * c_stride + 152 (j >> subsampling_x)] - 153 128) - 154 0.337 * (double)(v[(i >> subsampling_y) * c_stride + 155 (j >> subsampling_x)] - 156 128), 157 0, 255); 158 cb[i * y_width + j] = 159 fclamp((double)y[i * y_stride + j] + 160 1.732 * (double)(u[(i >> subsampling_y) * c_stride + 161 (j >> subsampling_x)] - 162 128), 163 0, 255); 164 165 intensity[i * y_width + j] = 166 (cr[i * y_width + j] + cg[i * y_width + j] + cb[i * y_width + j]) / 167 3.0; 168 assert(intensity[i * y_width + j] >= 0 && 169 intensity[i * y_width + j] <= 255); 170 171 intensity[i * y_width + j] /= 256; 172 cr[i * y_width + j] /= 256; 173 cg[i * y_width + j] /= 256; 174 cb[i * y_width + j] /= 256; 175 } 176 } 177 } 178 179 static inline double convolve_map(const double *filter, const double *map, 180 const int size) { 181 double result = 0; 182 for (int i = 0; i < size; ++i) { 183 result += filter[i] * map[i]; // symmetric filter is used 184 } 185 return result; 186 } 187 188 // This function is to decimate the map by half, and apply Gaussian filter on 189 // top of the downsampled map. 190 static inline void decimate_map(const double *map, int height, int width, 191 int stride, double *downsampled_map) { 192 const int new_width = width / 2; 193 const int window_size = 5; 194 const double gaussian_filter[25] = { 195 1. / 256, 1.0 / 64, 3. / 128, 1. / 64, 1. / 256, 1. / 64, 1. / 16, 196 3. / 32, 1. / 16, 1. / 64, 3. / 128, 3. / 32, 9. / 64, 3. / 32, 197 3. / 128, 1. / 64, 1. / 16, 3. / 32, 1. / 16, 1. / 64, 1. / 256, 198 1. / 64, 3. / 128, 1. / 64, 1. / 256 199 }; 200 201 double map_region[25]; 202 for (int y = 0; y < height - 1; y += 2) { 203 for (int x = 0; x < width - 1; x += 2) { 204 int i = 0; 205 for (int yy = y - window_size / 2; yy <= y + window_size / 2; ++yy) { 206 for (int xx = x - window_size / 2; xx <= x + window_size / 2; ++xx) { 207 int yvalue = clamp(yy, 0, height - 1); 208 int xvalue = clamp(xx, 0, width - 1); 209 map_region[i++] = map[yvalue * stride + xvalue]; 210 } 211 } 212 downsampled_map[(y / 2) * new_width + (x / 2)] = 213 convolve_map(gaussian_filter, map_region, window_size * window_size); 214 } 215 } 216 } 217 218 // This function is to upscale the map from in_level size to out_level size. 219 // Note that the map at "level-1" will upscale the map at "level" by x2. 220 static inline int upscale_map(const double *input, int in_level, int out_level, 221 int height[9], int width[9], double *output) { 222 for (int level = in_level; level > out_level; level--) { 223 const int cur_width = width[level]; 224 const int cur_height = height[level]; 225 const int cur_stride = width[level]; 226 227 double *original = (level == in_level) ? (double *)input : output; 228 229 assert(level > 0); 230 231 const int h_upscale = height[level - 1]; 232 const int w_upscale = width[level - 1]; 233 const int s_upscale = width[level - 1]; 234 235 double *upscale = aom_malloc(h_upscale * w_upscale * sizeof(*upscale)); 236 237 if (!upscale) { 238 return 0; 239 } 240 241 for (int i = 0; i < h_upscale; ++i) { 242 for (int j = 0; j < w_upscale; ++j) { 243 const int ii = clamp((i >> 1), 0, cur_height - 1); 244 const int jj = clamp((j >> 1), 0, cur_width - 1); 245 upscale[j + i * s_upscale] = (double)original[jj + ii * cur_stride]; 246 } 247 } 248 memcpy(output, upscale, h_upscale * w_upscale * sizeof(double)); 249 aom_free(upscale); 250 } 251 252 return 1; 253 } 254 255 // This function calculates the differences between a fine scale c and a 256 // coarser scale s yielding the feature maps. c \in {2, 3, 4}, and s = c + 257 // delta, where delta \in {3, 4}. 258 static int center_surround_diff(const double *input[9], int height[9], 259 int width[9], saliency_feature_map *output[6]) { 260 int j = 0; 261 for (int k = 2; k < 5; ++k) { 262 int cur_height = height[k]; 263 int cur_width = width[k]; 264 265 if (upscale_map(input[k + 3], k + 3, k, height, width, output[j]->buf) == 266 0) { 267 return 0; 268 } 269 270 for (int r = 0; r < cur_height; ++r) { 271 for (int c = 0; c < cur_width; ++c) { 272 output[j]->buf[r * cur_width + c] = 273 fabs((double)(input[k][r * cur_width + c] - 274 output[j]->buf[r * cur_width + c])); 275 } 276 } 277 278 if (upscale_map(input[k + 4], k + 4, k, height, width, 279 output[j + 1]->buf) == 0) { 280 return 0; 281 } 282 283 for (int r = 0; r < cur_height; ++r) { 284 for (int c = 0; c < cur_width; ++c) { 285 output[j + 1]->buf[r * cur_width + c] = 286 fabs(input[k][r * cur_width + c] - 287 output[j + 1]->buf[r * cur_width + c]); 288 } 289 } 290 291 j += 2; 292 } 293 return 1; 294 } 295 296 // For color channels, the differences is calculated based on "color 297 // double-opponency". For example, the RG feature map is constructed between a 298 // fine scale c of R-G component and a coarser scale s of G-R component. 299 static int center_surround_diff_rgb(const double *input_1[9], 300 const double *input_2[9], int height[9], 301 int width[9], 302 saliency_feature_map *output[6]) { 303 int j = 0; 304 for (int k = 2; k < 5; ++k) { 305 int cur_height = height[k]; 306 int cur_width = width[k]; 307 308 if (upscale_map(input_2[k + 3], k + 3, k, height, width, output[j]->buf) == 309 0) { 310 return 0; 311 } 312 313 for (int r = 0; r < cur_height; ++r) { 314 for (int c = 0; c < cur_width; ++c) { 315 output[j]->buf[r * cur_width + c] = 316 fabs((double)(input_1[k][r * cur_width + c] - 317 output[j]->buf[r * cur_width + c])); 318 } 319 } 320 321 if (upscale_map(input_2[k + 4], k + 4, k, height, width, 322 output[j + 1]->buf) == 0) { 323 return 0; 324 } 325 326 for (int r = 0; r < cur_height; ++r) { 327 for (int c = 0; c < cur_width; ++c) { 328 output[j + 1]->buf[r * cur_width + c] = 329 fabs(input_1[k][r * cur_width + c] - 330 output[j + 1]->buf[r * cur_width + c]); 331 } 332 } 333 334 j += 2; 335 } 336 return 1; 337 } 338 339 // This function is to generate Gaussian pyramid images with indexes from 0 to 340 // 8, and construct the feature maps from calculating the center-surround 341 // differences. 342 static int gaussian_pyramid(const double *src, int width[9], int height[9], 343 saliency_feature_map *dst[6]) { 344 double *gaussian_map[9]; // scale = 9 345 gaussian_map[0] = 346 (double *)aom_malloc(width[0] * height[0] * sizeof(*gaussian_map[0])); 347 if (!gaussian_map[0]) { 348 return 0; 349 } 350 351 memcpy(gaussian_map[0], src, width[0] * height[0] * sizeof(double)); 352 353 for (int i = 1; i < 9; ++i) { 354 int stride = width[i - 1]; 355 int new_width = width[i]; 356 int new_height = height[i]; 357 358 gaussian_map[i] = 359 (double *)aom_malloc(new_width * new_height * sizeof(*gaussian_map[i])); 360 361 if (!gaussian_map[i]) { 362 for (int l = 0; l < i; ++l) { 363 aom_free(gaussian_map[l]); 364 } 365 return 0; 366 } 367 368 memset(gaussian_map[i], 0, new_width * new_height * sizeof(double)); 369 370 decimate_map(gaussian_map[i - 1], height[i - 1], width[i - 1], stride, 371 gaussian_map[i]); 372 } 373 374 if (center_surround_diff((const double **)gaussian_map, height, width, dst) == 375 0) { 376 for (int l = 0; l < 9; ++l) { 377 aom_free(gaussian_map[l]); 378 } 379 return 0; 380 } 381 382 for (int i = 0; i < 9; ++i) { 383 aom_free(gaussian_map[i]); 384 } 385 return 1; 386 } 387 388 static int gaussian_pyramid_rgb(double *src_1, double *src_2, int width[9], 389 int height[9], saliency_feature_map *dst[6]) { 390 double *gaussian_map[2][9]; // scale = 9 391 double *src[2]; 392 393 src[0] = src_1; 394 src[1] = src_2; 395 396 for (int k = 0; k < 2; ++k) { 397 gaussian_map[k][0] = (double *)aom_malloc(width[0] * height[0] * 398 sizeof(*gaussian_map[k][0])); 399 if (!gaussian_map[k][0]) { 400 for (int l = 0; l < k; ++l) { 401 aom_free(gaussian_map[l][0]); 402 } 403 return 0; 404 } 405 memcpy(gaussian_map[k][0], src[k], width[0] * height[0] * sizeof(double)); 406 407 for (int i = 1; i < 9; ++i) { 408 int stride = width[i - 1]; 409 int new_width = width[i]; 410 int new_height = height[i]; 411 412 gaussian_map[k][i] = (double *)aom_malloc(new_width * new_height * 413 sizeof(*gaussian_map[k][i])); 414 if (!gaussian_map[k][i]) { 415 for (int l = 0; l < k; ++l) { 416 aom_free(gaussian_map[l][i]); 417 } 418 return 0; 419 } 420 memset(gaussian_map[k][i], 0, new_width * new_height * sizeof(double)); 421 decimate_map(gaussian_map[k][i - 1], height[i - 1], width[i - 1], stride, 422 gaussian_map[k][i]); 423 } 424 } 425 426 if (center_surround_diff_rgb((const double **)gaussian_map[0], 427 (const double **)gaussian_map[1], height, width, 428 dst) == 0) { 429 for (int l = 0; l < 2; ++l) { 430 for (int i = 0; i < 9; ++i) { 431 aom_free(gaussian_map[l][i]); 432 } 433 } 434 return 0; 435 } 436 437 for (int l = 0; l < 2; ++l) { 438 for (int i = 0; i < 9; ++i) { 439 aom_free(gaussian_map[l][i]); 440 } 441 } 442 return 1; 443 } 444 445 static int get_feature_map_intensity(double *intensity, int width[9], 446 int height[9], 447 saliency_feature_map *i_map[6]) { 448 if (gaussian_pyramid(intensity, width, height, i_map) == 0) { 449 return 0; 450 } 451 return 1; 452 } 453 454 static int get_feature_map_rgb(double *cr, double *cg, double *cb, int width[9], 455 int height[9], saliency_feature_map *rg_map[6], 456 saliency_feature_map *by_map[6]) { 457 double *rg_mat = aom_malloc(height[0] * width[0] * sizeof(*rg_mat)); 458 double *by_mat = aom_malloc(height[0] * width[0] * sizeof(*by_mat)); 459 double *gr_mat = aom_malloc(height[0] * width[0] * sizeof(*gr_mat)); 460 double *yb_mat = aom_malloc(height[0] * width[0] * sizeof(*yb_mat)); 461 462 if (!rg_mat || !by_mat || !gr_mat || !yb_mat) { 463 aom_free(rg_mat); 464 aom_free(by_mat); 465 aom_free(gr_mat); 466 aom_free(yb_mat); 467 return 0; 468 } 469 470 double r, g, b, y; 471 for (int i = 0; i < height[0]; ++i) { 472 for (int j = 0; j < width[0]; ++j) { 473 r = AOMMAX(0, cr[i * width[0] + j] - 474 (cg[i * width[0] + j] + cb[i * width[0] + j]) / 2); 475 g = AOMMAX(0, cg[i * width[0] + j] - 476 (cr[i * width[0] + j] + cb[i * width[0] + j]) / 2); 477 b = AOMMAX(0, cb[i * width[0] + j] - 478 (cr[i * width[0] + j] + cg[i * width[0] + j]) / 2); 479 y = AOMMAX(0, (cr[i * width[0] + j] + cg[i * width[0] + j]) / 2 - 480 fabs(cr[i * width[0] + j] - cg[i * width[0] + j]) / 2 - 481 cb[i * width[0] + j]); 482 483 rg_mat[i * width[0] + j] = r - g; 484 by_mat[i * width[0] + j] = b - y; 485 gr_mat[i * width[0] + j] = g - r; 486 yb_mat[i * width[0] + j] = y - b; 487 } 488 } 489 490 if (gaussian_pyramid_rgb(rg_mat, gr_mat, width, height, rg_map) == 0 || 491 gaussian_pyramid_rgb(by_mat, yb_mat, width, height, by_map) == 0) { 492 aom_free(rg_mat); 493 aom_free(by_mat); 494 aom_free(gr_mat); 495 aom_free(yb_mat); 496 return 0; 497 } 498 499 aom_free(rg_mat); 500 aom_free(by_mat); 501 aom_free(gr_mat); 502 aom_free(yb_mat); 503 return 1; 504 } 505 506 static inline void filter2d(const double *input, const double kernel[9][9], 507 int width, int height, double *output) { 508 const int window_size = 9; 509 double map_section[81]; 510 for (int y = 0; y <= height - 1; ++y) { 511 for (int x = 0; x <= width - 1; ++x) { 512 int i = 0; 513 for (int yy = y - window_size / 2; yy <= y + window_size / 2; ++yy) { 514 for (int xx = x - window_size / 2; xx <= x + window_size / 2; ++xx) { 515 int yvalue = clamp(yy, 0, height - 1); 516 int xvalue = clamp(xx, 0, width - 1); 517 map_section[i++] = input[yvalue * width + xvalue]; 518 } 519 } 520 521 output[y * width + x] = 0; 522 for (int k = 0; k < window_size; ++k) { 523 for (int l = 0; l < window_size; ++l) { 524 output[y * width + x] += 525 kernel[k][l] * map_section[k * window_size + l]; 526 } 527 } 528 } 529 } 530 } 531 532 static int get_feature_map_orientation(const double *intensity, int width[9], 533 int height[9], 534 saliency_feature_map *dst[24]) { 535 double *gaussian_map[9]; 536 537 gaussian_map[0] = 538 (double *)aom_malloc(width[0] * height[0] * sizeof(*gaussian_map[0])); 539 if (!gaussian_map[0]) { 540 return 0; 541 } 542 memcpy(gaussian_map[0], intensity, width[0] * height[0] * sizeof(double)); 543 544 for (int i = 1; i < 9; ++i) { 545 int stride = width[i - 1]; 546 int new_width = width[i]; 547 int new_height = height[i]; 548 549 gaussian_map[i] = 550 (double *)aom_malloc(new_width * new_height * sizeof(*gaussian_map[i])); 551 if (!gaussian_map[i]) { 552 for (int l = 0; l < i; ++l) { 553 aom_free(gaussian_map[l]); 554 } 555 return 0; 556 } 557 memset(gaussian_map[i], 0, new_width * new_height * sizeof(double)); 558 decimate_map(gaussian_map[i - 1], height[i - 1], width[i - 1], stride, 559 gaussian_map[i]); 560 } 561 562 double *tempGaborOutput[4][9]; //[angle: 0, 45, 90, 135 degree][filter_size] 563 564 for (int i = 2; i < 9; ++i) { 565 const int cur_height = height[i]; 566 const int cur_width = width[i]; 567 for (int j = 0; j < 4; ++j) { 568 tempGaborOutput[j][i] = (double *)aom_malloc( 569 cur_height * cur_width * sizeof(*tempGaborOutput[j][i])); 570 if (!tempGaborOutput[j][i]) { 571 for (int l = 0; l < 9; ++l) { 572 aom_free(gaussian_map[l]); 573 } 574 for (int h = 0; h < 4; ++h) { 575 for (int g = 2; g < 9; ++g) { 576 aom_free(tempGaborOutput[h][g]); 577 } 578 } 579 return 0; 580 } 581 filter2d(gaussian_map[i], kGaborFilter[j], cur_width, cur_height, 582 tempGaborOutput[j][i]); 583 } 584 } 585 586 for (int i = 0; i < 9; ++i) { 587 aom_free(gaussian_map[i]); 588 } 589 590 saliency_feature_map 591 *tmp[4][6]; //[angle: 0, 45, 90, 135 degree][filter_size] 592 593 for (int i = 0; i < 6; ++i) { 594 for (int j = 0; j < 4; ++j) { 595 tmp[j][i] = dst[j * 6 + i]; 596 } 597 } 598 599 for (int j = 0; j < 4; ++j) { 600 if (center_surround_diff((const double **)tempGaborOutput[j], height, width, 601 tmp[j]) == 0) { 602 for (int h = 0; h < 4; ++h) { 603 for (int g = 2; g < 9; ++g) { 604 aom_free(tempGaborOutput[h][g]); 605 } 606 } 607 return 0; 608 } 609 } 610 611 for (int i = 2; i < 9; ++i) { 612 for (int j = 0; j < 4; ++j) { 613 aom_free(tempGaborOutput[j][i]); 614 } 615 } 616 617 return 1; 618 } 619 620 static inline void find_min_max(const saliency_feature_map *input, 621 double *max_value, double *min_value) { 622 assert(input && input->buf); 623 *min_value = DBL_MAX; 624 *max_value = 0.0; 625 626 for (int i = 0; i < input->height; ++i) { 627 for (int j = 0; j < input->width; ++j) { 628 assert(input->buf[i * input->width + j] >= 0.0); 629 *min_value = fmin(input->buf[i * input->width + j], *min_value); 630 *max_value = fmax(input->buf[i * input->width + j], *max_value); 631 } 632 } 633 } 634 635 static inline double average_local_max(const saliency_feature_map *input, 636 int stepsize) { 637 int numlocal = 0; 638 double lmaxmean = 0, lmax = 0, dummy = 0; 639 saliency_feature_map local_map; 640 local_map.height = stepsize; 641 local_map.width = stepsize; 642 local_map.buf = 643 (double *)aom_malloc(stepsize * stepsize * sizeof(*local_map.buf)); 644 645 if (!local_map.buf) { 646 return -1; 647 } 648 649 for (int y = 0; y < input->height - stepsize; y += stepsize) { 650 for (int x = 0; x < input->width - stepsize; x += stepsize) { 651 for (int i = 0; i < stepsize; ++i) { 652 for (int j = 0; j < stepsize; ++j) { 653 local_map.buf[i * stepsize + j] = 654 input->buf[(y + i) * input->width + x + j]; 655 } 656 } 657 658 find_min_max(&local_map, &lmax, &dummy); 659 lmaxmean += lmax; 660 numlocal++; 661 } 662 } 663 664 aom_free(local_map.buf); 665 666 return lmaxmean / numlocal; 667 } 668 669 // Linear normalization the values in the map to [0,1]. 670 static void minmax_normalize(saliency_feature_map *input) { 671 double max_value, min_value; 672 find_min_max(input, &max_value, &min_value); 673 674 for (int i = 0; i < input->height; ++i) { 675 for (int j = 0; j < input->width; ++j) { 676 if (max_value != min_value) { 677 input->buf[i * input->width + j] = 678 input->buf[i * input->width + j] / (max_value - min_value) + 679 min_value / (min_value - max_value); 680 } else { 681 input->buf[i * input->width + j] -= min_value; 682 } 683 } 684 } 685 } 686 687 // This function is to promote meaningful “activation spots” in the map and 688 // ignores homogeneous areas. 689 static int nomalization_operator(saliency_feature_map *input, int stepsize) { 690 minmax_normalize(input); 691 double lmaxmean = average_local_max(input, stepsize); 692 if (lmaxmean < 0) { 693 return 0; 694 } 695 double normCoeff = (1 - lmaxmean) * (1 - lmaxmean); 696 697 for (int i = 0; i < input->height; ++i) { 698 for (int j = 0; j < input->width; ++j) { 699 input->buf[i * input->width + j] *= normCoeff; 700 } 701 } 702 703 return 1; 704 } 705 706 // Normalize the values in feature maps to [0,1], and then upscale all maps to 707 // the original frame size. 708 static int normalize_fm(saliency_feature_map *input[6], int width[9], 709 int height[9], int num_fm, 710 saliency_feature_map *output[6]) { 711 // Feature maps (FM) are generated by function "center_surround_diff()". The 712 // difference is between a fine scale c and a coarser scale s, where c \in {2, 713 // 3, 4}, and s = c + delta, where delta \in {3, 4}, and the FM size is scale 714 // c. Specifically, i=0: c=2 and s=5, i=1: c=2 and s=6, i=2: c=3 and s=6, i=3: 715 // c=3 and s=7, i=4: c=4 and s=7, i=5: c=4 and s=8. 716 for (int i = 0; i < num_fm; ++i) { 717 if (nomalization_operator(input[i], 8) == 0) { 718 return 0; 719 } 720 721 // Upscale FM to original frame size 722 if (upscale_map(input[i]->buf, (i / 2) + 2, 0, height, width, 723 output[i]->buf) == 0) { 724 return 0; 725 } 726 } 727 return 1; 728 } 729 730 // Combine feature maps with the same category (intensity, color, or 731 // orientation) into one conspicuity map. 732 static int normalized_map(saliency_feature_map *input[6], int width[9], 733 int height[9], saliency_feature_map *output) { 734 int num_fm = 6; 735 736 saliency_feature_map *n_input[6]; 737 for (int i = 0; i < 6; ++i) { 738 n_input[i] = (saliency_feature_map *)aom_malloc(sizeof(*n_input[i])); 739 if (!n_input[i]) { 740 return 0; 741 } 742 n_input[i]->buf = 743 (double *)aom_malloc(width[0] * height[0] * sizeof(*n_input[i]->buf)); 744 if (!n_input[i]->buf) { 745 aom_free(n_input[i]); 746 return 0; 747 } 748 n_input[i]->height = height[0]; 749 n_input[i]->width = width[0]; 750 } 751 752 if (normalize_fm(input, width, height, num_fm, n_input) == 0) { 753 for (int i = 0; i < num_fm; ++i) { 754 aom_free(n_input[i]->buf); 755 aom_free(n_input[i]); 756 } 757 return 0; 758 } 759 760 // Add up all normalized feature maps with the same category into one map. 761 for (int i = 0; i < num_fm; ++i) { 762 for (int r = 0; r < height[0]; ++r) { 763 for (int c = 0; c < width[0]; ++c) { 764 output->buf[r * width[0] + c] += n_input[i]->buf[r * width[0] + c]; 765 } 766 } 767 } 768 769 for (int i = 0; i < num_fm; ++i) { 770 aom_free(n_input[i]->buf); 771 aom_free(n_input[i]); 772 } 773 774 nomalization_operator(output, 8); 775 return 1; 776 } 777 778 static int normalized_map_rgb(saliency_feature_map *rg_map[6], 779 saliency_feature_map *by_map[6], int width[9], 780 int height[9], saliency_feature_map *output) { 781 saliency_feature_map *color_cm[2]; // 0: color_cm_rg, 1: color_cm_by 782 for (int i = 0; i < 2; ++i) { 783 color_cm[i] = aom_malloc(sizeof(*color_cm[i])); 784 if (!color_cm[i]) { 785 return 0; 786 } 787 color_cm[i]->buf = 788 (double *)aom_malloc(width[0] * height[0] * sizeof(*color_cm[i]->buf)); 789 if (!color_cm[i]->buf) { 790 for (int l = 0; l < i; ++l) { 791 aom_free(color_cm[l]->buf); 792 } 793 aom_free(color_cm[i]); 794 return 0; 795 } 796 797 color_cm[i]->width = width[0]; 798 color_cm[i]->height = height[0]; 799 memset(color_cm[i]->buf, 0, 800 width[0] * height[0] * sizeof(*color_cm[i]->buf)); 801 } 802 803 if (normalized_map(rg_map, width, height, color_cm[0]) == 0 || 804 normalized_map(by_map, width, height, color_cm[1]) == 0) { 805 for (int i = 0; i < 2; ++i) { 806 aom_free(color_cm[i]->buf); 807 aom_free(color_cm[i]); 808 } 809 return 0; 810 } 811 812 for (int r = 0; r < height[0]; ++r) { 813 for (int c = 0; c < width[0]; ++c) { 814 output->buf[r * width[0] + c] = color_cm[0]->buf[r * width[0] + c] + 815 color_cm[1]->buf[r * width[0] + c]; 816 } 817 } 818 819 for (int i = 0; i < 2; ++i) { 820 aom_free(color_cm[i]->buf); 821 aom_free(color_cm[i]); 822 } 823 824 nomalization_operator(output, 8); 825 return 1; 826 } 827 828 static int normalized_map_orientation(saliency_feature_map *orientation_map[24], 829 int width[9], int height[9], 830 saliency_feature_map *output) { 831 int num_fms_per_angle = 6; 832 833 saliency_feature_map *ofm[4][6]; 834 for (int i = 0; i < num_fms_per_angle; ++i) { 835 for (int j = 0; j < 4; ++j) { 836 ofm[j][i] = orientation_map[j * num_fms_per_angle + i]; 837 } 838 } 839 840 // extract conspicuity map for each angle 841 saliency_feature_map *nofm = aom_malloc(sizeof(*nofm)); 842 if (!nofm) { 843 return 0; 844 } 845 nofm->buf = (double *)aom_malloc(width[0] * height[0] * sizeof(*nofm->buf)); 846 if (!nofm->buf) { 847 aom_free(nofm); 848 return 0; 849 } 850 nofm->height = height[0]; 851 nofm->width = width[0]; 852 853 for (int i = 0; i < 4; ++i) { 854 memset(nofm->buf, 0, width[0] * height[0] * sizeof(*nofm->buf)); 855 if (normalized_map(ofm[i], width, height, nofm) == 0) { 856 aom_free(nofm->buf); 857 aom_free(nofm); 858 return 0; 859 } 860 861 for (int r = 0; r < height[0]; ++r) { 862 for (int c = 0; c < width[0]; ++c) { 863 output->buf[r * width[0] + c] += nofm->buf[r * width[0] + c]; 864 } 865 } 866 } 867 868 aom_free(nofm->buf); 869 aom_free(nofm); 870 871 nomalization_operator(output, 8); 872 return 1; 873 } 874 875 // Set pixel level saliency mask based on Itti-Koch algorithm 876 int av1_set_saliency_map(AV1_COMP *cpi) { 877 AV1_COMMON *const cm = &cpi->common; 878 879 int frm_width = cm->width; 880 int frm_height = cm->height; 881 882 int pyr_height[9]; 883 int pyr_width[9]; 884 885 pyr_height[0] = frm_height; 886 pyr_width[0] = frm_width; 887 888 for (int i = 1; i < 9; ++i) { 889 pyr_width[i] = pyr_width[i - 1] / 2; 890 pyr_height[i] = pyr_height[i - 1] / 2; 891 } 892 893 double *cr = aom_malloc(frm_width * frm_height * sizeof(*cr)); 894 double *cg = aom_malloc(frm_width * frm_height * sizeof(*cg)); 895 double *cb = aom_malloc(frm_width * frm_height * sizeof(*cb)); 896 double *intensity = aom_malloc(frm_width * frm_height * sizeof(*intensity)); 897 898 if (!cr || !cg || !cb || !intensity) { 899 aom_free(cr); 900 aom_free(cg); 901 aom_free(cb); 902 aom_free(intensity); 903 return 0; 904 } 905 906 // Extract red / green / blue channels and intensity component 907 get_color_intensity(cpi->source, cm->seq_params->subsampling_x, 908 cm->seq_params->subsampling_y, cr, cg, cb, intensity); 909 910 // Feature Map Extraction 911 // intensity map 912 saliency_feature_map *i_map[6]; 913 for (int i = 0; i < 6; ++i) { 914 int cur_height = pyr_height[(i / 2) + 2]; 915 int cur_width = pyr_width[(i / 2) + 2]; 916 917 i_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*i_map[i])); 918 if (!i_map[i]) { 919 aom_free(cr); 920 aom_free(cg); 921 aom_free(cb); 922 aom_free(intensity); 923 for (int l = 0; l < i; ++l) { 924 aom_free(i_map[l]); 925 } 926 return 0; 927 } 928 i_map[i]->buf = 929 (double *)aom_malloc(cur_height * cur_width * sizeof(*i_map[i]->buf)); 930 if (!i_map[i]->buf) { 931 aom_free(cr); 932 aom_free(cg); 933 aom_free(cb); 934 aom_free(intensity); 935 for (int l = 0; l < i; ++l) { 936 aom_free(i_map[l]->buf); 937 aom_free(i_map[l]); 938 } 939 return 0; 940 } 941 i_map[i]->height = cur_height; 942 i_map[i]->width = cur_width; 943 } 944 945 if (get_feature_map_intensity(intensity, pyr_width, pyr_height, i_map) == 0) { 946 aom_free(cr); 947 aom_free(cg); 948 aom_free(cb); 949 aom_free(intensity); 950 for (int l = 0; l < 6; ++l) { 951 aom_free(i_map[l]->buf); 952 aom_free(i_map[l]); 953 } 954 return 0; 955 } 956 957 // RGB map 958 saliency_feature_map *rg_map[6], *by_map[6]; 959 for (int i = 0; i < 6; ++i) { 960 int cur_height = pyr_height[(i / 2) + 2]; 961 int cur_width = pyr_width[(i / 2) + 2]; 962 rg_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*rg_map[i])); 963 by_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*by_map[i])); 964 if (!rg_map[i] || !by_map[i]) { 965 aom_free(cr); 966 aom_free(cg); 967 aom_free(cb); 968 aom_free(intensity); 969 for (int l = 0; l < 6; ++l) { 970 aom_free(i_map[l]->buf); 971 aom_free(i_map[l]); 972 aom_free(rg_map[l]); 973 aom_free(by_map[l]); 974 } 975 return 0; 976 } 977 rg_map[i]->buf = 978 (double *)aom_malloc(cur_height * cur_width * sizeof(*rg_map[i]->buf)); 979 by_map[i]->buf = 980 (double *)aom_malloc(cur_height * cur_width * sizeof(*by_map[i]->buf)); 981 if (!by_map[i]->buf || !rg_map[i]->buf) { 982 aom_free(cr); 983 aom_free(cg); 984 aom_free(cb); 985 aom_free(intensity); 986 for (int l = 0; l < 6; ++l) { 987 aom_free(i_map[l]->buf); 988 aom_free(i_map[l]); 989 } 990 for (int l = 0; l < i; ++l) { 991 aom_free(rg_map[l]->buf); 992 aom_free(by_map[l]->buf); 993 aom_free(rg_map[l]); 994 aom_free(by_map[l]); 995 } 996 return 0; 997 } 998 rg_map[i]->height = cur_height; 999 rg_map[i]->width = cur_width; 1000 by_map[i]->height = cur_height; 1001 by_map[i]->width = cur_width; 1002 } 1003 1004 if (get_feature_map_rgb(cr, cg, cb, pyr_width, pyr_height, rg_map, by_map) == 1005 0) { 1006 aom_free(cr); 1007 aom_free(cg); 1008 aom_free(cb); 1009 aom_free(intensity); 1010 for (int l = 0; l < 6; ++l) { 1011 aom_free(i_map[l]->buf); 1012 aom_free(rg_map[l]->buf); 1013 aom_free(by_map[l]->buf); 1014 aom_free(i_map[l]); 1015 aom_free(rg_map[l]); 1016 aom_free(by_map[l]); 1017 } 1018 return 0; 1019 } 1020 1021 // Orientation map 1022 saliency_feature_map *orientation_map[24]; 1023 for (int i = 0; i < 24; ++i) { 1024 int cur_height = pyr_height[((i % 6) / 2) + 2]; 1025 int cur_width = pyr_width[((i % 6) / 2) + 2]; 1026 1027 orientation_map[i] = 1028 (saliency_feature_map *)aom_malloc(sizeof(*orientation_map[i])); 1029 if (!orientation_map[i]) { 1030 aom_free(cr); 1031 aom_free(cg); 1032 aom_free(cb); 1033 aom_free(intensity); 1034 for (int l = 0; l < 6; ++l) { 1035 aom_free(i_map[l]->buf); 1036 aom_free(rg_map[l]->buf); 1037 aom_free(by_map[l]->buf); 1038 aom_free(i_map[l]); 1039 aom_free(rg_map[l]); 1040 aom_free(by_map[l]); 1041 } 1042 for (int h = 0; h < i; ++h) { 1043 aom_free(orientation_map[h]); 1044 } 1045 return 0; 1046 } 1047 1048 orientation_map[i]->buf = (double *)aom_malloc( 1049 cur_height * cur_width * sizeof(*orientation_map[i]->buf)); 1050 if (!orientation_map[i]->buf) { 1051 aom_free(cr); 1052 aom_free(cg); 1053 aom_free(cb); 1054 aom_free(intensity); 1055 for (int l = 0; l < 6; ++l) { 1056 aom_free(i_map[l]->buf); 1057 aom_free(rg_map[l]->buf); 1058 aom_free(by_map[l]->buf); 1059 aom_free(i_map[l]); 1060 aom_free(rg_map[l]); 1061 aom_free(by_map[l]); 1062 } 1063 1064 for (int h = 0; h < i; ++h) { 1065 aom_free(orientation_map[h]->buf); 1066 aom_free(orientation_map[h]->buf); 1067 aom_free(orientation_map[h]); 1068 aom_free(orientation_map[h]); 1069 } 1070 return 0; 1071 } 1072 1073 orientation_map[i]->height = cur_height; 1074 orientation_map[i]->width = cur_width; 1075 } 1076 1077 if (get_feature_map_orientation(intensity, pyr_width, pyr_height, 1078 orientation_map) == 0) { 1079 aom_free(cr); 1080 aom_free(cg); 1081 aom_free(cb); 1082 aom_free(intensity); 1083 for (int l = 0; l < 6; ++l) { 1084 aom_free(i_map[l]->buf); 1085 aom_free(rg_map[l]->buf); 1086 aom_free(by_map[l]->buf); 1087 aom_free(i_map[l]); 1088 aom_free(rg_map[l]); 1089 aom_free(by_map[l]); 1090 } 1091 for (int h = 0; h < 24; ++h) { 1092 aom_free(orientation_map[h]->buf); 1093 aom_free(orientation_map[h]); 1094 } 1095 return 0; 1096 } 1097 1098 aom_free(cr); 1099 aom_free(cg); 1100 aom_free(cb); 1101 aom_free(intensity); 1102 1103 saliency_feature_map 1104 *normalized_maps[3]; // 0: intensity, 1: color, 2: orientation 1105 1106 for (int i = 0; i < 3; ++i) { 1107 normalized_maps[i] = aom_malloc(sizeof(*normalized_maps[i])); 1108 if (!normalized_maps[i]) { 1109 for (int l = 0; l < 6; ++l) { 1110 aom_free(i_map[l]->buf); 1111 aom_free(rg_map[l]->buf); 1112 aom_free(by_map[l]->buf); 1113 aom_free(i_map[l]); 1114 aom_free(rg_map[l]); 1115 aom_free(by_map[l]); 1116 } 1117 1118 for (int h = 0; h < 24; ++h) { 1119 aom_free(orientation_map[h]->buf); 1120 aom_free(orientation_map[h]); 1121 } 1122 1123 for (int l = 0; l < i; ++l) { 1124 aom_free(normalized_maps[l]); 1125 } 1126 return 0; 1127 } 1128 normalized_maps[i]->buf = (double *)aom_malloc( 1129 frm_width * frm_height * sizeof(*normalized_maps[i]->buf)); 1130 if (!normalized_maps[i]->buf) { 1131 for (int l = 0; l < 6; ++l) { 1132 aom_free(i_map[l]->buf); 1133 aom_free(rg_map[l]->buf); 1134 aom_free(by_map[l]->buf); 1135 aom_free(i_map[l]); 1136 aom_free(rg_map[l]); 1137 aom_free(by_map[l]); 1138 } 1139 for (int h = 0; h < 24; ++h) { 1140 aom_free(orientation_map[h]->buf); 1141 aom_free(orientation_map[h]); 1142 } 1143 for (int l = 0; l < i; ++l) { 1144 aom_free(normalized_maps[l]->buf); 1145 aom_free(normalized_maps[l]); 1146 } 1147 return 0; 1148 } 1149 normalized_maps[i]->width = frm_width; 1150 normalized_maps[i]->height = frm_height; 1151 memset(normalized_maps[i]->buf, 0, 1152 frm_width * frm_height * sizeof(*normalized_maps[i]->buf)); 1153 } 1154 1155 // Conspicuity map generation 1156 if (normalized_map(i_map, pyr_width, pyr_height, normalized_maps[0]) == 0 || 1157 normalized_map_rgb(rg_map, by_map, pyr_width, pyr_height, 1158 normalized_maps[1]) == 0 || 1159 normalized_map_orientation(orientation_map, pyr_width, pyr_height, 1160 normalized_maps[2]) == 0) { 1161 for (int i = 0; i < 6; ++i) { 1162 aom_free(i_map[i]->buf); 1163 aom_free(rg_map[i]->buf); 1164 aom_free(by_map[i]->buf); 1165 aom_free(i_map[i]); 1166 aom_free(rg_map[i]); 1167 aom_free(by_map[i]); 1168 } 1169 1170 for (int i = 0; i < 24; ++i) { 1171 aom_free(orientation_map[i]->buf); 1172 aom_free(orientation_map[i]); 1173 } 1174 1175 for (int i = 0; i < 3; ++i) { 1176 aom_free(normalized_maps[i]->buf); 1177 aom_free(normalized_maps[i]); 1178 } 1179 return 0; 1180 } 1181 1182 for (int i = 0; i < 6; ++i) { 1183 aom_free(i_map[i]->buf); 1184 aom_free(rg_map[i]->buf); 1185 aom_free(by_map[i]->buf); 1186 aom_free(i_map[i]); 1187 aom_free(rg_map[i]); 1188 aom_free(by_map[i]); 1189 } 1190 1191 for (int i = 0; i < 24; ++i) { 1192 aom_free(orientation_map[i]->buf); 1193 aom_free(orientation_map[i]); 1194 } 1195 1196 // Pixel level saliency map 1197 saliency_feature_map *combined_saliency_map = 1198 aom_malloc(sizeof(*combined_saliency_map)); 1199 if (!combined_saliency_map) { 1200 for (int i = 0; i < 3; ++i) { 1201 aom_free(normalized_maps[i]->buf); 1202 aom_free(normalized_maps[i]); 1203 } 1204 return 0; 1205 } 1206 1207 combined_saliency_map->buf = (double *)aom_malloc( 1208 frm_width * frm_height * sizeof(*combined_saliency_map->buf)); 1209 if (!combined_saliency_map->buf) { 1210 for (int i = 0; i < 3; ++i) { 1211 aom_free(normalized_maps[i]->buf); 1212 aom_free(normalized_maps[i]); 1213 } 1214 1215 aom_free(combined_saliency_map); 1216 return 0; 1217 } 1218 combined_saliency_map->height = frm_height; 1219 combined_saliency_map->width = frm_width; 1220 1221 double w_intensity, w_color, w_orient; 1222 1223 w_intensity = w_color = w_orient = (double)1 / 3; 1224 1225 for (int r = 0; r < frm_height; ++r) { 1226 for (int c = 0; c < frm_width; ++c) { 1227 combined_saliency_map->buf[r * frm_width + c] = 1228 (w_intensity * normalized_maps[0]->buf[r * frm_width + c] + 1229 w_color * normalized_maps[1]->buf[r * frm_width + c] + 1230 w_orient * normalized_maps[2]->buf[r * frm_width + c]); 1231 } 1232 } 1233 1234 for (int r = 0; r < frm_height; ++r) { 1235 for (int c = 0; c < frm_width; ++c) { 1236 int index = r * frm_width + c; 1237 cpi->saliency_map[index] = 1238 (uint8_t)(combined_saliency_map->buf[index] * 255); 1239 } 1240 } 1241 1242 for (int i = 0; i < 3; ++i) { 1243 aom_free(normalized_maps[i]->buf); 1244 aom_free(normalized_maps[i]); 1245 } 1246 1247 aom_free(combined_saliency_map->buf); 1248 aom_free(combined_saliency_map); 1249 1250 return 1; 1251 } 1252 1253 // Set superblock level saliency mask for rdmult scaling 1254 int av1_setup_sm_rdmult_scaling_factor(AV1_COMP *cpi, double motion_ratio) { 1255 AV1_COMMON *cm = &cpi->common; 1256 1257 saliency_feature_map *sb_saliency_map = 1258 aom_malloc(sizeof(saliency_feature_map)); 1259 1260 if (sb_saliency_map == NULL) { 1261 return 0; 1262 } 1263 1264 const BLOCK_SIZE bsize = cm->seq_params->sb_size; 1265 const int num_mi_w = mi_size_wide[bsize]; 1266 const int num_mi_h = mi_size_high[bsize]; 1267 const int block_width = block_size_wide[bsize]; 1268 const int block_height = block_size_high[bsize]; 1269 const int num_sb_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w; 1270 const int num_sb_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h; 1271 1272 sb_saliency_map->height = num_sb_rows; 1273 sb_saliency_map->width = num_sb_cols; 1274 sb_saliency_map->buf = (double *)aom_malloc(num_sb_rows * num_sb_cols * 1275 sizeof(*sb_saliency_map->buf)); 1276 1277 if (sb_saliency_map->buf == NULL) { 1278 aom_free(sb_saliency_map); 1279 return 0; 1280 } 1281 1282 for (int row = 0; row < num_sb_rows; ++row) { 1283 for (int col = 0; col < num_sb_cols; ++col) { 1284 const int index = row * num_sb_cols + col; 1285 double total_pixel = 0; 1286 double total_weight = 0; 1287 1288 for (int i = 0; i < block_height; i++) { 1289 for (int j = 0; j < block_width; j++) { 1290 if ((row * block_height + i) >= cpi->common.height || 1291 (col * block_width + j) >= cpi->common.width) 1292 continue; 1293 total_pixel++; 1294 total_weight += 1295 cpi->saliency_map[(row * block_height + i) * cpi->common.width + 1296 col * block_width + j]; 1297 } 1298 } 1299 1300 assert(total_pixel > 0); 1301 1302 // Calculate the superblock level saliency map from pixel level saliency 1303 // map 1304 sb_saliency_map->buf[index] = total_weight / total_pixel; 1305 1306 // Further lower the superblock saliency score for boundary superblocks. 1307 if (row < 1 || row > num_sb_rows - 2 || col < 1 || 1308 col > num_sb_cols - 2) { 1309 sb_saliency_map->buf[index] /= 5; 1310 } 1311 } 1312 } 1313 1314 // superblock level saliency map finalization 1315 minmax_normalize(sb_saliency_map); 1316 1317 double log_sum = 0.0; 1318 double sum = 0.0; 1319 int block_count = 0; 1320 1321 // Calculate the average superblock sm_scaling_factor for a frame, to be used 1322 // for clamping later. 1323 for (int row = 0; row < num_sb_rows; ++row) { 1324 for (int col = 0; col < num_sb_cols; ++col) { 1325 const int index = row * num_sb_cols + col; 1326 const double saliency = sb_saliency_map->buf[index]; 1327 1328 cpi->sm_scaling_factor[index] = 1 - saliency; 1329 sum += cpi->sm_scaling_factor[index]; 1330 block_count++; 1331 } 1332 } 1333 assert(block_count > 0); 1334 sum /= block_count; 1335 1336 // Calculate the geometric mean of superblock sm_scaling_factor for a frame, 1337 // to be used for normalization. 1338 for (int row = 0; row < num_sb_rows; ++row) { 1339 for (int col = 0; col < num_sb_cols; ++col) { 1340 const int index = row * num_sb_cols + col; 1341 log_sum += log(fmax(cpi->sm_scaling_factor[index], 0.001)); 1342 cpi->sm_scaling_factor[index] = 1343 fmax(cpi->sm_scaling_factor[index], 0.8 * sum); 1344 } 1345 } 1346 1347 log_sum = exp(log_sum / block_count); 1348 1349 // Normalize the sm_scaling_factor by geometric mean. 1350 for (int row = 0; row < num_sb_rows; ++row) { 1351 for (int col = 0; col < num_sb_cols; ++col) { 1352 const int index = row * num_sb_cols + col; 1353 assert(log_sum > 0); 1354 cpi->sm_scaling_factor[index] /= log_sum; 1355 1356 // Modulate the sm_scaling_factor by frame basis motion factor 1357 cpi->sm_scaling_factor[index] = 1358 cpi->sm_scaling_factor[index] * motion_ratio; 1359 } 1360 } 1361 1362 aom_free(sb_saliency_map->buf); 1363 aom_free(sb_saliency_map); 1364 return 1; 1365 } 1366 1367 // av1_setup_motion_ratio() is only enabled when CONFIG_REALTIME_ONLY is 0, 1368 // because the computations need to access the first pass stats which are 1369 // only available when CONFIG_REALTIME_ONLY is equal to 0. 1370 #if !CONFIG_REALTIME_ONLY 1371 // Set motion_ratio that reflects the motion quantities between two consecutive 1372 // frames. Motion_ratio will be used to set up saliency_map based rdmult scaling 1373 // factor, i.e., the less the motion quantities are, the more bits will be spent 1374 // on this frame, and vice versa. 1375 double av1_setup_motion_ratio(AV1_COMP *cpi) { 1376 AV1_COMMON *cm = &cpi->common; 1377 int frames_since_key = 1378 cm->current_frame.display_order_hint - cpi->rc.frames_since_key; 1379 const FIRSTPASS_STATS *cur_stats = av1_firstpass_info_peek( 1380 &cpi->ppi->twopass.firstpass_info, frames_since_key); 1381 assert(cur_stats != NULL); 1382 assert(cpi->ppi->twopass.firstpass_info.total_stats.count > 0); 1383 1384 const double avg_intra_error = 1385 exp(cpi->ppi->twopass.firstpass_info.total_stats.log_intra_error / 1386 cpi->ppi->twopass.firstpass_info.total_stats.count); 1387 const double avg_inter_error = 1388 exp(cpi->ppi->twopass.firstpass_info.total_stats.log_coded_error / 1389 cpi->ppi->twopass.firstpass_info.total_stats.count); 1390 1391 double inter_error = cur_stats->coded_error; 1392 double error_stdev = 0; 1393 const double avg_error = 1394 cpi->ppi->twopass.firstpass_info.total_stats.intra_error / 1395 cpi->ppi->twopass.firstpass_info.total_stats.count; 1396 for (int i = 0; i < cpi->ppi->twopass.firstpass_info.total_stats.count; i++) { 1397 const FIRSTPASS_STATS *stats = 1398 &cpi->ppi->twopass.firstpass_info.stats_buf[i]; 1399 error_stdev += 1400 (stats->intra_error - avg_error) * (stats->intra_error - avg_error); 1401 } 1402 error_stdev = 1403 sqrt(error_stdev / cpi->ppi->twopass.firstpass_info.total_stats.count); 1404 1405 double motion_ratio = 1; 1406 if (error_stdev / fmax(avg_intra_error, 1) > 0.1) { 1407 motion_ratio = inter_error / fmax(1, avg_inter_error); 1408 motion_ratio = AOMMIN(motion_ratio, 1.5); 1409 motion_ratio = AOMMAX(motion_ratio, 0.8); 1410 } 1411 1412 return motion_ratio; 1413 } 1414 #endif // !CONFIG_REALTIME_ONLY