// Source listing: thread_common.h (12798B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AV1_COMMON_THREAD_COMMON_H_ 13 #define AOM_AV1_COMMON_THREAD_COMMON_H_ 14 15 #include "config/aom_config.h" 16 17 #include "av1/common/av1_loopfilter.h" 18 #include "av1/common/cdef.h" 19 #include "aom_util/aom_pthread.h" 20 #include "aom_util/aom_thread.h" 21 22 #ifdef __cplusplus 23 extern "C" { 24 #endif 25 26 struct AV1Common; 27 28 typedef struct AV1LfMTInfo { 29 int mi_row; 30 int plane; 31 int dir; 32 int lpf_opt_level; 33 } AV1LfMTInfo; 34 35 // Loopfilter row synchronization 36 typedef struct AV1LfSyncData { 37 #if CONFIG_MULTITHREAD 38 pthread_mutex_t *mutex_[MAX_MB_PLANE]; 39 pthread_cond_t *cond_[MAX_MB_PLANE]; 40 #endif 41 // Allocate memory to store the loop-filtered superblock index in each row. 42 int *cur_sb_col[MAX_MB_PLANE]; 43 // The optimal sync_range for different resolution and platform should be 44 // determined by testing. Currently, it is chosen to be a power-of-2 number. 45 int sync_range; 46 int rows; 47 48 // Row-based parallel loopfilter data 49 LFWorkerData *lfdata; 50 int num_workers; 51 52 #if CONFIG_MULTITHREAD 53 pthread_mutex_t *job_mutex; 54 #endif 55 AV1LfMTInfo *job_queue; 56 int jobs_enqueued; 57 int jobs_dequeued; 58 59 // Initialized to false, set to true by the worker thread that encounters an 60 // error in order to abort the processing of other worker threads. 
61 bool lf_mt_exit; 62 } AV1LfSync; 63 64 typedef struct AV1LrMTInfo { 65 int v_start; 66 int v_end; 67 int lr_unit_row; 68 int plane; 69 int sync_mode; 70 int v_copy_start; 71 int v_copy_end; 72 } AV1LrMTInfo; 73 74 typedef struct LoopRestorationWorkerData { 75 int32_t *rst_tmpbuf; 76 void *rlbs; 77 void *lr_ctxt; 78 int do_extend_border; 79 struct aom_internal_error_info error_info; 80 } LRWorkerData; 81 82 // Looprestoration row synchronization 83 typedef struct AV1LrSyncData { 84 #if CONFIG_MULTITHREAD 85 pthread_mutex_t *mutex_[MAX_MB_PLANE]; 86 pthread_cond_t *cond_[MAX_MB_PLANE]; 87 #endif 88 // Allocate memory to store the loop-restoration block index in each row. 89 int *cur_sb_col[MAX_MB_PLANE]; 90 // The optimal sync_range for different resolution and platform should be 91 // determined by testing. Currently, it is chosen to be a power-of-2 number. 92 int sync_range; 93 int rows; 94 int num_planes; 95 96 int num_workers; 97 98 #if CONFIG_MULTITHREAD 99 pthread_mutex_t *job_mutex; 100 #endif 101 // Row-based parallel loopfilter data 102 LRWorkerData *lrworkerdata; 103 104 AV1LrMTInfo *job_queue; 105 int jobs_enqueued; 106 int jobs_dequeued; 107 // Initialized to false, set to true by the worker thread that encounters 108 // an error in order to abort the processing of other worker threads. 109 bool lr_mt_exit; 110 } AV1LrSync; 111 112 typedef struct AV1CdefWorker { 113 AV1_COMMON *cm; 114 MACROBLOCKD *xd; 115 uint16_t *colbuf[MAX_MB_PLANE]; 116 uint16_t *srcbuf; 117 uint16_t *linebuf[MAX_MB_PLANE]; 118 cdef_init_fb_row_t cdef_init_fb_row_fn; 119 int do_extend_border; 120 struct aom_internal_error_info error_info; 121 } AV1CdefWorkerData; 122 123 typedef struct AV1CdefRowSync { 124 #if CONFIG_MULTITHREAD 125 pthread_mutex_t *row_mutex_; 126 pthread_cond_t *row_cond_; 127 #endif // CONFIG_MULTITHREAD 128 int is_row_done; 129 } AV1CdefRowSync; 130 131 // Data related to CDEF search multi-thread synchronization. 
132 typedef struct AV1CdefSyncData { 133 #if CONFIG_MULTITHREAD 134 // Mutex lock used while dispatching jobs. 135 pthread_mutex_t *mutex_; 136 #endif // CONFIG_MULTITHREAD 137 // Data related to CDEF row mt sync information 138 AV1CdefRowSync *cdef_row_mt; 139 // Flag to indicate all blocks are processed and end of frame is reached 140 int end_of_frame; 141 // Row index in units of 64x64 block 142 int fbr; 143 // Column index in units of 64x64 block 144 int fbc; 145 // Initialized to false, set to true by the worker thread that encounters 146 // an error in order to abort the processing of other worker threads. 147 bool cdef_mt_exit; 148 } AV1CdefSync; 149 150 void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd, 151 AV1CdefWorkerData *const cdef_worker, 152 AVxWorker *const workers, AV1CdefSync *const cdef_sync, 153 int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn, 154 int do_extend_border); 155 void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm, 156 const MACROBLOCKD *const xd, 157 CdefBlockInfo *const fb_info, 158 uint16_t **const linebuf, uint16_t *const src, 159 struct AV1CdefSyncData *const cdef_sync, int fbr); 160 void av1_cdef_copy_sb8_16(const AV1_COMMON *const cm, uint16_t *const dst, 161 int dstride, const uint8_t *src, int src_voffset, 162 int src_hoffset, int sstride, int vsize, int hsize); 163 void av1_cdef_copy_sb8_16_lowbd(uint16_t *const dst, int dstride, 164 const uint8_t *src, int src_voffset, 165 int src_hoffset, int sstride, int vsize, 166 int hsize); 167 #if CONFIG_AV1_HIGHBITDEPTH 168 void av1_cdef_copy_sb8_16_highbd(uint16_t *const dst, int dstride, 169 const uint8_t *src, int src_voffset, 170 int src_hoffset, int sstride, int vsize, 171 int hsize); 172 #endif // CONFIG_AV1_HIGHBITDEPTH 173 void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync, 174 int num_workers); 175 void av1_free_cdef_sync(AV1CdefSync *cdef_sync); 176 177 // Deallocate loopfilter synchronization related mutex and data. 
178 void av1_loop_filter_dealloc(AV1LfSync *lf_sync); 179 void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows, 180 int width, int num_workers); 181 182 void av1_set_vert_loop_filter_done(AV1_COMMON *cm, AV1LfSync *lf_sync, 183 int num_mis_in_lpf_unit_height_log2); 184 185 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm, 186 struct macroblockd *xd, int plane_start, 187 int plane_end, int partial_frame, 188 AVxWorker *workers, int num_workers, 189 AV1LfSync *lf_sync, int lpf_opt_level); 190 191 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 192 void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame, 193 struct AV1Common *cm, 194 int optimized_lr, AVxWorker *workers, 195 int num_workers, AV1LrSync *lr_sync, 196 void *lr_ctxt, int do_extend_border); 197 void av1_loop_restoration_dealloc(AV1LrSync *lr_sync); 198 void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm, 199 int num_workers, int num_rows_lr, 200 int num_planes, int width); 201 #endif // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 202 203 int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm); 204 205 void av1_thread_loop_filter_rows( 206 const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm, 207 struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane, 208 int dir, int lpf_opt_level, AV1LfSync *const lf_sync, 209 struct aom_internal_error_info *error_info, 210 AV1_DEBLOCKING_PARAMETERS *params_buf, TX_SIZE *tx_buf, int mib_size_log2); 211 212 static AOM_FORCE_INLINE bool skip_loop_filter_plane( 213 const int planes_to_lf[MAX_MB_PLANE], int plane, int lpf_opt_level) { 214 // If LPF_PICK_METHOD is LPF_PICK_FROM_Q, we have the option to filter both 215 // chroma planes together 216 if (lpf_opt_level == 2) { 217 if (plane == AOM_PLANE_Y) { 218 return !planes_to_lf[plane]; 219 } 220 if (plane == AOM_PLANE_U) { 221 // U and V are handled together 222 return !planes_to_lf[1] && !planes_to_lf[2]; 223 
} 224 assert(plane == AOM_PLANE_V); 225 if (plane == AOM_PLANE_V) { 226 // V is handled when u is filtered 227 return true; 228 } 229 } 230 231 // Normal operation mode 232 return !planes_to_lf[plane]; 233 } 234 235 static inline void enqueue_lf_jobs(AV1LfSync *lf_sync, int start, int stop, 236 const int planes_to_lf[MAX_MB_PLANE], 237 int lpf_opt_level, 238 int num_mis_in_lpf_unit_height) { 239 int mi_row, plane, dir; 240 AV1LfMTInfo *lf_job_queue = lf_sync->job_queue; 241 lf_sync->jobs_enqueued = 0; 242 lf_sync->jobs_dequeued = 0; 243 244 // Launch all vertical jobs first, as they are blocking the horizontal ones. 245 // Launch top row jobs for all planes first, in case the output can be 246 // partially reconstructed row by row. 247 for (dir = 0; dir < 2; ++dir) { 248 for (mi_row = start; mi_row < stop; mi_row += num_mis_in_lpf_unit_height) { 249 for (plane = 0; plane < MAX_MB_PLANE; ++plane) { 250 if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level)) { 251 continue; 252 } 253 if (!planes_to_lf[plane]) continue; 254 lf_job_queue->mi_row = mi_row; 255 lf_job_queue->plane = plane; 256 lf_job_queue->dir = dir; 257 lf_job_queue->lpf_opt_level = lpf_opt_level; 258 lf_job_queue++; 259 lf_sync->jobs_enqueued++; 260 } 261 } 262 } 263 } 264 265 static inline void loop_filter_frame_mt_init( 266 AV1_COMMON *cm, int start_mi_row, int end_mi_row, 267 const int planes_to_lf[MAX_MB_PLANE], int num_workers, AV1LfSync *lf_sync, 268 int lpf_opt_level, int num_mis_in_lpf_unit_height_log2) { 269 // Number of superblock rows 270 const int sb_rows = 271 CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, num_mis_in_lpf_unit_height_log2); 272 273 if (!lf_sync->sync_range || sb_rows != lf_sync->rows || 274 num_workers > lf_sync->num_workers) { 275 av1_loop_filter_dealloc(lf_sync); 276 av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); 277 } 278 lf_sync->lf_mt_exit = false; 279 280 // Initialize cur_sb_col to -1 for all SB rows. 
281 for (int i = 0; i < MAX_MB_PLANE; i++) { 282 memset(lf_sync->cur_sb_col[i], -1, 283 sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows); 284 } 285 286 enqueue_lf_jobs(lf_sync, start_mi_row, end_mi_row, planes_to_lf, 287 lpf_opt_level, (1 << num_mis_in_lpf_unit_height_log2)); 288 } 289 290 static inline AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) { 291 AV1LfMTInfo *cur_job_info = NULL; 292 293 #if CONFIG_MULTITHREAD 294 pthread_mutex_lock(lf_sync->job_mutex); 295 296 if (!lf_sync->lf_mt_exit && lf_sync->jobs_dequeued < lf_sync->jobs_enqueued) { 297 cur_job_info = lf_sync->job_queue + lf_sync->jobs_dequeued; 298 lf_sync->jobs_dequeued++; 299 } 300 301 pthread_mutex_unlock(lf_sync->job_mutex); 302 #else 303 (void)lf_sync; 304 #endif 305 306 return cur_job_info; 307 } 308 309 static inline void loop_filter_data_reset(LFWorkerData *lf_data, 310 YV12_BUFFER_CONFIG *frame_buffer, 311 struct AV1Common *cm, 312 MACROBLOCKD *xd) { 313 struct macroblockd_plane *pd = xd->plane; 314 lf_data->frame_buffer = frame_buffer; 315 lf_data->cm = cm; 316 lf_data->xd = xd; 317 for (int i = 0; i < MAX_MB_PLANE; i++) { 318 lf_data->planes[i].dst = pd[i].dst; 319 lf_data->planes[i].subsampling_x = pd[i].subsampling_x; 320 lf_data->planes[i].subsampling_y = pd[i].subsampling_y; 321 } 322 } 323 324 static inline void set_planes_to_loop_filter(const struct loopfilter *lf, 325 int planes_to_lf[MAX_MB_PLANE], 326 int plane_start, int plane_end) { 327 // For each luma and chroma plane, whether to filter it or not. 
328 planes_to_lf[0] = (lf->filter_level[0] || lf->filter_level[1]) && 329 plane_start <= 0 && 0 < plane_end; 330 planes_to_lf[1] = lf->filter_level_u && plane_start <= 1 && 1 < plane_end; 331 planes_to_lf[2] = lf->filter_level_v && plane_start <= 2 && 2 < plane_end; 332 } 333 334 static inline int check_planes_to_loop_filter(const struct loopfilter *lf, 335 int planes_to_lf[MAX_MB_PLANE], 336 int plane_start, int plane_end) { 337 set_planes_to_loop_filter(lf, planes_to_lf, plane_start, plane_end); 338 // If the luma plane is purposely not filtered, neither are the chroma 339 // planes. 340 if (!planes_to_lf[0] && plane_start <= 0 && 0 < plane_end) return 0; 341 // Early exit. 342 if (!planes_to_lf[0] && !planes_to_lf[1] && !planes_to_lf[2]) return 0; 343 return 1; 344 } 345 346 #ifdef __cplusplus 347 } // extern "C" 348 #endif 349 350 #endif // AOM_AV1_COMMON_THREAD_COMMON_H_