tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

thread_common.h (12798B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #ifndef AOM_AV1_COMMON_THREAD_COMMON_H_
     13 #define AOM_AV1_COMMON_THREAD_COMMON_H_
     14 
     15 #include "config/aom_config.h"
     16 
     17 #include "av1/common/av1_loopfilter.h"
     18 #include "av1/common/cdef.h"
     19 #include "aom_util/aom_pthread.h"
     20 #include "aom_util/aom_thread.h"
     21 
     22 #ifdef __cplusplus
     23 extern "C" {
     24 #endif
     25 
     26 struct AV1Common;
     27 
     // One loop-filter job. enqueue_lf_jobs() fills an array of these and
     // get_lf_job_info() hands them out one at a time.
     28 typedef struct AV1LfMTInfo {
     29  int mi_row;  // Row (mi_row) at which the job starts.
     30  int plane;  // Plane index in [0, MAX_MB_PLANE).
     31  int dir;  // 0 or 1; all dir 0 jobs are queued ahead of the dir 1 jobs.
     32  int lpf_opt_level;  // Stored unchanged from enqueue_lf_jobs()'s argument.
     33 } AV1LfMTInfo;
     34 
     35 // Loopfilter row synchronization
     // Holds the per-plane row progress plus the job queue that
     // enqueue_lf_jobs() fills and get_lf_job_info() drains.
     36 typedef struct AV1LfSyncData {
     37 #if CONFIG_MULTITHREAD
     // One mutex pointer and one condition-variable pointer per plane.
     38  pthread_mutex_t *mutex_[MAX_MB_PLANE];
     39  pthread_cond_t *cond_[MAX_MB_PLANE];
     40 #endif
     41  // Allocate memory to store the loop-filtered superblock index in each row.
     // loop_filter_frame_mt_init() sets the first sb_rows entries of each
     // plane's array to -1 before jobs are queued.
     42  int *cur_sb_col[MAX_MB_PLANE];
     43  // The optimal sync_range for different resolution and platform should be
     44  // determined by testing. Currently, it is chosen to be a power-of-2 number.
     // A zero sync_range makes loop_filter_frame_mt_init() (re)allocate.
     45  int sync_range;
     // Compared with the frame's superblock-row count in
     // loop_filter_frame_mt_init(); a mismatch triggers reallocation.
     46  int rows;
     47
     48  // Row-based parallel loopfilter data
     49  LFWorkerData *lfdata;
     // Reallocation is triggered when more workers than this are requested.
     50  int num_workers;
     51
     52 #if CONFIG_MULTITHREAD
     // Held by get_lf_job_info() while it inspects and advances the queue.
     53  pthread_mutex_t *job_mutex;
     54 #endif
     55  AV1LfMTInfo *job_queue;  // Job array written by enqueue_lf_jobs().
     56  int jobs_enqueued;  // Number of jobs written to job_queue.
     57  int jobs_dequeued;  // Number of jobs already handed out.
     58
     59  // Initialized to false, set to true by the worker thread that encounters an
     60  // error in order to abort the processing of other worker threads.
     // get_lf_job_info() stops returning jobs once this is true.
     61  bool lf_mt_exit;
     62 } AV1LfSync;
     63 
     // One loop-restoration job; element type of the job_queue in AV1LrSync.
     // NOTE(review): nothing in this header reads or writes these fields, so
     // their exact meaning must be confirmed against the loop-restoration
     // threading code. Presumably v_start/v_end and v_copy_start/v_copy_end
     // are vertical ranges - TODO confirm.
     64 typedef struct AV1LrMTInfo {
     65  int v_start;
     66  int v_end;
     67  int lr_unit_row;
     68  int plane;
     69  int sync_mode;
     70  int v_copy_start;
     71  int v_copy_end;
     72 } AV1LrMTInfo;
     73 
     // Loop-restoration worker data; AV1LrSync refers to it through its
     // lrworkerdata member. NOTE(review): presumably one instance per worker -
     // confirm against the allocation code.
     74 typedef struct LoopRestorationWorkerData {
     75  int32_t *rst_tmpbuf;
     76  void *rlbs;  // Opaque here; the concrete type is not visible in this header.
     77  void *lr_ctxt;  // Opaque here; same name as the lr_ctxt argument of av1_loop_restoration_filter_frame_mt().
     78  int do_extend_border;
     79  struct aom_internal_error_info error_info;
     80 } LRWorkerData;
     81 
     82 // Looprestoration row synchronization
     // Loop-restoration counterpart of AV1LfSync: same member layout idea, with
     // an extra num_planes and a job queue of AV1LrMTInfo entries.
     // NOTE(review): no code in this header touches these members; the notes
     // below that mirror AV1LfSync are assumptions to be confirmed.
     83 typedef struct AV1LrSyncData {
     84 #if CONFIG_MULTITHREAD
     // One mutex pointer and one condition-variable pointer per plane.
     85  pthread_mutex_t *mutex_[MAX_MB_PLANE];
     86  pthread_cond_t *cond_[MAX_MB_PLANE];
     87 #endif
     88  // Allocate memory to store the loop-restoration block index in each row.
     89  int *cur_sb_col[MAX_MB_PLANE];
     90  // The optimal sync_range for different resolution and platform should be
     91  // determined by testing. Currently, it is chosen to be a power-of-2 number.
     92  int sync_range;
     93  int rows;
     94  int num_planes;
     95
     96  int num_workers;
     97
     98 #if CONFIG_MULTITHREAD
     // Presumably guards job_queue / jobs_dequeued, as job_mutex does in
     // AV1LfSync - TODO confirm.
     99  pthread_mutex_t *job_mutex;
    100 #endif
    101  // Row-based parallel loop-restoration worker data
    102  LRWorkerData *lrworkerdata;
    103
    104  AV1LrMTInfo *job_queue;
    105  int jobs_enqueued;
    106  int jobs_dequeued;
    107  // Initialized to false, set to true by the worker thread that encounters
    108  // an error in order to abort the processing of other worker threads.
    109  bool lr_mt_exit;
    110 } AV1LrSync;
    111 
    // CDEF worker data; av1_cdef_frame_mt() receives a pointer to it as its
    // cdef_worker argument. NOTE(review): presumably an array with one element
    // per worker - confirm against the caller.
    112 typedef struct AV1CdefWorker {
    113  AV1_COMMON *cm;
    114  MACROBLOCKD *xd;
    115  uint16_t *colbuf[MAX_MB_PLANE];  // One buffer pointer per plane.
    116  uint16_t *srcbuf;
    117  uint16_t *linebuf[MAX_MB_PLANE];  // One buffer pointer per plane.
    // Callback of the same type and name as the one passed to
    // av1_cdef_frame_mt().
    118  cdef_init_fb_row_t cdef_init_fb_row_fn;
    119  int do_extend_border;
    120  struct aom_internal_error_info error_info;
    121 } AV1CdefWorkerData;
    122 
    // Row-level CDEF synchronization state; AV1CdefSync refers to it through
    // its cdef_row_mt member.
    123 typedef struct AV1CdefRowSync {
    124 #if CONFIG_MULTITHREAD
    125  pthread_mutex_t *row_mutex_;
    126  pthread_cond_t *row_cond_;
    127 #endif  // CONFIG_MULTITHREAD
    // NOTE(review): presumably nonzero once the row has been processed and
    // protected by row_mutex_ - confirm against the CDEF threading code.
    128  int is_row_done;
    129 } AV1CdefRowSync;
    130 
    131 // Data related to CDEF search multi-thread synchronization.
    // A pointer to this struct is taken by av1_cdef_frame_mt(),
    // av1_cdef_init_fb_row_mt(), av1_alloc_cdef_sync() and av1_free_cdef_sync().
    132 typedef struct AV1CdefSyncData {
    133 #if CONFIG_MULTITHREAD
    134  // Mutex lock used while dispatching jobs.
    135  pthread_mutex_t *mutex_;
    136 #endif  // CONFIG_MULTITHREAD
    137  // Data related to CDEF row mt sync information
    138  AV1CdefRowSync *cdef_row_mt;
    139  // Flag to indicate all blocks are processed and end of frame is reached
    140  int end_of_frame;
    141  // Row index in units of 64x64 block
    142  int fbr;
    143  // Column index in units of 64x64 block
    144  int fbc;
    145  // Initialized to false, set to true by the worker thread that encounters
    146  // an error in order to abort the processing of other worker threads.
    147  bool cdef_mt_exit;
    148 } AV1CdefSync;
    149 
    // CDEF multi-threading API. Only the declarations are visible in this
    // header. NOTE(review): the remarks below are read off the signatures, so
    // confirm behavior against the definitions.
    //
    // av1_cdef_frame_mt: takes the worker array, the CDEF worker data, the
    // shared AV1CdefSync and a row-init callback of type cdef_init_fb_row_t.
    150 void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
    151                       AV1CdefWorkerData *const cdef_worker,
    152                       AVxWorker *const workers, AV1CdefSync *const cdef_sync,
    153                       int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
    154                       int do_extend_border);
    // Row-init routine that also receives the shared sync data and the row
    // index fbr. Presumably usable as the cdef_init_fb_row_fn callback above -
    // TODO confirm against the cdef_init_fb_row_t typedef.
    155 void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
    156                             const MACROBLOCKD *const xd,
    157                             CdefBlockInfo *const fb_info,
    158                             uint16_t **const linebuf, uint16_t *const src,
    159                             struct AV1CdefSyncData *const cdef_sync, int fbr);
    // Copy helpers from a uint8_t-typed source pointer into a uint16_t
    // destination. The _lowbd and _highbd variants take no AV1_COMMON argument.
    160 void av1_cdef_copy_sb8_16(const AV1_COMMON *const cm, uint16_t *const dst,
    161                          int dstride, const uint8_t *src, int src_voffset,
    162                          int src_hoffset, int sstride, int vsize, int hsize);
    163 void av1_cdef_copy_sb8_16_lowbd(uint16_t *const dst, int dstride,
    164                                const uint8_t *src, int src_voffset,
    165                                int src_hoffset, int sstride, int vsize,
    166                                int hsize);
    // The high-bitdepth variant is declared only when CONFIG_AV1_HIGHBITDEPTH
    // is enabled.
    167 #if CONFIG_AV1_HIGHBITDEPTH
    168 void av1_cdef_copy_sb8_16_highbd(uint16_t *const dst, int dstride,
    169                                 const uint8_t *src, int src_voffset,
    170                                 int src_hoffset, int sstride, int vsize,
    171                                 int hsize);
    172 #endif  // CONFIG_AV1_HIGHBITDEPTH
    // Allocation / release pair for the contents of an AV1CdefSync
    // (presumably; judged from the names only).
    173 void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync,
    174                         int num_workers);
    175 void av1_free_cdef_sync(AV1CdefSync *cdef_sync);
    176 
    177 // Deallocate loopfilter synchronization related mutex and data.
    178 void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
    // loop_filter_frame_mt_init() calls this right after
    // av1_loop_filter_dealloc(), passing the superblock-row count as rows,
    // cm->width as width and the requested worker count.
    179 void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
    180                           int width, int num_workers);
    181
    // NOTE(review): definition not visible here; presumably marks the vertical
    // filtering pass as finished in lf_sync - confirm.
    182 void av1_set_vert_loop_filter_done(AV1_COMMON *cm, AV1LfSync *lf_sync,
    183                                   int num_mis_in_lpf_unit_height_log2);
    184
    // Multi-threaded loop-filter entry point: takes the plane range
    // [plane_start, plane_end) as separate arguments, the worker array and the
    // shared AV1LfSync. Definition not visible in this header.
    185 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
    186                              struct macroblockd *xd, int plane_start,
    187                              int plane_end, int partial_frame,
    188                              AVxWorker *workers, int num_workers,
    189                              AV1LfSync *lf_sync, int lpf_opt_level);
    190 
    191 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    // Loop-restoration multi-threading API. These declarations are compiled out
    // when CONFIG_REALTIME_ONLY is set and CONFIG_AV1_DECODER is not.
    // lr_ctxt is passed as an opaque void pointer; its concrete type is not
    // visible in this header.
    192 void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
    193                                          struct AV1Common *cm,
    194                                          int optimized_lr, AVxWorker *workers,
    195                                          int num_workers, AV1LrSync *lr_sync,
    196                                          void *lr_ctxt, int do_extend_border);
    // Allocation / release pair for the contents of an AV1LrSync (presumably;
    // judged from the names and from the matching loop-filter pair).
    197 void av1_loop_restoration_dealloc(AV1LrSync *lr_sync);
    198 void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
    199                                int num_workers, int num_rows_lr,
    200                                int num_planes, int width);
    201 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    202 
    // NOTE(review): definition not visible here; presumably returns an extra
    // delay, in superblocks, related to intra block copy - confirm units.
    203 int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm);
    204
    // Loop-filter routine taking one row position. The mi_row, plane, dir and
    // lpf_opt_level parameters carry the same names as the fields of
    // AV1LfMTInfo; presumably callers pass the values of a job obtained from
    // get_lf_job_info() - TODO confirm.
    205 void av1_thread_loop_filter_rows(
    206    const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
    207    struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane,
    208    int dir, int lpf_opt_level, AV1LfSync *const lf_sync,
    209    struct aom_internal_error_info *error_info,
    210    AV1_DEBLOCKING_PARAMETERS *params_buf, TX_SIZE *tx_buf, int mib_size_log2);
    211 
    212 static AOM_FORCE_INLINE bool skip_loop_filter_plane(
    213    const int planes_to_lf[MAX_MB_PLANE], int plane, int lpf_opt_level) {
    214  // If LPF_PICK_METHOD is LPF_PICK_FROM_Q, we have the option to filter both
    215  // chroma planes together
    216  if (lpf_opt_level == 2) {
    217    if (plane == AOM_PLANE_Y) {
    218      return !planes_to_lf[plane];
    219    }
    220    if (plane == AOM_PLANE_U) {
    221      // U and V are handled together
    222      return !planes_to_lf[1] && !planes_to_lf[2];
    223    }
    224    assert(plane == AOM_PLANE_V);
    225    if (plane == AOM_PLANE_V) {
    226      // V is handled when u is filtered
    227      return true;
    228    }
    229  }
    230 
    231  // Normal operation mode
    232  return !planes_to_lf[plane];
    233 }
    234 
    235 static inline void enqueue_lf_jobs(AV1LfSync *lf_sync, int start, int stop,
    236                                   const int planes_to_lf[MAX_MB_PLANE],
    237                                   int lpf_opt_level,
    238                                   int num_mis_in_lpf_unit_height) {
    239  int mi_row, plane, dir;
    240  AV1LfMTInfo *lf_job_queue = lf_sync->job_queue;
    241  lf_sync->jobs_enqueued = 0;
    242  lf_sync->jobs_dequeued = 0;
    243 
    244  // Launch all vertical jobs first, as they are blocking the horizontal ones.
    245  // Launch top row jobs for all planes first, in case the output can be
    246  // partially reconstructed row by row.
    247  for (dir = 0; dir < 2; ++dir) {
    248    for (mi_row = start; mi_row < stop; mi_row += num_mis_in_lpf_unit_height) {
    249      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    250        if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level)) {
    251          continue;
    252        }
    253        if (!planes_to_lf[plane]) continue;
    254        lf_job_queue->mi_row = mi_row;
    255        lf_job_queue->plane = plane;
    256        lf_job_queue->dir = dir;
    257        lf_job_queue->lpf_opt_level = lpf_opt_level;
    258        lf_job_queue++;
    259        lf_sync->jobs_enqueued++;
    260      }
    261    }
    262  }
    263 }
    264 
    265 static inline void loop_filter_frame_mt_init(
    266    AV1_COMMON *cm, int start_mi_row, int end_mi_row,
    267    const int planes_to_lf[MAX_MB_PLANE], int num_workers, AV1LfSync *lf_sync,
    268    int lpf_opt_level, int num_mis_in_lpf_unit_height_log2) {
    269  // Number of superblock rows
    270  const int sb_rows =
    271      CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, num_mis_in_lpf_unit_height_log2);
    272 
    273  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
    274      num_workers > lf_sync->num_workers) {
    275    av1_loop_filter_dealloc(lf_sync);
    276    av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
    277  }
    278  lf_sync->lf_mt_exit = false;
    279 
    280  // Initialize cur_sb_col to -1 for all SB rows.
    281  for (int i = 0; i < MAX_MB_PLANE; i++) {
    282    memset(lf_sync->cur_sb_col[i], -1,
    283           sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows);
    284  }
    285 
    286  enqueue_lf_jobs(lf_sync, start_mi_row, end_mi_row, planes_to_lf,
    287                  lpf_opt_level, (1 << num_mis_in_lpf_unit_height_log2));
    288 }
    289 
    290 static inline AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) {
    291  AV1LfMTInfo *cur_job_info = NULL;
    292 
    293 #if CONFIG_MULTITHREAD
    294  pthread_mutex_lock(lf_sync->job_mutex);
    295 
    296  if (!lf_sync->lf_mt_exit && lf_sync->jobs_dequeued < lf_sync->jobs_enqueued) {
    297    cur_job_info = lf_sync->job_queue + lf_sync->jobs_dequeued;
    298    lf_sync->jobs_dequeued++;
    299  }
    300 
    301  pthread_mutex_unlock(lf_sync->job_mutex);
    302 #else
    303  (void)lf_sync;
    304 #endif
    305 
    306  return cur_job_info;
    307 }
    308 
    309 static inline void loop_filter_data_reset(LFWorkerData *lf_data,
    310                                          YV12_BUFFER_CONFIG *frame_buffer,
    311                                          struct AV1Common *cm,
    312                                          MACROBLOCKD *xd) {
    313  struct macroblockd_plane *pd = xd->plane;
    314  lf_data->frame_buffer = frame_buffer;
    315  lf_data->cm = cm;
    316  lf_data->xd = xd;
    317  for (int i = 0; i < MAX_MB_PLANE; i++) {
    318    lf_data->planes[i].dst = pd[i].dst;
    319    lf_data->planes[i].subsampling_x = pd[i].subsampling_x;
    320    lf_data->planes[i].subsampling_y = pd[i].subsampling_y;
    321  }
    322 }
    323 
    324 static inline void set_planes_to_loop_filter(const struct loopfilter *lf,
    325                                             int planes_to_lf[MAX_MB_PLANE],
    326                                             int plane_start, int plane_end) {
    327  // For each luma and chroma plane, whether to filter it or not.
    328  planes_to_lf[0] = (lf->filter_level[0] || lf->filter_level[1]) &&
    329                    plane_start <= 0 && 0 < plane_end;
    330  planes_to_lf[1] = lf->filter_level_u && plane_start <= 1 && 1 < plane_end;
    331  planes_to_lf[2] = lf->filter_level_v && plane_start <= 2 && 2 < plane_end;
    332 }
    333 
    334 static inline int check_planes_to_loop_filter(const struct loopfilter *lf,
    335                                              int planes_to_lf[MAX_MB_PLANE],
    336                                              int plane_start, int plane_end) {
    337  set_planes_to_loop_filter(lf, planes_to_lf, plane_start, plane_end);
    338  // If the luma plane is purposely not filtered, neither are the chroma
    339  // planes.
    340  if (!planes_to_lf[0] && plane_start <= 0 && 0 < plane_end) return 0;
    341  // Early exit.
    342  if (!planes_to_lf[0] && !planes_to_lf[1] && !planes_to_lf[2]) return 0;
    343  return 1;
    344 }
    345 
    346 #ifdef __cplusplus
    347 }  // extern "C"
    348 #endif
    349 
    350 #endif  // AOM_AV1_COMMON_THREAD_COMMON_H_