[ tor-browser ].git.dasho

cnn.h (7968B)
      1 /*
      2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #ifndef AOM_AV1_ENCODER_CNN_H_
     13 #define AOM_AV1_ENCODER_CNN_H_
     14 
     15 #ifdef __cplusplus
     16 extern "C" {
     17 #endif
     18 
     19 #include <math.h>
     20 #include <stdbool.h>
     21 
     22 #include "aom_util/aom_thread.h"
     23 #include "config/av1_rtcd.h"
     24 
     25 struct AV1Common;
     26 
     27 #define CNN_MAX_HIDDEN_LAYERS 64  // upper bound on the number of hidden layers
     28 #define CNN_MAX_LAYERS (CNN_MAX_HIDDEN_LAYERS + 1)  // hidden layers + 1; capacity of layer_config[] in struct CNN_CONFIG
     29 #define CNN_MAX_CHANNELS 256  // presumably the per-layer channel limit -- TODO confirm against the implementation
     30 #define CNN_MAX_BRANCHES 4  // valid branch indices are [0, CNN_MAX_BRANCHES - 1]; 0 is the primary branch
     31 #define CNN_MAX_THREADS 32  // presumably the worker-count limit -- TODO confirm against the implementation
     32 
     33 #define NO_BRANCH_CONFIG { 0, 0, 0 }  // positional initializer: three zeros, matching the three int fields of struct CNN_BRANCH_CONFIG
     34 #define NO_BN_PARAMS { NULL, NULL, NULL, NULL }  // positional initializer: four NULLs, matching the four pointers of struct CNN_BATCHNORM_PARAMS (batchnorm disabled)
     35 
     36 enum {
     37  PADDING_SAME_ZERO,       // TensorFlow's SAME padding with pixels outside
     38                           // the image area assumed to be 0 (default)
     39  PADDING_SAME_REPLICATE,  // TensorFlow's SAME padding with pixels outside
     40                           // the image area replicated from the closest edge
     41  PADDING_VALID            // TensorFlow's VALID padding
     42 } UENUM1BYTE(PADDING_TYPE);  // UENUM1BYTE is not defined in this header; it must come from an included one
     43 
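     44 // enum { NONE, RELU, SOFTSIGN } UENUM1BYTE(ACTIVATION);  // disabled here; ACTIVATION is still used by CNN_LAYER_CONFIG, so it must be declared by an included header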
     45 
     46 // Times when the input tensor may be copied to branches given in input_to_branches.
     47 // BRANCH_NO_COPY: doesn't copy any tensor.
     48 // BRANCH_INPUT: copies the input tensor to branches.
     49 // BRANCH_OUTPUT: copies the convolved tensor to branches.
     50 // BRANCH_COMBINED: copies the combined (after convolving and branch combining)
     51 //   tensor. If no combinations happen at this layer, then this option
     52 //   has the same effect as BRANCH_OUTPUT.
     53 enum {
     54  BRANCH_NO_COPY,
     55  BRANCH_INPUT,
     56  BRANCH_OUTPUT,
     57  BRANCH_COMBINED
     58 } UENUM1BYTE(BRANCH_COPY);
     59 
     60 // Ways of combining branches with the output of the current layer:
     61 // BRANCH_NOC: no branch combining.
     62 // BRANCH_ADD: add the previously stored branch tensor to the output of the layer.
     63 // BRANCH_CAT: concatenate the branch tensor to the output of the layer.
     64 enum { BRANCH_NOC, BRANCH_ADD, BRANCH_CAT } UENUM1BYTE(BRANCH_COMBINE);
     65 
     66 // The parameters used to scale each channel in batch
     67 // normalization. The processing is done on a per-channel basis,
     68 // e.g. bn_mean[c] is the mean for all pixels in channel c. This
     69 // is always applied after activation. The output is given by
     70 // out[c,i,j] = norm[c,i,j] * bn_gamma[c] + bn_beta[c] where
     71 // norm[c,i,j] = (in[c,i,j] - bn_mean[c]) / bn_std[c].
     72 // Here we assume that the effect of variance_epsilon is already
     73 // taken into account when bn_std is calculated. The pointers
     74 // need to be either all NULL or all valid. If all NULL, then
     75 // batchnorm is disabled, else batchnorm is applied.
     76 struct CNN_BATCHNORM_PARAMS {
     77  const float *bn_gamma;  // per-channel scale applied to the normalized value
     78  const float *bn_beta;  // per-channel offset added after scaling
     79  const float *bn_mean;  // per-channel mean subtracted from the input
     80  const float *bn_std;  // per-channel divisor (variance_epsilon already folded in)
     81 };
     82 
     83 struct CNN_BRANCH_CONFIG {
     84  int input_to_branches;  // If nonzero, copy the active tensor to the current
     85  // layer and store it for future use in the branches specified in this
     86  // field as a binary mask. For example, input_to_branches = 0x06
     87  // means the input tensor to the current branch is copied to
     88  // branches 1 and 2 (where 0 represents the primary branch).
     89  // One restriction is that the mask cannot indicate copying to
     90  // the current branch.
     91  // NOTE(review): the sentence below appears to describe
     92  // channels_to_copy rather than input_to_branches -- confirm:
     93  // if greater than 0, only copies the channels up to the given index.
     94  int channels_to_copy;  // Within the layer, input a copy of active
     95  // tensor to branches given in input_to_branches.
     96  int branches_to_combine;  // Mask of branches to combine with the output
     97  // of the current layer, if
     98  // branch_combine_type != BRANCH_NOC.
     99  // For example, if branches_to_combine = 0x0A,
    100  // it means that branches 1 and 3 are combined
    101  // with the current branch.
    102 };
    103 
    104 struct CNN_LAYER_CONFIG {
    105  int in_channels;  // number of input channels (a dimension of weights)
    106  int filter_width;  // filter width (a dimension of weights)
    107  int filter_height;  // filter height (outer-most dimension of weights)
    108  int out_channels;  // number of output channels (length of bias)
    109  int skip_width;  // skip along the width; values > 1 change resolution (see deconvolve)
    110  int skip_height;  // skip along the height; values > 1 change resolution (see deconvolve)
    111  int maxpool;            // whether to use maxpool or not (only effective when
    112                          // skip_width or skip_height are > 1)
    113  const float *weights;   // array of length filter_height x filter_width x
    114                          // in_channels x out_channels where the inner-most
    115                          // scan is out_channels and the outer-most scan is
    116                          // filter_height.
    117  const float *bias;      // array of length out_channels
    118  PADDING_TYPE pad;       // padding type
    119  ACTIVATION activation;  // the activation function to use after convolution
    120  int deconvolve;         // whether this is a deconvolution layer.
    121                          // 0: If skip_width or skip_height are > 1, then we
    122                          // reduce resolution
    123                          // 1: If skip_width or skip_height are > 1, then we
    124                          // increase resolution
    125  int branch;             // branch index in [0, CNN_MAX_BRANCHES - 1], where
    126                          // 0 refers to the primary branch.
    127  BRANCH_COPY branch_copy_type;  // which tensor, if any, is copied to other branches
    128  BRANCH_COMBINE branch_combine_type;  // how stored branches are combined with this layer's output
    129  struct CNN_BRANCH_CONFIG branch_config;  // masks and channel count used for branch copy/combine
    130  struct CNN_BATCHNORM_PARAMS
    131      bn_params;   // A struct that contains the parameters
    132                   // used for batch normalization.
    133  int output_num;  // The output buffer idx to which the layer output is
    134                   // written. Set to -1 to disable writing it to the output. In
    135                   // the case that branch_combine_type is BRANCH_CAT, all
    136                   // concatenated channels will be written to output. In the
    137                   // case of BRANCH_ADD, the output will be the result of
    138                   // summation.
    139 };
    140 
    141 struct CNN_CONFIG {
    142  int num_layers;  // number of CNN layers (= number of hidden layers + 1)
    143  int is_residue;  // whether the output activation is a residue
    144  int ext_width, ext_height;  // extension horizontally and vertically
    145  int strict_bounds;          // whether the input bounds are strict or not.
    146                              // If strict, the extension area is filled by
    147                              // replication; if not strict, image data is
    148                              // assumed available beyond the bounds.
    149  CNN_LAYER_CONFIG layer_config[CNN_MAX_LAYERS];  // per-layer settings, capacity CNN_MAX_LAYERS; the CNN_LAYER_CONFIG typedef is not declared in this header
    150 };
    151 
    152 struct CNN_THREAD_DATA {
    153  int num_workers;  // presumably the number of entries in workers -- TODO confirm
    154  AVxWorker *workers;  // worker objects; AVxWorker is presumably declared in aom_util/aom_thread.h -- confirm
    155 };
    156 
    157 struct CNN_MULTI_OUT {
    158  int num_outputs;  // presumably the length of the three arrays below -- TODO confirm
    159  const int *output_channels;  // presumably the channel count of each output -- TODO confirm
    160  const int *output_strides;  // presumably the row stride of each output buffer -- TODO confirm
    161  float **output_buffer;  // one float buffer per output; see output_num in CNN_LAYER_CONFIG for the buffer index
    162 };
    163 
    164 // Returns the size of the CNN output for an in_width x in_height input via *out_width, *out_height and *out_channels.
    165 void av1_find_cnn_output_size(int in_width, int in_height,
    166                              const CNN_CONFIG *cnn_config, int *out_width,
    167                              int *out_height, int *out_channels);
    168 
    169 // Returns the output width and height of the given layer for an in_width x in_height input via *out_width and *out_height.
    170 void av1_find_cnn_layer_output_size(int in_width, int in_height,
    171                                    const CNN_LAYER_CONFIG *layer_config,
    172                                    int *out_width, int *out_height);
    173 
    174 // Prediction functions from a set of input image buffers. These functions support
    175 // CNNs with multiple outputs. NOTE(review): the bool result presumably signals success -- confirm.
    176 bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
    177                                   int stride, const CNN_CONFIG *cnn_config,
    178                                   const CNN_THREAD_DATA *thread_data,
    179                                   struct CNN_MULTI_OUT *output);
    180 bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
    181                                          int stride,
    182                                          const CNN_CONFIG *cnn_config,
    183                                          const CNN_THREAD_DATA *thread_data,
    184                                          int bit_depth, CNN_MULTI_OUT *output);
    185 #ifdef __cplusplus
    186 }  // extern "C"
    187 #endif
    188 
    189 #endif  // AOM_AV1_ENCODER_CNN_H_
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE