cnn.h (7968B)
1 /* 2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AV1_ENCODER_CNN_H_ 13 #define AOM_AV1_ENCODER_CNN_H_ 14 15 #ifdef __cplusplus 16 extern "C" { 17 #endif 18 19 #include <math.h> 20 #include <stdbool.h> 21 22 #include "aom_util/aom_thread.h" 23 #include "config/av1_rtcd.h" 24 25 struct AV1Common; 26 27 #define CNN_MAX_HIDDEN_LAYERS 64 28 #define CNN_MAX_LAYERS (CNN_MAX_HIDDEN_LAYERS + 1) 29 #define CNN_MAX_CHANNELS 256 30 #define CNN_MAX_BRANCHES 4 31 #define CNN_MAX_THREADS 32 32 33 #define NO_BRANCH_CONFIG { 0, 0, 0 } 34 #define NO_BN_PARAMS { NULL, NULL, NULL, NULL } 35 36 enum { 37 PADDING_SAME_ZERO, // tensorflow's SAME padding with pixels outside 38 // the image area assumed to be 0 (default) 39 PADDING_SAME_REPLICATE, // tensorflow's SAME padding with pixels outside 40 // the image area replicated from closest edge 41 PADDING_VALID // tensorflow's VALID padding 42 } UENUM1BYTE(PADDING_TYPE); 43 44 // enum { NONE, RELU, SOFTSIGN } UENUM1BYTE(ACTIVATION); 45 46 // Times when input tensor may be copied to branches given in input_to_branches. 47 // BRANCH_NO_COPY: doesn't copy any tensor. 48 // BRANCH_INPUT: copies the input tensor to branches. 49 // BRANCH_OUTPUT: copies the convolved tensor to branches. 50 // BRANCH_COMBINED: copies the combined (after convolving and branch combining) 51 // tensor. If no combinations happen at this layer, then this option 52 // has the same effect as COPY_OUTPUT. 53 enum { 54 BRANCH_NO_COPY, 55 BRANCH_INPUT, 56 BRANCH_OUTPUT, 57 BRANCH_COMBINED 58 } UENUM1BYTE(BRANCH_COPY); 59 60 // Types of combining branches with output of current layer: 61 // BRANCH_NOC: no branch combining 62 // BRANCH_ADD: Add previously stored branch tensor to output of layer 63 // BRANCH_CAT: Concatenate branch tensor to output of layer 64 enum { BRANCH_NOC, BRANCH_ADD, BRANCH_CAT } UENUM1BYTE(BRANCH_COMBINE); 65 66 // The parameters used to scale each channel in batch 67 // normalization. The processing in done on a per-channel basis. 68 // e.g. bn_mean[c] is the mean for all pixels in channel c. This 69 // is always applied after activation. The output is given by 70 // out[c,i,j] = norm[c,i,j] * bn_gamma[c] + bn_beta[c] where 71 // norm[c,i,j] = (in[c,i,j] - bn_mean[c]) / bn_std[c] 72 // here we assume that the effect of variance_epsilon is already 73 // taken into account when bn_std is calculated. The pointers 74 // needs to be either all zero or all valid. If all zero, then 75 // batchnorm is disabled, else batchnorm is applied. 76 struct CNN_BATCHNORM_PARAMS { 77 const float *bn_gamma; 78 const float *bn_beta; 79 const float *bn_mean; 80 const float *bn_std; 81 }; 82 83 struct CNN_BRANCH_CONFIG { 84 int input_to_branches; // If nonzero, copy the active tensor to the current 85 // layer and store for future use in branches 86 // specified in the field as a binary mask. For 87 // example, if input_to_branch = 0x06, it means the 88 // input tensor to the current branch is copied to 89 // branches 1 and 2 (where 0 represents the primary 90 // branch). One restriction is that the mask 91 // cannot indicate copying to the current branch. 92 // If greater than 0, only copies the channels up 93 // to the given index. 94 int channels_to_copy; // Within the layer, input a copy of active 95 // tensor to branches given in input_to_branches. 96 int branches_to_combine; // mask of branches to combine with output of 97 // current layer, if 98 // branch_combine_type != BRANCH_NOC 99 // For example, if branches_to_combine = 0x0A, 100 // it means that braches 1 and 3 are combined 101 // with the current branch. 102 }; 103 104 struct CNN_LAYER_CONFIG { 105 int in_channels; 106 int filter_width; 107 int filter_height; 108 int out_channels; 109 int skip_width; 110 int skip_height; 111 int maxpool; // whether to use maxpool or not (only effective when 112 // skip width or skip_height are > 1) 113 const float *weights; // array of length filter_height x filter_width x 114 // in_channels x out_channels where the inner-most 115 // scan is out_channels and the outer most scan is 116 // filter_height. 117 const float *bias; // array of length out_channels 118 PADDING_TYPE pad; // padding type 119 ACTIVATION activation; // the activation function to use after convolution 120 int deconvolve; // whether this is a deconvolution layer. 121 // 0: If skip_width or skip_height are > 1, then we 122 // reduce resolution 123 // 1: If skip_width or skip_height are > 1, then we 124 // increase resolution 125 int branch; // branch index in [0, CNN_MAX_BRANCHES - 1], where 126 // 0 refers to the primary branch. 127 BRANCH_COPY branch_copy_type; 128 BRANCH_COMBINE branch_combine_type; 129 struct CNN_BRANCH_CONFIG branch_config; 130 struct CNN_BATCHNORM_PARAMS 131 bn_params; // A struct that contains the parameters 132 // used for batch normalization. 133 int output_num; // The output buffer idx to which the layer output is 134 // written. Set to -1 to disable writing it to the output. In 135 // the case that branch_combine_type is BRANCH_CAT, all 136 // concatenated channels will be written to output. In the 137 // case of BRANCH_ADD, the output will be the result of 138 // summation. 139 }; 140 141 struct CNN_CONFIG { 142 int num_layers; // number of CNN layers ( = number of hidden layers + 1) 143 int is_residue; // whether the output activation is a residue 144 int ext_width, ext_height; // extension horizontally and vertically 145 int strict_bounds; // whether the input bounds are strict or not. 146 // If strict, the extension area is filled by 147 // replication; if not strict, image data is 148 // assumed available beyond the bounds. 149 CNN_LAYER_CONFIG layer_config[CNN_MAX_LAYERS]; 150 }; 151 152 struct CNN_THREAD_DATA { 153 int num_workers; 154 AVxWorker *workers; 155 }; 156 157 struct CNN_MULTI_OUT { 158 int num_outputs; 159 const int *output_channels; 160 const int *output_strides; 161 float **output_buffer; 162 }; 163 164 // Function to return size of output 165 void av1_find_cnn_output_size(int in_width, int in_height, 166 const CNN_CONFIG *cnn_config, int *out_width, 167 int *out_height, int *out_channels); 168 169 // Function to return output width and output height of given layer. 170 void av1_find_cnn_layer_output_size(int in_width, int in_height, 171 const CNN_LAYER_CONFIG *layer_config, 172 int *out_width, int *out_height); 173 174 // Prediction functions from set of input image buffers. This function supports 175 // CNN with multiple outputs. 176 bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height, 177 int stride, const CNN_CONFIG *cnn_config, 178 const CNN_THREAD_DATA *thread_data, 179 struct CNN_MULTI_OUT *output); 180 bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height, 181 int stride, 182 const CNN_CONFIG *cnn_config, 183 const CNN_THREAD_DATA *thread_data, 184 int bit_depth, CNN_MULTI_OUT *output); 185 #ifdef __cplusplus 186 } // extern "C" 187 #endif 188 189 #endif // AOM_AV1_ENCODER_CNN_H_