tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

av1_nn_predict_test.cc (8781B)


      1 /*
      2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <tuple>
     13 
     14 #include "gtest/gtest.h"
     15 
     16 #include "aom/aom_integer.h"
     17 #include "aom_ports/aom_timer.h"
     18 #include "av1/encoder/ml.h"
     19 #include "config/aom_config.h"
     20 #include "config/aom_dsp_rtcd.h"
     21 #include "config/av1_rtcd.h"
     22 #include "test/util.h"
     23 #include "test/register_state_check.h"
     24 #include "test/acm_random.h"
     25 
     26 namespace {
     27 using NnPredict_Func = void (*)(const float *const input_nodes,
     28                                const NN_CONFIG *const nn_config,
     29                                int reduce_prec, float *const output);
     30 
     31 using NnPredictTestParam = std::tuple<const NnPredict_Func>;
     32 
     33 const float epsilon = 1e-3f;  // Error threshold for functional equivalence
     34 
     35 class NnPredictTest : public ::testing::TestWithParam<NnPredictTestParam> {
     36 public:
     37  void SetUp() override {
     38    const int MAX_NODES2 = NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER;
     39    // Allocate two massive buffers on the heap for edge weights and node bias
     40    // Then set-up the double-dimension arrays pointing into the big buffers
     41    weights_buf = (float *)aom_malloc(MAX_NODES2 * (NN_MAX_HIDDEN_LAYERS + 1) *
     42                                      sizeof(*weights_buf));
     43    bias_buf =
     44        (float *)aom_malloc(NN_MAX_NODES_PER_LAYER *
     45                            (NN_MAX_HIDDEN_LAYERS + 1) * sizeof(*bias_buf));
     46    ASSERT_NE(weights_buf, nullptr);
     47    ASSERT_NE(bias_buf, nullptr);
     48    for (int i = 0; i < NN_MAX_HIDDEN_LAYERS + 1; i++) {
     49      weights[i] = &weights_buf[i * MAX_NODES2];
     50      bias[i] = &bias_buf[i * NN_MAX_NODES_PER_LAYER];
     51    }
     52    target_func_ = GET_PARAM(0);
     53  }
     54  void TearDown() override {
     55    aom_free(weights_buf);
     56    aom_free(bias_buf);
     57  }
     58  void RunNnPredictTest(const NN_CONFIG *const shape);
     59  void RunNnPredictSpeedTest(const NN_CONFIG *const shape, const int run_times);
     60  void RunNnPredictTest_all(const NN_CONFIG *const shapes,
     61                            const int num_shapes);
     62  void RunNnPredictSpeedTest_all(const NN_CONFIG *const shapes,
     63                                 const int num_shapes, const int run_times);
     64 
     65 private:
     66  NnPredict_Func target_func_;
     67  libaom_test::ACMRandom rng_;
     68  float *weights[NN_MAX_HIDDEN_LAYERS + 1] = {};
     69  float *bias[NN_MAX_HIDDEN_LAYERS + 1] = {};
     70  float *weights_buf = nullptr, *bias_buf = nullptr;
     71 };
     72 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(NnPredictTest);
     73 
     74 void NnPredictTest::RunNnPredictTest(const NN_CONFIG *const shape) {
     75  float inputs[NN_MAX_NODES_PER_LAYER] = { 0 };
     76  float outputs_test[NN_MAX_NODES_PER_LAYER] = { 0 };
     77  float outputs_ref[NN_MAX_NODES_PER_LAYER] = { 0 };
     78 
     79  NN_CONFIG nn_config = *shape;
     80 
     81  char shape_str[32] = { 0 };
     82  snprintf(shape_str, sizeof(shape_str), "%d", shape->num_inputs);
     83  for (int layer = 0; layer < shape->num_hidden_layers; layer++)
     84    snprintf(&shape_str[strlen(shape_str)],
     85             sizeof(shape_str) - strlen(shape_str), "x%d",
     86             shape->num_hidden_nodes[layer]);
     87  snprintf(&shape_str[strlen(shape_str)], sizeof(shape_str) - strlen(shape_str),
     88           "x%d", shape->num_outputs);
     89 
     90  for (int i = 0; i < NN_MAX_HIDDEN_LAYERS + 1; i++) {
     91    nn_config.weights[i] = weights[i];
     92    nn_config.bias[i] = bias[i];
     93  }
     94 
     95  for (int iter = 0; iter < 10000 && !HasFatalFailure(); ++iter) {
     96    for (int node = 0; node < shape->num_inputs; node++) {
     97      inputs[node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
     98    }
     99    for (int layer = 0; layer < shape->num_hidden_layers; layer++) {
    100      for (int node = 0; node < NN_MAX_NODES_PER_LAYER; node++) {
    101        bias[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
    102      }
    103      for (int node = 0; node < NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER;
    104           node++) {
    105        weights[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
    106      }
    107    }
    108    // Now the outputs:
    109    int layer = shape->num_hidden_layers;
    110    for (int node = 0; node < NN_MAX_NODES_PER_LAYER; node++) {
    111      bias[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
    112    }
    113    for (int node = 0; node < NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER;
    114         node++) {
    115      weights[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
    116    }
    117 
    118    av1_nn_predict_c(inputs, &nn_config, 0, outputs_ref);
    119    target_func_(inputs, &nn_config, 0, outputs_test);
    120 
    121    for (int node = 0; node < shape->num_outputs; node++) {
    122      if (outputs_ref[node] < epsilon) {
    123        ASSERT_LE(outputs_test[node], epsilon)
    124            << "Reference output was near-zero, test output was not ("
    125            << shape_str << ")";
    126      } else {
    127        const float error = outputs_ref[node] - outputs_test[node];
    128        const float relative_error = fabsf(error / outputs_ref[node]);
    129        ASSERT_LE(relative_error, epsilon)
    130            << "Excessive relative error between reference and test ("
    131            << shape_str << ")";
    132      }
    133    }
    134  }
    135 }
    136 
    137 void NnPredictTest::RunNnPredictSpeedTest(const NN_CONFIG *const shape,
    138                                          const int run_times) {
    139  float inputs[NN_MAX_NODES_PER_LAYER] = { 0 };
    140  float outputs_test[NN_MAX_NODES_PER_LAYER] = { 0 };
    141  float outputs_ref[NN_MAX_NODES_PER_LAYER] = { 0 };
    142 
    143  NN_CONFIG nn_config = *shape;
    144 
    145  for (int i = 0; i < NN_MAX_HIDDEN_LAYERS; i++) {
    146    nn_config.weights[i] = weights[i];
    147    nn_config.bias[i] = bias[i];
    148  }
    149  // Don't bother actually changing the values for inputs/weights/bias: it
    150  // shouldn't make any difference for a speed test.
    151 
    152  aom_usec_timer timer;
    153  aom_usec_timer_start(&timer);
    154  for (int i = 0; i < run_times; ++i) {
    155    av1_nn_predict_c(inputs, &nn_config, 0, outputs_ref);
    156  }
    157  aom_usec_timer_mark(&timer);
    158  const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    159  aom_usec_timer_start(&timer);
    160  for (int i = 0; i < run_times; ++i) {
    161    target_func_(inputs, &nn_config, 0, outputs_test);
    162  }
    163  aom_usec_timer_mark(&timer);
    164  const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    165 
    166  printf("%d", shape->num_inputs);
    167  for (int layer = 0; layer < shape->num_hidden_layers; layer++)
    168    printf("x%d", shape->num_hidden_nodes[layer]);
    169  printf("x%d: ", shape->num_outputs);
    170  printf("%7.2f/%7.2fns (%3.2f)\n", time1, time2, time1 / time2);
    171 }
    172 
    173 // This is all the neural network shapes observed executed in a few different
    174 // runs of the encoder.  It also conveniently covers all the kernels
    175 // implemented.
    176 static const NN_CONFIG kShapes[] = {
    177  { 37, 1, 2, { 16, 24 }, {}, {} }, { 24, 24, 1, { 12 }, {}, {} },
    178  { 10, 16, 1, { 64 }, {}, {} },    { 12, 1, 1, { 12 }, {}, {} },
    179  { 12, 1, 1, { 24 }, {}, {} },     { 12, 1, 1, { 32 }, {}, {} },
    180  { 18, 4, 1, { 24 }, {}, {} },     { 18, 4, 1, { 32 }, {}, {} },
    181  { 4, 1, 1, { 16 }, {}, {} },      { 8, 1, 0, { 0 }, {}, {} },
    182  { 8, 4, 1, { 16 }, {}, {} },      { 8, 1, 1, { 32 }, {}, {} },
    183  { 9, 3, 1, { 32 }, {}, {} },      { 8, 4, 0, { 0 }, {}, {} },
    184  { 8, 8, 0, { 0 }, {}, {} },       { 4, 4, 1, { 8 }, {}, {} },
    185  { 4, 3, 0, { 64 }, {}, {} },
    186 };
    187 
    188 void NnPredictTest::RunNnPredictTest_all(const NN_CONFIG *const shapes,
    189                                         const int num_shapes) {
    190  for (int i = 0; i < num_shapes; i++) RunNnPredictTest(&shapes[i]);
    191 }
    192 
    193 void NnPredictTest::RunNnPredictSpeedTest_all(const NN_CONFIG *const shapes,
    194                                              const int num_shapes,
    195                                              const int run_times) {
    196  for (int i = 0; i < num_shapes; i++)
    197    NnPredictTest::RunNnPredictSpeedTest(&shapes[i], run_times);
    198 }
    199 
    200 TEST_P(NnPredictTest, RandomValues) {
    201  RunNnPredictTest_all(kShapes, sizeof(kShapes) / sizeof(kShapes[0]));
    202 }
    203 
    204 TEST_P(NnPredictTest, DISABLED_Speed) {
    205  RunNnPredictSpeedTest_all(kShapes, sizeof(kShapes) / sizeof(kShapes[0]),
    206                            10000000);
    207 }
    208 
    209 #if !CONFIG_EXCLUDE_SIMD_MISMATCH
    210 #if HAVE_SSE3
    211 INSTANTIATE_TEST_SUITE_P(SSE3, NnPredictTest,
    212                         ::testing::Values(av1_nn_predict_sse3));
    213 #endif
    214 
    215 #if HAVE_AVX2
    216 INSTANTIATE_TEST_SUITE_P(AVX2, NnPredictTest,
    217                         ::testing::Values(av1_nn_predict_avx2));
    218 #endif
    219 
    220 #if HAVE_NEON
    221 INSTANTIATE_TEST_SUITE_P(NEON, NnPredictTest,
    222                         ::testing::Values(av1_nn_predict_neon));
    223 #endif
    224 #endif  // !CONFIG_EXCLUDE_SIMD_MISMATCH
    225 
    226 }  // namespace