tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

simd_impl.h (53892B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <tuple>
     13 
     14 #define SIMD_CHECK 1
     15 #include "aom_dsp/aom_simd_inline.h"
     16 #include "aom_dsp/simd/v256_intrinsics_c.h"
     17 #include "gtest/gtest.h"
     18 #include "test/register_state_check.h"
     19 
     20 namespace SIMD_NAMESPACE {
     21 
     // Value-parameterized GoogleTest fixture shared by every intrinsic test
     // suite in this file. param_signature is the tuple type handed to
     // ::testing::TestWithParam; SetUp() reads elements 0, 1 and 2 of the
     // current parameter into mask, maskwidth and name so that the TEST_P
     // bodies can forward them to the TestSimd*Arg(s) helpers.
     22 template <typename param_signature>
     23 class TestIntrinsic : public ::testing::TestWithParam<param_signature> {
     24 public:
     25  ~TestIntrinsic() override = default;
     // Copies the three tuple elements of GetParam() into the members below.
     26  void SetUp() override {
     27    mask = std::get<0>(this->GetParam());
     28    maskwidth = std::get<1>(this->GetParam());
     29    name = std::get<2>(this->GetParam());
     30  }
     31 
     32 protected:
     // Default member initializers keep the fixture in a defined state even
     // before SetUp() has run; SetUp() overwrites all three values.
     33  uint32_t mask = 0, maskwidth = 0;
     34  const char *name = nullptr;
     35 };
     36 
     37 // Create one typedef for each function signature
     // TYPEDEF_SIMD(name) declares ARCH_POSTFIX(name) as an alias of the
     // TestIntrinsic fixture parameterized on a
     // std::tuple<uint32_t, uint32_t, const char *>, i.e. the three values
     // that TestIntrinsic::SetUp() reads as mask, maskwidth and name.
     // ARCH_POSTFIX is not defined in this file; presumably it decorates the
     // name with an architecture-specific suffix -- TODO confirm.
     38 #define TYPEDEF_SIMD(name)   \
     39  using ARCH_POSTFIX(name) = \
     40      TestIntrinsic<std::tuple<uint32_t, uint32_t, const char *> >
     41 
     // One fixture alias per tested function signature. The names follow the
     // pattern RETURN_ARGS, matching the template arguments used by the
     // corresponding TEST_P bodies further down (for example V64_V64U32 is
     // tested with TestSimd2Args<c_v64, c_v64, uint32_t>). All aliases name
     // the same TestIntrinsic instantiation; they exist so that each signature
     // gets its own test suite name.
     42 TYPEDEF_SIMD(V64_U8);
     43 TYPEDEF_SIMD(V64_U16);
     44 TYPEDEF_SIMD(V64_U32);
     45 TYPEDEF_SIMD(V64_V64);
     46 TYPEDEF_SIMD(U32_V64);
     47 TYPEDEF_SIMD(S32_V64);
     48 TYPEDEF_SIMD(U64_V64);
     49 TYPEDEF_SIMD(S64_V64);
     50 TYPEDEF_SIMD(V64_U32U32);
     51 TYPEDEF_SIMD(V64_V64V64);
     52 TYPEDEF_SIMD(S64_V64V64);
     53 TYPEDEF_SIMD(V64_V64U32);
     54 TYPEDEF_SIMD(U32_V64V64);
     55 TYPEDEF_SIMD(V128_V64);
     56 TYPEDEF_SIMD(V128_V128);
     57 TYPEDEF_SIMD(U32_V128);
     58 TYPEDEF_SIMD(U64_V128);
     59 TYPEDEF_SIMD(V64_V128);
     60 TYPEDEF_SIMD(V128_U8);
     61 TYPEDEF_SIMD(V128_U16);
     62 TYPEDEF_SIMD(V128_U32);
     63 TYPEDEF_SIMD(V128_U64);
     64 TYPEDEF_SIMD(V128_U64U64);
     65 TYPEDEF_SIMD(V128_V64V64);
     66 TYPEDEF_SIMD(V128_V128V128);
     67 TYPEDEF_SIMD(V128_V128V128V128);
     68 TYPEDEF_SIMD(S64_V128V128);
     69 TYPEDEF_SIMD(V128_V128U32);
     70 TYPEDEF_SIMD(U32_V128V128);
     71 TYPEDEF_SIMD(U64_V128V128);
     72 TYPEDEF_SIMD(V256_V128);
     73 TYPEDEF_SIMD(V256_V256);
     74 TYPEDEF_SIMD(U64_V256);
     75 TYPEDEF_SIMD(V256_V128V128);
     76 TYPEDEF_SIMD(V256_V256V256);
     77 TYPEDEF_SIMD(V256_V256V256V256);
     78 TYPEDEF_SIMD(U64_V256V256);
     79 TYPEDEF_SIMD(S64_V256V256);
     80 TYPEDEF_SIMD(V256_V256U32);
     81 TYPEDEF_SIMD(U32_V256V256);
     82 TYPEDEF_SIMD(V256_U8);
     83 TYPEDEF_SIMD(V256_U16);
     84 TYPEDEF_SIMD(V256_U32);
     85 TYPEDEF_SIMD(V256_U64);
     86 TYPEDEF_SIMD(U32_V256);
     87 TYPEDEF_SIMD(V64_V256);
     88 
     89 // Google Test allows up to 50 tests per case, so split the largest
     // Each *_PartN name is a plain alias of the base fixture for the same
     // signature. It gives the overflow parameters a separate suite name to
     // be instantiated under, while the test body stays identical.
     90 using ARCH_POSTFIX(V64_V64_Part2) = ARCH_POSTFIX(V64_V64);
     91 using ARCH_POSTFIX(V64_V64V64_Part2) = ARCH_POSTFIX(V64_V64V64);
     92 using ARCH_POSTFIX(V128_V128_Part2) = ARCH_POSTFIX(V128_V128);
     93 using ARCH_POSTFIX(V128_V128_Part3) = ARCH_POSTFIX(V128_V128);
     94 using ARCH_POSTFIX(V128_V128_Part4) = ARCH_POSTFIX(V128_V128);
     95 using ARCH_POSTFIX(V128_V128V128_Part2) = ARCH_POSTFIX(V128_V128V128);
     96 using ARCH_POSTFIX(V256_V256_Part2) = ARCH_POSTFIX(V256_V256);
     97 using ARCH_POSTFIX(V256_V256_Part3) = ARCH_POSTFIX(V256_V256);
     98 using ARCH_POSTFIX(V256_V256_Part4) = ARCH_POSTFIX(V256_V256);
     99 using ARCH_POSTFIX(V256_V256_Part5) = ARCH_POSTFIX(V256_V256);
    100 using ARCH_POSTFIX(V256_V256V256_Part2) = ARCH_POSTFIX(V256_V256V256);
    100 using ARCH_POSTFIX(V256_V256V256_Part2) = ARCH_POSTFIX(V256_V256V256);
    101 
    102 // These functions are machine tuned and are defined elsewhere
    // Declaration only: driver for intrinsics taking one argument.
    // c_ret / c_arg are the return and argument types of the intrinsic under
    // test; the TEST_P bodies below pass kIterations, plus the mask,
    // maskwidth and name values taken from the test parameter.
    // NOTE(review): the exact meaning of mask/maskwidth is set by the
    // out-of-file definition -- confirm there.
    103 template <typename c_ret, typename c_arg>
    104 void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
    105                  const char *name);
    106 
    // Declaration only: driver for intrinsics taking two arguments
    // (c_arg1, c_arg2) and returning c_ret. The runtime parameters are the
    // same as for the one-argument driver; the definition lives outside
    // this file.
    107 template <typename c_ret, typename c_arg1, typename c_arg2>
    108 void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
    109                   const char *name);
    110 
    // Declaration only: driver for intrinsics taking three arguments
    // (c_arg1, c_arg2, c_arg3) and returning c_ret. The runtime parameters
    // are the same as for the one- and two-argument drivers; the definition
    // lives outside this file.
    111 template <typename c_ret, typename c_arg1, typename c_arg2, typename c_arg3>
    112 void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
    113                   const char *name);
    114 
    // Value passed as the `iterations` argument by every TEST_P body below.
    115 const int kIterations = 65536;
    116 
    117 // Add a macro layer since TEST_P will quote the name so we need to
    118 // expand it first with the prefix.
    // Going through MY_TEST_P means an ARCH_POSTFIX(...) argument is fully
    // macro-expanded before TEST_P receives it.
    119 #define MY_TEST_P(name, test) TEST_P(name, test)
    120 
    // Test bodies for the 64-bit vector (c_v64) signatures. Every body
    // forwards kIterations and the fixture's mask, maskwidth and name to
    // TestSimd1Arg / TestSimd2Args; the explicit template arguments list the
    // return type first, then the argument types, mirroring the suite name
    // (e.g. U32_V64V64 -> <uint32_t, c_v64, c_v64>).
    121 MY_TEST_P(ARCH_POSTFIX(V64_U8), TestIntrinsics) {
    122  TestSimd1Arg<c_v64, uint8_t>(kIterations, mask, maskwidth, name);
    123 }
    124 
    125 MY_TEST_P(ARCH_POSTFIX(V64_U16), TestIntrinsics) {
    126  TestSimd1Arg<c_v64, uint16_t>(kIterations, mask, maskwidth, name);
    127 }
    128 
    129 MY_TEST_P(ARCH_POSTFIX(V64_U32), TestIntrinsics) {
    130  TestSimd1Arg<c_v64, uint32_t>(kIterations, mask, maskwidth, name);
    131 }
    132 
    133 MY_TEST_P(ARCH_POSTFIX(V64_V64), TestIntrinsics) {
    134  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
    135 }
    136 
    137 MY_TEST_P(ARCH_POSTFIX(U64_V64), TestIntrinsics) {
    138  TestSimd1Arg<uint64_t, c_v64>(kIterations, mask, maskwidth, name);
    139 }
    140 
    141 MY_TEST_P(ARCH_POSTFIX(S64_V64), TestIntrinsics) {
    142  TestSimd1Arg<int64_t, c_v64>(kIterations, mask, maskwidth, name);
    143 }
    144 
    145 MY_TEST_P(ARCH_POSTFIX(U32_V64), TestIntrinsics) {
    146  TestSimd1Arg<uint32_t, c_v64>(kIterations, mask, maskwidth, name);
    147 }
    148 
    149 MY_TEST_P(ARCH_POSTFIX(S32_V64), TestIntrinsics) {
    150  TestSimd1Arg<int32_t, c_v64>(kIterations, mask, maskwidth, name);
    151 }
    152 
    153 MY_TEST_P(ARCH_POSTFIX(V64_U32U32), TestIntrinsics) {
    154  TestSimd2Args<c_v64, uint32_t, uint32_t>(kIterations, mask, maskwidth, name);
    155 }
    156 
    157 MY_TEST_P(ARCH_POSTFIX(V64_V64V64), TestIntrinsics) {
    158  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
    159 }
    160 
    161 MY_TEST_P(ARCH_POSTFIX(S64_V64V64), TestIntrinsics) {
    162  TestSimd2Args<int64_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
    163 }
    164 
    165 MY_TEST_P(ARCH_POSTFIX(U32_V64V64), TestIntrinsics) {
    166  TestSimd2Args<uint32_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
    167 }
    168 
    169 MY_TEST_P(ARCH_POSTFIX(V64_V64U32), TestIntrinsics) {
    170  TestSimd2Args<c_v64, c_v64, uint32_t>(kIterations, mask, maskwidth, name);
    171 }
    172 
    173 // Google Test allows up to 50 tests per case, so split the largest
    // The Part2 suites run exactly the same bodies as V64_V64 and
    // V64_V64V64; only the suite name differs, so the extra parameters can
    // be instantiated separately.
    174 MY_TEST_P(ARCH_POSTFIX(V64_V64_Part2), TestIntrinsics) {
    175  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
    176 }
    177 
    178 MY_TEST_P(ARCH_POSTFIX(V64_V64V64_Part2), TestIntrinsics) {
    179  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
    180 }
    181 
    // Test bodies for the 128-bit vector (c_v128) signatures. As with the
    // v64 tests, each body forwards kIterations, mask, maskwidth and name to
    // the TestSimd{1Arg,2Args,3Args} driver whose template arguments spell
    // out <return, arg...> for the suite. The *_PartN suites at the end reuse
    // the body of their base suite under a different suite name.
    182 MY_TEST_P(ARCH_POSTFIX(U32_V128), TestIntrinsics) {
    183  TestSimd1Arg<uint32_t, c_v128>(kIterations, mask, maskwidth, name);
    184 }
    185 
    186 MY_TEST_P(ARCH_POSTFIX(U64_V128), TestIntrinsics) {
    187  TestSimd1Arg<uint64_t, c_v128>(kIterations, mask, maskwidth, name);
    188 }
    189 
    190 MY_TEST_P(ARCH_POSTFIX(V64_V128), TestIntrinsics) {
    191  TestSimd1Arg<c_v64, c_v128>(kIterations, mask, maskwidth, name);
    192 }
    193 
    194 MY_TEST_P(ARCH_POSTFIX(V128_V128), TestIntrinsics) {
    195  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
    196 }
    197 
    198 MY_TEST_P(ARCH_POSTFIX(V128_U8), TestIntrinsics) {
    199  TestSimd1Arg<c_v128, uint8_t>(kIterations, mask, maskwidth, name);
    200 }
    201 
    202 MY_TEST_P(ARCH_POSTFIX(V128_U16), TestIntrinsics) {
    203  TestSimd1Arg<c_v128, uint16_t>(kIterations, mask, maskwidth, name);
    204 }
    205 
    206 MY_TEST_P(ARCH_POSTFIX(V128_U32), TestIntrinsics) {
    207  TestSimd1Arg<c_v128, uint32_t>(kIterations, mask, maskwidth, name);
    208 }
    209 
    210 MY_TEST_P(ARCH_POSTFIX(V128_U64), TestIntrinsics) {
    211  TestSimd1Arg<c_v128, uint64_t>(kIterations, mask, maskwidth, name);
    212 }
    213 
    214 MY_TEST_P(ARCH_POSTFIX(V128_V64), TestIntrinsics) {
    215  TestSimd1Arg<c_v128, c_v64>(kIterations, mask, maskwidth, name);
    216 }
    217 
    218 MY_TEST_P(ARCH_POSTFIX(V128_V128V128), TestIntrinsics) {
    219  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
    220 }
    221 
    222 MY_TEST_P(ARCH_POSTFIX(V128_V128V128V128), TestIntrinsics) {
    223  TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(kIterations, mask, maskwidth,
    224                                                name);
    225 }
    226 
    227 MY_TEST_P(ARCH_POSTFIX(U32_V128V128), TestIntrinsics) {
    228  TestSimd2Args<uint32_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
    229 }
    230 
    231 MY_TEST_P(ARCH_POSTFIX(U64_V128V128), TestIntrinsics) {
    232  TestSimd2Args<uint64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
    233 }
    234 
    235 MY_TEST_P(ARCH_POSTFIX(S64_V128V128), TestIntrinsics) {
    236  TestSimd2Args<int64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
    237 }
    238 
    239 MY_TEST_P(ARCH_POSTFIX(V128_U64U64), TestIntrinsics) {
    240  TestSimd2Args<c_v128, uint64_t, uint64_t>(kIterations, mask, maskwidth, name);
    241 }
    242 
    243 MY_TEST_P(ARCH_POSTFIX(V128_V64V64), TestIntrinsics) {
    244  TestSimd2Args<c_v128, c_v64, c_v64>(kIterations, mask, maskwidth, name);
    245 }
    246 
    247 MY_TEST_P(ARCH_POSTFIX(V128_V128U32), TestIntrinsics) {
    248  TestSimd2Args<c_v128, c_v128, uint32_t>(kIterations, mask, maskwidth, name);
    249 }
    250 
    251 MY_TEST_P(ARCH_POSTFIX(V128_V128V128_Part2), TestIntrinsics) {
    252  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
    253 }
    254 
    255 MY_TEST_P(ARCH_POSTFIX(V128_V128_Part2), TestIntrinsics) {
    256  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
    257 }
    258 
    259 MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) {
    260  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
    261 }
    262 
    263 MY_TEST_P(ARCH_POSTFIX(V128_V128_Part4), TestIntrinsics) {
    264  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
    265 }
    266 
    // Test bodies for the 256-bit vector (c_v256) signatures. Each body
    // forwards kIterations, mask, maskwidth and name to the
    // TestSimd{1Arg,2Args,3Args} driver instantiated with <return, arg...>
    // for the suite. The *_PartN suites reuse the body of their base suite
    // under a different suite name.
    267 MY_TEST_P(ARCH_POSTFIX(U64_V256), TestIntrinsics) {
    268  TestSimd1Arg<uint64_t, c_v256>(kIterations, mask, maskwidth, name);
    269 }
    270 
    271 MY_TEST_P(ARCH_POSTFIX(V256_V256), TestIntrinsics) {
    272  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
    273 }
    274 
    275 MY_TEST_P(ARCH_POSTFIX(V256_V128), TestIntrinsics) {
    276  TestSimd1Arg<c_v256, c_v128>(kIterations, mask, maskwidth, name);
    277 }
    278 
    279 MY_TEST_P(ARCH_POSTFIX(V256_V256V256), TestIntrinsics) {
    280  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
    281 }
    282 
    283 MY_TEST_P(ARCH_POSTFIX(V256_V256V256V256), TestIntrinsics) {
    284  TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(kIterations, mask, maskwidth,
    285                                                name);
    286 }
    287 
    288 MY_TEST_P(ARCH_POSTFIX(V256_V128V128), TestIntrinsics) {
    289  TestSimd2Args<c_v256, c_v128, c_v128>(kIterations, mask, maskwidth, name);
    290 }
    291 
    292 MY_TEST_P(ARCH_POSTFIX(U32_V256V256), TestIntrinsics) {
    293  TestSimd2Args<uint32_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
    294 }
    295 
    296 MY_TEST_P(ARCH_POSTFIX(U64_V256V256), TestIntrinsics) {
    297  TestSimd2Args<uint64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
    298 }
    299 
    300 MY_TEST_P(ARCH_POSTFIX(S64_V256V256), TestIntrinsics) {
    301  TestSimd2Args<int64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
    302 }
    303 
    304 MY_TEST_P(ARCH_POSTFIX(V256_V256V256_Part2), TestIntrinsics) {
    305  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
    306 }
    307 
    308 MY_TEST_P(ARCH_POSTFIX(V256_V256U32), TestIntrinsics) {
    309  TestSimd2Args<c_v256, c_v256, uint32_t>(kIterations, mask, maskwidth, name);
    310 }
    311 
    312 MY_TEST_P(ARCH_POSTFIX(V256_V256_Part2), TestIntrinsics) {
    313  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
    314 }
    315 
    316 MY_TEST_P(ARCH_POSTFIX(V256_V256_Part3), TestIntrinsics) {
    317  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
    318 }
    319 
    320 MY_TEST_P(ARCH_POSTFIX(V256_V256_Part4), TestIntrinsics) {
    321  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
    322 }
    323 
    324 MY_TEST_P(ARCH_POSTFIX(V256_V256_Part5), TestIntrinsics) {
    325  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
    326 }
    327 
    328 MY_TEST_P(ARCH_POSTFIX(V256_U8), TestIntrinsics) {
    329  TestSimd1Arg<c_v256, uint8_t>(kIterations, mask, maskwidth, name);
    330 }
    331 
    332 MY_TEST_P(ARCH_POSTFIX(V256_U16), TestIntrinsics) {
    333  TestSimd1Arg<c_v256, uint16_t>(kIterations, mask, maskwidth, name);
    334 }
    335 
    336 MY_TEST_P(ARCH_POSTFIX(V256_U32), TestIntrinsics) {
    337  TestSimd1Arg<c_v256, uint32_t>(kIterations, mask, maskwidth, name);
    338 }
    339 
    340 MY_TEST_P(ARCH_POSTFIX(V256_U64), TestIntrinsics) {
    341  TestSimd1Arg<c_v256, uint64_t>(kIterations, mask, maskwidth, name);
    342 }
    343 
    344 MY_TEST_P(ARCH_POSTFIX(U32_V256), TestIntrinsics) {
    345  TestSimd1Arg<uint32_t, c_v256>(kIterations, mask, maskwidth, name);
    346 }
    347 
    348 MY_TEST_P(ARCH_POSTFIX(V64_V256), TestIntrinsics) {
    349  TestSimd1Arg<c_v64, c_v256>(kIterations, mask, maskwidth, name);
    350 }
    351 
    352 // Add a macro layer since INSTANTIATE_TEST_SUITE_P will quote the name
    353 // so we need to expand it first with the prefix
    // INSTANTIATE(name, type, ...) wraps every trailing argument in
    // ::testing::Values(...) and hands the result to
    // INSTANTIATE_TEST_SUITE_P, with `name` as the instantiation name and
    // `type` as the test suite.
    354 #define INSTANTIATE(name, type, ...) \
    355  INSTANTIATE_TEST_SUITE_P(name, type, ::testing::Values(__VA_ARGS__))
    356 
    // SIMD_TUPLE(name, mask, maskwidth) builds one test parameter: a tuple
    // of (mask, maskwidth, "name"). The macro takes the name first, but the
    // tuple stores it last, matching the element order read by
    // TestIntrinsic::SetUp(). The name is stringified with # and cast to
    // const char *, so the intrinsic is passed as text, not as a function.
    357 #define SIMD_TUPLE(name, mask, maskwidth) \
    358  std::make_tuple(mask, maskwidth, static_cast<const char *>(#name))
    359 
    // Parameters for the U32_V64V64 suite (uint32_t result, two c_v64
    // arguments); both entries use mask 0 and maskwidth 0.
    360 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64V64), SIMD_TUPLE(v64_sad_u8, 0U, 0U),
    361            SIMD_TUPLE(v64_ssd_u8, 0U, 0U));
    362 
    // Parameters for the V64_V64V64 suite: two-operand intrinsics with c_v64
    // arguments and a c_v64 result. All entries use mask 0 / maskwidth 0.
    // The list is continued in V64_V64V64_Part2 (see the 50-tests-per-case
    // note earlier in this file). The position of each entry determines its
    // generated test index, so keep the order stable.
    363 INSTANTIATE(
    364    ARCH, ARCH_POSTFIX(V64_V64V64), SIMD_TUPLE(v64_add_8, 0U, 0U),
    365    SIMD_TUPLE(v64_add_16, 0U, 0U), SIMD_TUPLE(v64_sadd_s16, 0U, 0U),
    366    SIMD_TUPLE(v64_add_32, 0U, 0U), SIMD_TUPLE(v64_sub_8, 0U, 0U),
    367    SIMD_TUPLE(v64_ssub_u8, 0U, 0U), SIMD_TUPLE(v64_ssub_s8, 0U, 0U),
    368    SIMD_TUPLE(v64_sub_16, 0U, 0U), SIMD_TUPLE(v64_ssub_s16, 0U, 0U),
    369    SIMD_TUPLE(v64_ssub_u16, 0U, 0U), SIMD_TUPLE(v64_sub_32, 0U, 0U),
    370    SIMD_TUPLE(v64_ziplo_8, 0U, 0U), SIMD_TUPLE(v64_ziphi_8, 0U, 0U),
    371    SIMD_TUPLE(v64_ziplo_16, 0U, 0U), SIMD_TUPLE(v64_ziphi_16, 0U, 0U),
    372    SIMD_TUPLE(v64_ziplo_32, 0U, 0U), SIMD_TUPLE(v64_ziphi_32, 0U, 0U),
    373    SIMD_TUPLE(v64_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v64_pack_s16_u8, 0U, 0U),
    374    SIMD_TUPLE(v64_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v64_unziphi_8, 0U, 0U),
    375    SIMD_TUPLE(v64_unziplo_8, 0U, 0U), SIMD_TUPLE(v64_unziphi_16, 0U, 0U),
    376    SIMD_TUPLE(v64_unziplo_16, 0U, 0U), SIMD_TUPLE(v64_or, 0U, 0U),
    377    SIMD_TUPLE(v64_xor, 0U, 0U), SIMD_TUPLE(v64_and, 0U, 0U),
    378    SIMD_TUPLE(v64_andn, 0U, 0U), SIMD_TUPLE(v64_mullo_s16, 0U, 0U),
    379    SIMD_TUPLE(v64_mulhi_s16, 0U, 0U), SIMD_TUPLE(v64_mullo_s32, 0U, 0U),
    380    SIMD_TUPLE(v64_madd_s16, 0U, 0U), SIMD_TUPLE(v64_madd_us8, 0U, 0U),
    381    SIMD_TUPLE(v64_avg_u8, 0U, 0U), SIMD_TUPLE(v64_rdavg_u8, 0U, 0U),
    382    SIMD_TUPLE(v64_avg_u16, 0U, 0U), SIMD_TUPLE(v64_min_u8, 0U, 0U),
    383    SIMD_TUPLE(v64_max_u8, 0U, 0U), SIMD_TUPLE(v64_min_s8, 0U, 0U),
    384    SIMD_TUPLE(v64_max_s8, 0U, 0U), SIMD_TUPLE(v64_min_s16, 0U, 0U),
    385    SIMD_TUPLE(v64_max_s16, 0U, 0U), SIMD_TUPLE(v64_cmpgt_s8, 0U, 0U),
    386    SIMD_TUPLE(v64_cmplt_s8, 0U, 0U), SIMD_TUPLE(v64_cmpeq_8, 0U, 0U),
    387    SIMD_TUPLE(v64_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v64_cmplt_s16, 0U, 0U),
    388    SIMD_TUPLE(v64_cmpeq_16, 0U, 0U));
    389 
    // Overflow parameters for the c_v64 (c_v64, c_v64) signature.
    // v64_shuffle_8 is the only entry with a non-zero mask (7, maskwidth 8).
    // NOTE(review): presumably this keeps each shuffle index byte within
    // 0..7 -- confirm against the TestSimd2Args definition.
    // imm_v64_align<N> is listed for N = 1..7.
    390 INSTANTIATE(
    391    ARCH, ARCH_POSTFIX(V64_V64V64_Part2), SIMD_TUPLE(v64_shuffle_8, 7U, 8U),
    392    SIMD_TUPLE(v64_pack_s32_u16, 0U, 0U), SIMD_TUPLE(v64_rdavg_u16, 0U, 0U),
    393    SIMD_TUPLE(v64_sadd_s8, 0U, 0U), SIMD_TUPLE(v64_sadd_u8, 0U, 0U),
    394    SIMD_TUPLE(imm_v64_align<1>, 0U, 0U), SIMD_TUPLE(imm_v64_align<2>, 0U, 0U),
    395    SIMD_TUPLE(imm_v64_align<3>, 0U, 0U), SIMD_TUPLE(imm_v64_align<4>, 0U, 0U),
    396    SIMD_TUPLE(imm_v64_align<5>, 0U, 0U), SIMD_TUPLE(imm_v64_align<6>, 0U, 0U),
    397    SIMD_TUPLE(imm_v64_align<7>, 0U, 0U));
    398 
    // Parameters for the V64_V64 suite: one-operand c_v64 -> c_v64
    // intrinsics (abs, unpack, and immediate byte/lane shifts expressed as
    // imm_*<N> template names). This list holds exactly 50 entries, the
    // limit noted earlier in this file; the remaining shift variants are in
    // V64_V64_Part2. All entries use mask 0 / maskwidth 0.
    399 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s8, 0U, 0U),
    400            SIMD_TUPLE(v64_abs_s16, 0U, 0U),
    401            SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U),
    402            SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U),
    403            SIMD_TUPLE(v64_unpacklo_s8_s16, 0U, 0U),
    404            SIMD_TUPLE(v64_unpackhi_s8_s16, 0U, 0U),
    405            SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U),
    406            SIMD_TUPLE(v64_unpacklo_s16_s32, 0U, 0U),
    407            SIMD_TUPLE(v64_unpackhi_u16_s32, 0U, 0U),
    408            SIMD_TUPLE(v64_unpackhi_s16_s32, 0U, 0U),
    409            SIMD_TUPLE(imm_v64_shr_n_byte<1>, 0U, 0U),
    410            SIMD_TUPLE(imm_v64_shr_n_byte<2>, 0U, 0U),
    411            SIMD_TUPLE(imm_v64_shr_n_byte<3>, 0U, 0U),
    412            SIMD_TUPLE(imm_v64_shr_n_byte<4>, 0U, 0U),
    413            SIMD_TUPLE(imm_v64_shr_n_byte<5>, 0U, 0U),
    414            SIMD_TUPLE(imm_v64_shr_n_byte<6>, 0U, 0U),
    415            SIMD_TUPLE(imm_v64_shr_n_byte<7>, 0U, 0U),
    416            SIMD_TUPLE(imm_v64_shl_n_byte<1>, 0U, 0U),
    417            SIMD_TUPLE(imm_v64_shl_n_byte<2>, 0U, 0U),
    418            SIMD_TUPLE(imm_v64_shl_n_byte<3>, 0U, 0U),
    419            SIMD_TUPLE(imm_v64_shl_n_byte<4>, 0U, 0U),
    420            SIMD_TUPLE(imm_v64_shl_n_byte<5>, 0U, 0U),
    421            SIMD_TUPLE(imm_v64_shl_n_byte<6>, 0U, 0U),
    422            SIMD_TUPLE(imm_v64_shl_n_byte<7>, 0U, 0U),
    423            SIMD_TUPLE(imm_v64_shl_n_8<1>, 0U, 0U),
    424            SIMD_TUPLE(imm_v64_shl_n_8<2>, 0U, 0U),
    425            SIMD_TUPLE(imm_v64_shl_n_8<3>, 0U, 0U),
    426            SIMD_TUPLE(imm_v64_shl_n_8<4>, 0U, 0U),
    427            SIMD_TUPLE(imm_v64_shl_n_8<5>, 0U, 0U),
    428            SIMD_TUPLE(imm_v64_shl_n_8<6>, 0U, 0U),
    429            SIMD_TUPLE(imm_v64_shl_n_8<7>, 0U, 0U),
    430            SIMD_TUPLE(imm_v64_shr_n_u8<1>, 0U, 0U),
    431            SIMD_TUPLE(imm_v64_shr_n_u8<2>, 0U, 0U),
    432            SIMD_TUPLE(imm_v64_shr_n_u8<3>, 0U, 0U),
    433            SIMD_TUPLE(imm_v64_shr_n_u8<4>, 0U, 0U),
    434            SIMD_TUPLE(imm_v64_shr_n_u8<5>, 0U, 0U),
    435            SIMD_TUPLE(imm_v64_shr_n_u8<6>, 0U, 0U),
    436            SIMD_TUPLE(imm_v64_shr_n_u8<7>, 0U, 0U),
    437            SIMD_TUPLE(imm_v64_shr_n_s8<1>, 0U, 0U),
    438            SIMD_TUPLE(imm_v64_shr_n_s8<2>, 0U, 0U),
    439            SIMD_TUPLE(imm_v64_shr_n_s8<3>, 0U, 0U),
    440            SIMD_TUPLE(imm_v64_shr_n_s8<4>, 0U, 0U),
    441            SIMD_TUPLE(imm_v64_shr_n_s8<5>, 0U, 0U),
    442            SIMD_TUPLE(imm_v64_shr_n_s8<6>, 0U, 0U),
    443            SIMD_TUPLE(imm_v64_shr_n_s8<7>, 0U, 0U),
    444            SIMD_TUPLE(imm_v64_shl_n_16<1>, 0U, 0U),
    445            SIMD_TUPLE(imm_v64_shl_n_16<2>, 0U, 0U),
    446            SIMD_TUPLE(imm_v64_shl_n_16<4>, 0U, 0U),
    447            SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U),
    448            SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U));
    449 
    // Continuation of the V64_V64 parameters: the remaining 16-bit-lane
    // immediate shifts (picking up at imm_v64_shl_n_16<10>) and the
    // 32-bit-lane immediate shifts. Only a sample of shift amounts is
    // listed. All entries use mask 0 / maskwidth 0.
    450 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2),
    451            SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U),
    452            SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U),
    453            SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U),
    454            SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U),
    455            SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U),
    456            SIMD_TUPLE(imm_v64_shr_n_u16<4>, 0U, 0U),
    457            SIMD_TUPLE(imm_v64_shr_n_u16<6>, 0U, 0U),
    458            SIMD_TUPLE(imm_v64_shr_n_u16<8>, 0U, 0U),
    459            SIMD_TUPLE(imm_v64_shr_n_u16<10>, 0U, 0U),
    460            SIMD_TUPLE(imm_v64_shr_n_u16<12>, 0U, 0U),
    461            SIMD_TUPLE(imm_v64_shr_n_u16<14>, 0U, 0U),
    462            SIMD_TUPLE(imm_v64_shr_n_s16<1>, 0U, 0U),
    463            SIMD_TUPLE(imm_v64_shr_n_s16<2>, 0U, 0U),
    464            SIMD_TUPLE(imm_v64_shr_n_s16<4>, 0U, 0U),
    465            SIMD_TUPLE(imm_v64_shr_n_s16<6>, 0U, 0U),
    466            SIMD_TUPLE(imm_v64_shr_n_s16<8>, 0U, 0U),
    467            SIMD_TUPLE(imm_v64_shr_n_s16<10>, 0U, 0U),
    468            SIMD_TUPLE(imm_v64_shr_n_s16<12>, 0U, 0U),
    469            SIMD_TUPLE(imm_v64_shr_n_s16<14>, 0U, 0U),
    470            SIMD_TUPLE(imm_v64_shl_n_32<1>, 0U, 0U),
    471            SIMD_TUPLE(imm_v64_shl_n_32<4>, 0U, 0U),
    472            SIMD_TUPLE(imm_v64_shl_n_32<8>, 0U, 0U),
    473            SIMD_TUPLE(imm_v64_shl_n_32<12>, 0U, 0U),
    474            SIMD_TUPLE(imm_v64_shl_n_32<16>, 0U, 0U),
    475            SIMD_TUPLE(imm_v64_shl_n_32<20>, 0U, 0U),
    476            SIMD_TUPLE(imm_v64_shl_n_32<24>, 0U, 0U),
    477            SIMD_TUPLE(imm_v64_shl_n_32<28>, 0U, 0U),
    478            SIMD_TUPLE(imm_v64_shr_n_u32<1>, 0U, 0U),
    479            SIMD_TUPLE(imm_v64_shr_n_u32<4>, 0U, 0U),
    480            SIMD_TUPLE(imm_v64_shr_n_u32<8>, 0U, 0U),
    481            SIMD_TUPLE(imm_v64_shr_n_u32<12>, 0U, 0U),
    482            SIMD_TUPLE(imm_v64_shr_n_u32<16>, 0U, 0U),
    483            SIMD_TUPLE(imm_v64_shr_n_u32<20>, 0U, 0U),
    484            SIMD_TUPLE(imm_v64_shr_n_u32<24>, 0U, 0U),
    485            SIMD_TUPLE(imm_v64_shr_n_u32<28>, 0U, 0U),
    486            SIMD_TUPLE(imm_v64_shr_n_s32<1>, 0U, 0U),
    487            SIMD_TUPLE(imm_v64_shr_n_s32<4>, 0U, 0U),
    488            SIMD_TUPLE(imm_v64_shr_n_s32<8>, 0U, 0U),
    489            SIMD_TUPLE(imm_v64_shr_n_s32<12>, 0U, 0U),
    490            SIMD_TUPLE(imm_v64_shr_n_s32<16>, 0U, 0U),
    491            SIMD_TUPLE(imm_v64_shr_n_s32<20>, 0U, 0U),
    492            SIMD_TUPLE(imm_v64_shr_n_s32<24>, 0U, 0U),
    493            SIMD_TUPLE(imm_v64_shr_n_s32<28>, 0U, 0U));
    494 
    // Parameters for the variable-shift intrinsics (c_v64 value, uint32_t
    // shift count). The mask is 7, 15 or 31 for 8-, 16- and 32-bit lanes,
    // with maskwidth 32.
    // NOTE(review): presumably the mask bounds the uint32_t shift count to
    // lane_bits - 1 -- confirm against the TestSimd2Args definition.
    495 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64U32), SIMD_TUPLE(v64_shl_8, 7U, 32U),
    496            SIMD_TUPLE(v64_shr_u8, 7U, 32U), SIMD_TUPLE(v64_shr_s8, 7U, 32U),
    497            SIMD_TUPLE(v64_shl_16, 15U, 32U), SIMD_TUPLE(v64_shr_u16, 15U, 32U),
    498            SIMD_TUPLE(v64_shr_s16, 15U, 32U), SIMD_TUPLE(v64_shl_32, 31U, 32U),
    499            SIMD_TUPLE(v64_shr_u32, 31U, 32U),
    500            SIMD_TUPLE(v64_shr_s32, 31U, 32U));
    501 
    // Parameters for the remaining v64 suites, whose result or arguments are
    // scalars: reductions and extractions returning a scalar (U64/S64/U32/
    // S32_V64, S64_V64V64) and constructors building a c_v64 from scalars
    // (V64_U8/U16/U32, V64_U32U32). All entries use mask 0 / maskwidth 0.
    502 INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V64), SIMD_TUPLE(v64_hadd_u8, 0U, 0U),
    503            SIMD_TUPLE(v64_u64, 0U, 0U));
    504 
    505 INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64), SIMD_TUPLE(v64_hadd_s16, 0U, 0U));
    506 
    507 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64), SIMD_TUPLE(v64_low_u32, 0U, 0U),
    508            SIMD_TUPLE(v64_high_u32, 0U, 0U));
    509 
    510 INSTANTIATE(ARCH, ARCH_POSTFIX(S32_V64), SIMD_TUPLE(v64_low_s32, 0U, 0U),
    511            SIMD_TUPLE(v64_high_s32, 0U, 0U));
    512 
    513 INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64V64), SIMD_TUPLE(v64_dotp_s16, 0U, 0U),
    514            SIMD_TUPLE(v64_dotp_su8, 0U, 0U));
    515 
    516 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U8), SIMD_TUPLE(v64_dup_8, 0U, 0U));
    517 
    518 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U16), SIMD_TUPLE(v64_dup_16, 0U, 0U));
    519 
    520 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32), SIMD_TUPLE(v64_dup_32, 0U, 0U));
    521 
    522 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32U32), SIMD_TUPLE(v64_from_32, 0U, 0U));
    523 
    // Parameters for the two-operand v128 intrinsics that return a scalar:
    // uint32_t results (U32_V128V128) and uint64_t results (U64_V128V128).
    // All entries use mask 0 / maskwidth 0.
    524 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128V128), SIMD_TUPLE(v128_sad_u8, 0U, 0U),
    525            SIMD_TUPLE(v128_ssd_u8, 0U, 0U), SIMD_TUPLE(v128_sad_u16, 0U, 0U));
    526 INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128V128), SIMD_TUPLE(v128_ssd_s16, 0U, 0U));
    527 
    // Parameters for the V128_V128V128 suite: two-operand intrinsics with
    // c_v128 arguments and a c_v128 result. All entries use mask 0 /
    // maskwidth 0. The list is continued in V128_V128V128_Part2. The
    // position of each entry determines its generated test index, so keep
    // the order stable.
    528 INSTANTIATE(
    529    ARCH, ARCH_POSTFIX(V128_V128V128), SIMD_TUPLE(v128_add_8, 0U, 0U),
    530    SIMD_TUPLE(v128_add_16, 0U, 0U), SIMD_TUPLE(v128_sadd_s16, 0U, 0U),
    531    SIMD_TUPLE(v128_add_32, 0U, 0U), SIMD_TUPLE(v128_sub_8, 0U, 0U),
    532    SIMD_TUPLE(v128_ssub_u8, 0U, 0U), SIMD_TUPLE(v128_ssub_s8, 0U, 0U),
    533    SIMD_TUPLE(v128_sub_16, 0U, 0U), SIMD_TUPLE(v128_ssub_s16, 0U, 0U),
    534    SIMD_TUPLE(v128_ssub_u16, 0U, 0U), SIMD_TUPLE(v128_sub_32, 0U, 0U),
    535    SIMD_TUPLE(v128_ziplo_8, 0U, 0U), SIMD_TUPLE(v128_ziphi_8, 0U, 0U),
    536    SIMD_TUPLE(v128_ziplo_16, 0U, 0U), SIMD_TUPLE(v128_ziphi_16, 0U, 0U),
    537    SIMD_TUPLE(v128_ziplo_32, 0U, 0U), SIMD_TUPLE(v128_ziphi_32, 0U, 0U),
    538    SIMD_TUPLE(v128_ziplo_64, 0U, 0U), SIMD_TUPLE(v128_ziphi_64, 0U, 0U),
    539    SIMD_TUPLE(v128_unziphi_8, 0U, 0U), SIMD_TUPLE(v128_unziplo_8, 0U, 0U),
    540    SIMD_TUPLE(v128_unziphi_16, 0U, 0U), SIMD_TUPLE(v128_unziplo_16, 0U, 0U),
    541    SIMD_TUPLE(v128_unziphi_32, 0U, 0U), SIMD_TUPLE(v128_unziplo_32, 0U, 0U),
    542    SIMD_TUPLE(v128_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v128_pack_s16_u8, 0U, 0U),
    543    SIMD_TUPLE(v128_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v128_or, 0U, 0U),
    544    SIMD_TUPLE(v128_xor, 0U, 0U), SIMD_TUPLE(v128_and, 0U, 0U),
    545    SIMD_TUPLE(v128_andn, 0U, 0U), SIMD_TUPLE(v128_mullo_s16, 0U, 0U),
    546    SIMD_TUPLE(v128_mulhi_s16, 0U, 0U), SIMD_TUPLE(v128_mullo_s32, 0U, 0U),
    547    SIMD_TUPLE(v128_madd_s16, 0U, 0U), SIMD_TUPLE(v128_madd_us8, 0U, 0U),
    548    SIMD_TUPLE(v128_avg_u8, 0U, 0U), SIMD_TUPLE(v128_rdavg_u8, 0U, 0U),
    549    SIMD_TUPLE(v128_avg_u16, 0U, 0U), SIMD_TUPLE(v128_min_u8, 0U, 0U),
    550    SIMD_TUPLE(v128_max_u8, 0U, 0U), SIMD_TUPLE(v128_min_s8, 0U, 0U),
    551    SIMD_TUPLE(v128_max_s8, 0U, 0U), SIMD_TUPLE(v128_min_s16, 0U, 0U),
    552    SIMD_TUPLE(v128_max_s16, 0U, 0U), SIMD_TUPLE(v128_cmpgt_s8, 0U, 0U),
    553    SIMD_TUPLE(v128_cmplt_s8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_8, 0U, 0U),
    554    SIMD_TUPLE(v128_cmpgt_s16, 0U, 0U));
    555 
    // Overflow parameters for the c_v128 (c_v128, c_v128) signature.
    // v128_shuffle_8 is the only entry with a non-zero mask (15,
    // maskwidth 8).
    // NOTE(review): presumably this keeps each shuffle index byte within
    // 0..15 -- confirm against the TestSimd2Args definition.
    // imm_v128_align<N> is listed for every N in 1..15.
    556 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2),
    557            SIMD_TUPLE(v128_pack_s32_u16, 0U, 0U),
    558            SIMD_TUPLE(v128_rdavg_u16, 0U, 0U), SIMD_TUPLE(v128_add_64, 0U, 0U),
    559            SIMD_TUPLE(v128_sub_64, 0U, 0U), SIMD_TUPLE(v128_sadd_s8, 0U, 0U),
    560            SIMD_TUPLE(v128_sadd_u8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_16, 0U, 0U),
    561            SIMD_TUPLE(v128_cmplt_s16, 0U, 0U),
    562            SIMD_TUPLE(v128_cmplt_s32, 0U, 0U),
    563            SIMD_TUPLE(v128_cmpeq_32, 0U, 0U),
    564            SIMD_TUPLE(v128_cmpgt_s32, 0U, 0U),
    565            SIMD_TUPLE(v128_shuffle_8, 15U, 8U),
    566            SIMD_TUPLE(v128_min_s32, 0U, 0U), SIMD_TUPLE(v128_max_s32, 0U, 0U),
    567            SIMD_TUPLE(imm_v128_align<1>, 0U, 0U),
    568            SIMD_TUPLE(imm_v128_align<2>, 0U, 0U),
    569            SIMD_TUPLE(imm_v128_align<3>, 0U, 0U),
    570            SIMD_TUPLE(imm_v128_align<4>, 0U, 0U),
    571            SIMD_TUPLE(imm_v128_align<5>, 0U, 0U),
    572            SIMD_TUPLE(imm_v128_align<6>, 0U, 0U),
    573            SIMD_TUPLE(imm_v128_align<7>, 0U, 0U),
    574            SIMD_TUPLE(imm_v128_align<8>, 0U, 0U),
    575            SIMD_TUPLE(imm_v128_align<9>, 0U, 0U),
    576            SIMD_TUPLE(imm_v128_align<10>, 0U, 0U),
    577            SIMD_TUPLE(imm_v128_align<11>, 0U, 0U),
    578            SIMD_TUPLE(imm_v128_align<12>, 0U, 0U),
    579            SIMD_TUPLE(imm_v128_align<13>, 0U, 0U),
    580            SIMD_TUPLE(imm_v128_align<14>, 0U, 0U),
    581            SIMD_TUPLE(imm_v128_align<15>, 0U, 0U));
    582 
    // The only three-operand v128 intrinsic registered here; it is run by
    // the V128_V128V128V128 test body through TestSimd3Args.
    583 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128V128),
    584            SIMD_TUPLE(v128_blend_8, 0U, 0U));
    585 
    // Parameters for the V128_V128 suite: one-operand c_v128 -> c_v128
    // intrinsics (abs, padd, unpack, whole-vector byte shifts for N = 1..15,
    // and the first 8-bit-lane immediate shifts). The list stops at
    // imm_v128_shr_n_u8<1>; the rest continues in the V128_V128_PartN
    // suites. All entries use mask 0 / maskwidth 0.
    586 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U),
    587            SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U),
    588            SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U),
    589            SIMD_TUPLE(v128_unpacklo_s8_s16, 0U, 0U),
    590            SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U),
    591            SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U),
    592            SIMD_TUPLE(v128_unpackhi_u8_s16, 0U, 0U),
    593            SIMD_TUPLE(v128_unpackhi_s8_s16, 0U, 0U),
    594            SIMD_TUPLE(v128_unpackhi_u16_s32, 0U, 0U),
    595            SIMD_TUPLE(v128_unpackhi_s16_s32, 0U, 0U),
    596            SIMD_TUPLE(imm_v128_shr_n_byte<1>, 0U, 0U),
    597            SIMD_TUPLE(imm_v128_shr_n_byte<2>, 0U, 0U),
    598            SIMD_TUPLE(imm_v128_shr_n_byte<3>, 0U, 0U),
    599            SIMD_TUPLE(imm_v128_shr_n_byte<4>, 0U, 0U),
    600            SIMD_TUPLE(imm_v128_shr_n_byte<5>, 0U, 0U),
    601            SIMD_TUPLE(imm_v128_shr_n_byte<6>, 0U, 0U),
    602            SIMD_TUPLE(imm_v128_shr_n_byte<7>, 0U, 0U),
    603            SIMD_TUPLE(imm_v128_shr_n_byte<8>, 0U, 0U),
    604            SIMD_TUPLE(imm_v128_shr_n_byte<9>, 0U, 0U),
    605            SIMD_TUPLE(imm_v128_shr_n_byte<10>, 0U, 0U),
    606            SIMD_TUPLE(imm_v128_shr_n_byte<11>, 0U, 0U),
    607            SIMD_TUPLE(imm_v128_shr_n_byte<12>, 0U, 0U),
    608            SIMD_TUPLE(imm_v128_shr_n_byte<13>, 0U, 0U),
    609            SIMD_TUPLE(imm_v128_shr_n_byte<14>, 0U, 0U),
    610            SIMD_TUPLE(imm_v128_shr_n_byte<15>, 0U, 0U),
    611            SIMD_TUPLE(imm_v128_shl_n_byte<1>, 0U, 0U),
    612            SIMD_TUPLE(imm_v128_shl_n_byte<2>, 0U, 0U),
    613            SIMD_TUPLE(imm_v128_shl_n_byte<3>, 0U, 0U),
    614            SIMD_TUPLE(imm_v128_shl_n_byte<4>, 0U, 0U),
    615            SIMD_TUPLE(imm_v128_shl_n_byte<5>, 0U, 0U),
    616            SIMD_TUPLE(imm_v128_shl_n_byte<6>, 0U, 0U),
    617            SIMD_TUPLE(imm_v128_shl_n_byte<7>, 0U, 0U),
    618            SIMD_TUPLE(imm_v128_shl_n_byte<8>, 0U, 0U),
    619            SIMD_TUPLE(imm_v128_shl_n_byte<9>, 0U, 0U),
    620            SIMD_TUPLE(imm_v128_shl_n_byte<10>, 0U, 0U),
    621            SIMD_TUPLE(imm_v128_shl_n_byte<11>, 0U, 0U),
    622            SIMD_TUPLE(imm_v128_shl_n_byte<12>, 0U, 0U),
    623            SIMD_TUPLE(imm_v128_shl_n_byte<13>, 0U, 0U),
    624            SIMD_TUPLE(imm_v128_shl_n_byte<14>, 0U, 0U),
    625            SIMD_TUPLE(imm_v128_shl_n_byte<15>, 0U, 0U),
    626            SIMD_TUPLE(imm_v128_shl_n_8<1>, 0U, 0U),
    627            SIMD_TUPLE(imm_v128_shl_n_8<2>, 0U, 0U),
    628            SIMD_TUPLE(imm_v128_shl_n_8<3>, 0U, 0U),
    629            SIMD_TUPLE(imm_v128_shl_n_8<4>, 0U, 0U),
    630            SIMD_TUPLE(imm_v128_shl_n_8<5>, 0U, 0U),
    631            SIMD_TUPLE(imm_v128_shl_n_8<6>, 0U, 0U),
    632            SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U),
    633            SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U));
    634 
    // V128_V128_Part2 parameter list, picking up where the preceding list stops
    // (imm_v128_shr_n_u8<1>): immediate shifts on 8-bit lanes (shr_n_u8<2..7>,
    // shr_n_s8<1..7>), on 16-bit lanes (shl/shr_u/shr_s by 1, 2, 4, 6, ..., 14)
    // and on 32-bit lanes (shl by 1, 4, 8, ..., 28; shr_u by 1 and 4).
    // Every entry passes 0U, 0U as the two trailing SIMD_TUPLE values.
    // NOTE(review): presumably split into parts to cap the number of arguments
    // per INSTANTIATE -- confirm against the macro definition.
    635 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2),
    636            SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U),
    637            SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U),
    638            SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U),
    639            SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U),
    640            SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U),
    641            SIMD_TUPLE(imm_v128_shr_n_u8<7>, 0U, 0U),
    642            SIMD_TUPLE(imm_v128_shr_n_s8<1>, 0U, 0U),
    643            SIMD_TUPLE(imm_v128_shr_n_s8<2>, 0U, 0U),
    644            SIMD_TUPLE(imm_v128_shr_n_s8<3>, 0U, 0U),
    645            SIMD_TUPLE(imm_v128_shr_n_s8<4>, 0U, 0U),
    646            SIMD_TUPLE(imm_v128_shr_n_s8<5>, 0U, 0U),
    647            SIMD_TUPLE(imm_v128_shr_n_s8<6>, 0U, 0U),
    648            SIMD_TUPLE(imm_v128_shr_n_s8<7>, 0U, 0U),
    649            SIMD_TUPLE(imm_v128_shl_n_16<1>, 0U, 0U),
    650            SIMD_TUPLE(imm_v128_shl_n_16<2>, 0U, 0U),
    651            SIMD_TUPLE(imm_v128_shl_n_16<4>, 0U, 0U),
    652            SIMD_TUPLE(imm_v128_shl_n_16<6>, 0U, 0U),
    653            SIMD_TUPLE(imm_v128_shl_n_16<8>, 0U, 0U),
    654            SIMD_TUPLE(imm_v128_shl_n_16<10>, 0U, 0U),
    655            SIMD_TUPLE(imm_v128_shl_n_16<12>, 0U, 0U),
    656            SIMD_TUPLE(imm_v128_shl_n_16<14>, 0U, 0U),
    657            SIMD_TUPLE(imm_v128_shr_n_u16<1>, 0U, 0U),
    658            SIMD_TUPLE(imm_v128_shr_n_u16<2>, 0U, 0U),
    659            SIMD_TUPLE(imm_v128_shr_n_u16<4>, 0U, 0U),
    660            SIMD_TUPLE(imm_v128_shr_n_u16<6>, 0U, 0U),
    661            SIMD_TUPLE(imm_v128_shr_n_u16<8>, 0U, 0U),
    662            SIMD_TUPLE(imm_v128_shr_n_u16<10>, 0U, 0U),
    663            SIMD_TUPLE(imm_v128_shr_n_u16<12>, 0U, 0U),
    664            SIMD_TUPLE(imm_v128_shr_n_u16<14>, 0U, 0U),
    665            SIMD_TUPLE(imm_v128_shr_n_s16<1>, 0U, 0U),
    666            SIMD_TUPLE(imm_v128_shr_n_s16<2>, 0U, 0U),
    667            SIMD_TUPLE(imm_v128_shr_n_s16<4>, 0U, 0U),
    668            SIMD_TUPLE(imm_v128_shr_n_s16<6>, 0U, 0U),
    669            SIMD_TUPLE(imm_v128_shr_n_s16<8>, 0U, 0U),
    670            SIMD_TUPLE(imm_v128_shr_n_s16<10>, 0U, 0U),
    671            SIMD_TUPLE(imm_v128_shr_n_s16<12>, 0U, 0U),
    672            SIMD_TUPLE(imm_v128_shr_n_s16<14>, 0U, 0U),
    673            SIMD_TUPLE(imm_v128_shl_n_32<1>, 0U, 0U),
    674            SIMD_TUPLE(imm_v128_shl_n_32<4>, 0U, 0U),
    675            SIMD_TUPLE(imm_v128_shl_n_32<8>, 0U, 0U),
    676            SIMD_TUPLE(imm_v128_shl_n_32<12>, 0U, 0U),
    677            SIMD_TUPLE(imm_v128_shl_n_32<16>, 0U, 0U),
    678            SIMD_TUPLE(imm_v128_shl_n_32<20>, 0U, 0U),
    679            SIMD_TUPLE(imm_v128_shl_n_32<24>, 0U, 0U),
    680            SIMD_TUPLE(imm_v128_shl_n_32<28>, 0U, 0U),
    681            SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U),
    682            SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U));
    683 
    // V128_V128_Part3 parameter list: the remaining 32-bit-lane immediate right
    // shifts -- shr_n_u32 by 8, 12, ..., 28 and shr_n_s32 by 1, 4, 8, ..., 28.
    // Every entry passes 0U, 0U as the two trailing SIMD_TUPLE values.
    684 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3),
    685            SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U),
    686            SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U),
    687            SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U),
    688            SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U),
    689            SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U),
    690            SIMD_TUPLE(imm_v128_shr_n_u32<28>, 0U, 0U),
    691            SIMD_TUPLE(imm_v128_shr_n_s32<1>, 0U, 0U),
    692            SIMD_TUPLE(imm_v128_shr_n_s32<4>, 0U, 0U),
    693            SIMD_TUPLE(imm_v128_shr_n_s32<8>, 0U, 0U),
    694            SIMD_TUPLE(imm_v128_shr_n_s32<12>, 0U, 0U),
    695            SIMD_TUPLE(imm_v128_shr_n_s32<16>, 0U, 0U),
    696            SIMD_TUPLE(imm_v128_shr_n_s32<20>, 0U, 0U),
    697            SIMD_TUPLE(imm_v128_shr_n_s32<24>, 0U, 0U),
    698            SIMD_TUPLE(imm_v128_shr_n_s32<28>, 0U, 0U));
    699 
    // V128_V128_Part4 parameter list: 64-bit-lane immediate shifts (shl_n_64,
    // shr_n_u64 and shr_n_s64, each by 1 and then every multiple of 4 up to 60),
    // followed by v128_padd_u8 as the final entry.
    // Every entry passes 0U, 0U as the two trailing SIMD_TUPLE values.
    700 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part4),
    701            SIMD_TUPLE(imm_v128_shl_n_64<1>, 0U, 0U),
    702            SIMD_TUPLE(imm_v128_shl_n_64<4>, 0U, 0U),
    703            SIMD_TUPLE(imm_v128_shl_n_64<8>, 0U, 0U),
    704            SIMD_TUPLE(imm_v128_shl_n_64<12>, 0U, 0U),
    705            SIMD_TUPLE(imm_v128_shl_n_64<16>, 0U, 0U),
    706            SIMD_TUPLE(imm_v128_shl_n_64<20>, 0U, 0U),
    707            SIMD_TUPLE(imm_v128_shl_n_64<24>, 0U, 0U),
    708            SIMD_TUPLE(imm_v128_shl_n_64<28>, 0U, 0U),
    709            SIMD_TUPLE(imm_v128_shl_n_64<32>, 0U, 0U),
    710            SIMD_TUPLE(imm_v128_shl_n_64<36>, 0U, 0U),
    711            SIMD_TUPLE(imm_v128_shl_n_64<40>, 0U, 0U),
    712            SIMD_TUPLE(imm_v128_shl_n_64<44>, 0U, 0U),
    713            SIMD_TUPLE(imm_v128_shl_n_64<48>, 0U, 0U),
    714            SIMD_TUPLE(imm_v128_shl_n_64<52>, 0U, 0U),
    715            SIMD_TUPLE(imm_v128_shl_n_64<56>, 0U, 0U),
    716            SIMD_TUPLE(imm_v128_shl_n_64<60>, 0U, 0U),
    717            SIMD_TUPLE(imm_v128_shr_n_u64<1>, 0U, 0U),
    718            SIMD_TUPLE(imm_v128_shr_n_u64<4>, 0U, 0U),
    719            SIMD_TUPLE(imm_v128_shr_n_u64<8>, 0U, 0U),
    720            SIMD_TUPLE(imm_v128_shr_n_u64<12>, 0U, 0U),
    721            SIMD_TUPLE(imm_v128_shr_n_u64<16>, 0U, 0U),
    722            SIMD_TUPLE(imm_v128_shr_n_u64<20>, 0U, 0U),
    723            SIMD_TUPLE(imm_v128_shr_n_u64<24>, 0U, 0U),
    724            SIMD_TUPLE(imm_v128_shr_n_u64<28>, 0U, 0U),
    725            SIMD_TUPLE(imm_v128_shr_n_u64<32>, 0U, 0U),
    726            SIMD_TUPLE(imm_v128_shr_n_u64<36>, 0U, 0U),
    727            SIMD_TUPLE(imm_v128_shr_n_u64<40>, 0U, 0U),
    728            SIMD_TUPLE(imm_v128_shr_n_u64<44>, 0U, 0U),
    729            SIMD_TUPLE(imm_v128_shr_n_u64<48>, 0U, 0U),
    730            SIMD_TUPLE(imm_v128_shr_n_u64<52>, 0U, 0U),
    731            SIMD_TUPLE(imm_v128_shr_n_u64<56>, 0U, 0U),
    732            SIMD_TUPLE(imm_v128_shr_n_u64<60>, 0U, 0U),
    733            SIMD_TUPLE(imm_v128_shr_n_s64<1>, 0U, 0U),
    734            SIMD_TUPLE(imm_v128_shr_n_s64<4>, 0U, 0U),
    735            SIMD_TUPLE(imm_v128_shr_n_s64<8>, 0U, 0U),
    736            SIMD_TUPLE(imm_v128_shr_n_s64<12>, 0U, 0U),
    737            SIMD_TUPLE(imm_v128_shr_n_s64<16>, 0U, 0U),
    738            SIMD_TUPLE(imm_v128_shr_n_s64<20>, 0U, 0U),
    739            SIMD_TUPLE(imm_v128_shr_n_s64<24>, 0U, 0U),
    740            SIMD_TUPLE(imm_v128_shr_n_s64<28>, 0U, 0U),
    741            SIMD_TUPLE(imm_v128_shr_n_s64<32>, 0U, 0U),
    742            SIMD_TUPLE(imm_v128_shr_n_s64<36>, 0U, 0U),
    743            SIMD_TUPLE(imm_v128_shr_n_s64<40>, 0U, 0U),
    744            SIMD_TUPLE(imm_v128_shr_n_s64<44>, 0U, 0U),
    745            SIMD_TUPLE(imm_v128_shr_n_s64<48>, 0U, 0U),
    746            SIMD_TUPLE(imm_v128_shr_n_s64<52>, 0U, 0U),
    747            SIMD_TUPLE(imm_v128_shr_n_s64<56>, 0U, 0U),
    748            SIMD_TUPLE(imm_v128_shr_n_s64<60>, 0U, 0U),
    749            SIMD_TUPLE(v128_padd_u8, 0U, 0U));
    750 
    // V128_V64V64 parameter list: v128_from_v64, the 8/16/32-bit v128_zip
    // variants and v128_mul_s16, all with 0U, 0U trailing values.
    // NOTE(review): going by the suite name these presumably take two v64
    // operands and return a v128 -- confirm against the fixture definitions.
    751 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64V64), SIMD_TUPLE(v128_from_v64, 0U, 0U),
    752            SIMD_TUPLE(v128_zip_8, 0U, 0U), SIMD_TUPLE(v128_zip_16, 0U, 0U),
    753            SIMD_TUPLE(v128_zip_32, 0U, 0U), SIMD_TUPLE(v128_mul_s16, 0U, 0U));
    754 
    // V128_U64U64 parameter list: the single entry v128_from_64 with 0U, 0U.
    755 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64U64), SIMD_TUPLE(v128_from_64, 0U, 0U));
    756 
    // V128_V64 parameter list: the four v128_unpack variants (u8->s16, s8->s16,
    // u16->s32, s16->s32 by their names), all with 0U, 0U trailing values.
    757 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64),
    758            SIMD_TUPLE(v128_unpack_u8_s16, 0U, 0U),
    759            SIMD_TUPLE(v128_unpack_s8_s16, 0U, 0U),
    760            SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U),
    761            SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U));
    762 
    // V128_V128U32 parameter list: the non-immediate v128 shifts (shl, shr_u,
    // shr_s) for 8-, 16-, 32- and 64-bit lanes. Unlike most lists in this file
    // the trailing values are non-zero: the first is the lane width minus one
    // (7U, 15U, 31U, 63U) and the second is always 32U.
    // NOTE(review): these presumably feed the fixture's mask/maskwidth to keep
    // the shift count in range -- confirm against SIMD_TUPLE and the test body.
    763 INSTANTIATE(
    764    ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U),
    765    SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U),
    766    SIMD_TUPLE(v128_shl_16, 15U, 32U), SIMD_TUPLE(v128_shr_u16, 15U, 32U),
    767    SIMD_TUPLE(v128_shr_s16, 15U, 32U), SIMD_TUPLE(v128_shl_32, 31U, 32U),
    768    SIMD_TUPLE(v128_shr_u32, 31U, 32U), SIMD_TUPLE(v128_shr_s32, 31U, 32U),
    769    SIMD_TUPLE(v128_shl_64, 63U, 32U), SIMD_TUPLE(v128_shr_u64, 63U, 32U),
    770    SIMD_TUPLE(v128_shr_s64, 63U, 32U));
    771 
    // U32_V128 parameter list: v128_low_u32 and v128_movemask_8, both with
    // 0U, 0U trailing values.
    772 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U),
    773            SIMD_TUPLE(v128_movemask_8, 0U, 0U));
    774 
    // U64_V128 parameter list: the single entry v128_hadd_u8 with 0U, 0U.
    775 INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128), SIMD_TUPLE(v128_hadd_u8, 0U, 0U));
    776 
    // V64_V128 parameter list: v128_low_v64 and v128_high_v64, both with
    // 0U, 0U trailing values.
    777 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V128), SIMD_TUPLE(v128_low_v64, 0U, 0U),
    778            SIMD_TUPLE(v128_high_v64, 0U, 0U));
    779 
    // V128_U8 parameter list: the single entry v128_dup_8 with 0U, 0U.
    780 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U8), SIMD_TUPLE(v128_dup_8, 0U, 0U));
    781 
    // V128_U16 parameter list: the single entry v128_dup_16 with 0U, 0U.
    782 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U16), SIMD_TUPLE(v128_dup_16, 0U, 0U));
    783 
    // V128_U32 parameter list: the single entry v128_dup_32 with 0U, 0U.
    784 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U));
    785 
    // V128_U64 parameter list: the single entry v128_dup_64 with 0U, 0U.
    786 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64), SIMD_TUPLE(v128_dup_64, 0U, 0U));
    787 
    // S64_V128V128 parameter list: the three v128_dotp variants (s16, s32, su8),
    // all with 0U, 0U trailing values.
    788 INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128), SIMD_TUPLE(v128_dotp_s16, 0U, 0U),
    789            SIMD_TUPLE(v128_dotp_s32, 0U, 0U),
    790            SIMD_TUPLE(v128_dotp_su8, 0U, 0U));
    791 
    // U32_V256V256 parameter list: v256_sad_u8, v256_ssd_u8 and v256_sad_u16,
    // all with 0U, 0U trailing values.
    792 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256V256), SIMD_TUPLE(v256_sad_u8, 0U, 0U),
    793            SIMD_TUPLE(v256_ssd_u8, 0U, 0U), SIMD_TUPLE(v256_sad_u16, 0U, 0U));
    794 
    // U64_V256 parameter list: v256_hadd_u8 and v256_low_u64, both with
    // 0U, 0U trailing values.
    795 INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256), SIMD_TUPLE(v256_hadd_u8, 0U, 0U),
    796            SIMD_TUPLE(v256_low_u64, 0U, 0U));
    797 
    // S64_V256V256 parameter list: the three v256_dotp variants (s16, s32, su8),
    // all with 0U, 0U trailing values.
    798 INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V256V256), SIMD_TUPLE(v256_dotp_s16, 0U, 0U),
    799            SIMD_TUPLE(v256_dotp_s32, 0U, 0U),
    800            SIMD_TUPLE(v256_dotp_su8, 0U, 0U));
    801 
    // U64_V256V256 parameter list: the single entry v256_ssd_s16 with 0U, 0U.
    802 INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256V256), SIMD_TUPLE(v256_ssd_s16, 0U, 0U));
    803 
    // V256_V256V256 parameter list (first part): add/sub and saturating
    // variants, zip/unzip, pack, bitwise ops, multiplies and madd, averages,
    // min/max for 8- and 16-bit lanes, and the signed 8-bit cmpgt/cmplt.
    // Every entry passes 0U, 0U as the two trailing SIMD_TUPLE values.
    // NOTE(review): the list is continued in V256_V256V256_Part2 -- presumably
    // split to cap the number of arguments per INSTANTIATE; confirm against the
    // macro definition.
    804 INSTANTIATE(
    805    ARCH, ARCH_POSTFIX(V256_V256V256), SIMD_TUPLE(v256_add_8, 0U, 0U),
    806    SIMD_TUPLE(v256_add_16, 0U, 0U), SIMD_TUPLE(v256_sadd_s16, 0U, 0U),
    807    SIMD_TUPLE(v256_add_32, 0U, 0U), SIMD_TUPLE(v256_sub_8, 0U, 0U),
    808    SIMD_TUPLE(v256_ssub_u8, 0U, 0U), SIMD_TUPLE(v256_ssub_s8, 0U, 0U),
    809    SIMD_TUPLE(v256_sub_16, 0U, 0U), SIMD_TUPLE(v256_ssub_s16, 0U, 0U),
    810    SIMD_TUPLE(v256_ssub_u16, 0U, 0U), SIMD_TUPLE(v256_sub_32, 0U, 0U),
    811    SIMD_TUPLE(v256_ziplo_8, 0U, 0U), SIMD_TUPLE(v256_ziphi_8, 0U, 0U),
    812    SIMD_TUPLE(v256_ziplo_16, 0U, 0U), SIMD_TUPLE(v256_ziphi_16, 0U, 0U),
    813    SIMD_TUPLE(v256_ziplo_32, 0U, 0U), SIMD_TUPLE(v256_ziphi_32, 0U, 0U),
    814    SIMD_TUPLE(v256_ziplo_64, 0U, 0U), SIMD_TUPLE(v256_ziphi_64, 0U, 0U),
    815    SIMD_TUPLE(v256_ziplo_128, 0U, 0U), SIMD_TUPLE(v256_ziphi_128, 0U, 0U),
    816    SIMD_TUPLE(v256_unziphi_8, 0U, 0U), SIMD_TUPLE(v256_unziplo_8, 0U, 0U),
    817    SIMD_TUPLE(v256_unziphi_16, 0U, 0U), SIMD_TUPLE(v256_unziplo_16, 0U, 0U),
    818    SIMD_TUPLE(v256_unziphi_32, 0U, 0U), SIMD_TUPLE(v256_unziplo_32, 0U, 0U),
    819    SIMD_TUPLE(v256_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v256_pack_s16_u8, 0U, 0U),
    820    SIMD_TUPLE(v256_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v256_or, 0U, 0U),
    821    SIMD_TUPLE(v256_xor, 0U, 0U), SIMD_TUPLE(v256_and, 0U, 0U),
    822    SIMD_TUPLE(v256_andn, 0U, 0U), SIMD_TUPLE(v256_mullo_s16, 0U, 0U),
    823    SIMD_TUPLE(v256_mulhi_s16, 0U, 0U), SIMD_TUPLE(v256_mullo_s32, 0U, 0U),
    824    SIMD_TUPLE(v256_madd_s16, 0U, 0U), SIMD_TUPLE(v256_madd_us8, 0U, 0U),
    825    SIMD_TUPLE(v256_avg_u8, 0U, 0U), SIMD_TUPLE(v256_rdavg_u8, 0U, 0U),
    826    SIMD_TUPLE(v256_avg_u16, 0U, 0U), SIMD_TUPLE(v256_min_u8, 0U, 0U),
    827    SIMD_TUPLE(v256_max_u8, 0U, 0U), SIMD_TUPLE(v256_min_s8, 0U, 0U),
    828    SIMD_TUPLE(v256_max_s8, 0U, 0U), SIMD_TUPLE(v256_min_s16, 0U, 0U),
    829    SIMD_TUPLE(v256_max_s16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s8, 0U, 0U),
    830    SIMD_TUPLE(v256_cmplt_s8, 0U, 0U));
    831 
    // V256_V256V256_Part2 parameter list: the remaining compares, 32-bit
    // min/max, 64-bit add/sub, 8-bit saturating adds, pack_s32_u16, rdavg_u16,
    // 64-bit unzip, the two byte shuffles, and imm_v256_align<1..31> (align<1>
    // and align<2> sit among the other entries; <3..31> follow in order).
    // Only the shuffles carry non-zero trailing values: v256_shuffle_8 uses
    // 31U, 8U and v256_pshuffle_8 uses 15U, 8U.
    // NOTE(review): those values presumably bound the shuffle indices via the
    // fixture's mask/maskwidth -- confirm against the test body.
    832 INSTANTIATE(
    833    ARCH, ARCH_POSTFIX(V256_V256V256_Part2), SIMD_TUPLE(v256_cmpeq_8, 0U, 0U),
    834    SIMD_TUPLE(v256_min_s32, 0U, 0U), SIMD_TUPLE(v256_max_s32, 0U, 0U),
    835    SIMD_TUPLE(v256_add_64, 0U, 0U), SIMD_TUPLE(v256_sub_64, 0U, 0U),
    836    SIMD_TUPLE(v256_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v256_cmplt_s16, 0U, 0U),
    837    SIMD_TUPLE(v256_cmpeq_16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s32, 0U, 0U),
    838    SIMD_TUPLE(v256_cmplt_s32, 0U, 0U), SIMD_TUPLE(v256_cmpeq_32, 0U, 0U),
    839    SIMD_TUPLE(v256_shuffle_8, 31U, 8U), SIMD_TUPLE(v256_pshuffle_8, 15U, 8U),
    840    SIMD_TUPLE(imm_v256_align<1>, 0U, 0U), SIMD_TUPLE(v256_sadd_s8, 0U, 0U),
    841    SIMD_TUPLE(v256_sadd_u8, 0U, 0U), SIMD_TUPLE(v256_pack_s32_u16, 0U, 0U),
    842    SIMD_TUPLE(v256_rdavg_u16, 0U, 0U), SIMD_TUPLE(imm_v256_align<2>, 0U, 0U),
    843    SIMD_TUPLE(v256_unziphi_64, 0U, 0U), SIMD_TUPLE(v256_unziplo_64, 0U, 0U),
    844    SIMD_TUPLE(imm_v256_align<3>, 0U, 0U),
    845    SIMD_TUPLE(imm_v256_align<4>, 0U, 0U),
    846    SIMD_TUPLE(imm_v256_align<5>, 0U, 0U),
    847    SIMD_TUPLE(imm_v256_align<6>, 0U, 0U),
    848    SIMD_TUPLE(imm_v256_align<7>, 0U, 0U),
    849    SIMD_TUPLE(imm_v256_align<8>, 0U, 0U),
    850    SIMD_TUPLE(imm_v256_align<9>, 0U, 0U),
    851    SIMD_TUPLE(imm_v256_align<10>, 0U, 0U),
    852    SIMD_TUPLE(imm_v256_align<11>, 0U, 0U),
    853    SIMD_TUPLE(imm_v256_align<12>, 0U, 0U),
    854    SIMD_TUPLE(imm_v256_align<13>, 0U, 0U),
    855    SIMD_TUPLE(imm_v256_align<14>, 0U, 0U),
    856    SIMD_TUPLE(imm_v256_align<15>, 0U, 0U),
    857    SIMD_TUPLE(imm_v256_align<16>, 0U, 0U),
    858    SIMD_TUPLE(imm_v256_align<17>, 0U, 0U),
    859    SIMD_TUPLE(imm_v256_align<18>, 0U, 0U),
    860    SIMD_TUPLE(imm_v256_align<19>, 0U, 0U),
    861    SIMD_TUPLE(imm_v256_align<20>, 0U, 0U),
    862    SIMD_TUPLE(imm_v256_align<21>, 0U, 0U),
    863    SIMD_TUPLE(imm_v256_align<22>, 0U, 0U),
    864    SIMD_TUPLE(imm_v256_align<23>, 0U, 0U),
    865    SIMD_TUPLE(imm_v256_align<24>, 0U, 0U),
    866    SIMD_TUPLE(imm_v256_align<25>, 0U, 0U),
    867    SIMD_TUPLE(imm_v256_align<26>, 0U, 0U),
    868    SIMD_TUPLE(imm_v256_align<27>, 0U, 0U),
    869    SIMD_TUPLE(imm_v256_align<28>, 0U, 0U),
    870    SIMD_TUPLE(imm_v256_align<29>, 0U, 0U),
    871    SIMD_TUPLE(imm_v256_align<30>, 0U, 0U),
    872    SIMD_TUPLE(imm_v256_align<31>, 0U, 0U));
    873 
    // V256_V128V128 parameter list: v256_from_v128, the 8/16/32-bit v256_zip
    // variants and v256_mul_s16, all with 0U, 0U trailing values.
    // NOTE(review): going by the suite name these presumably take two v128
    // operands and return a v256 -- confirm against the fixture definitions.
    874 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128V128),
    875            SIMD_TUPLE(v256_from_v128, 0U, 0U), SIMD_TUPLE(v256_zip_8, 0U, 0U),
    876            SIMD_TUPLE(v256_zip_16, 0U, 0U), SIMD_TUPLE(v256_zip_32, 0U, 0U),
    877            SIMD_TUPLE(v256_mul_s16, 0U, 0U));
    878 
    // V256_V128 parameter list: the four v256_unpack variants (u8->s16,
    // s8->s16, u16->s32, s16->s32 by their names), all with 0U, 0U trailing
    // values.
    879 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128),
    880            SIMD_TUPLE(v256_unpack_u8_s16, 0U, 0U),
    881            SIMD_TUPLE(v256_unpack_s8_s16, 0U, 0U),
    882            SIMD_TUPLE(v256_unpack_u16_s32, 0U, 0U),
    883            SIMD_TUPLE(v256_unpack_s16_s32, 0U, 0U));
    884 
    // V256_V256U32 parameter list: the non-immediate v256 shifts (shl, shr_u,
    // shr_s) for 8-, 16-, 32- and 64-bit lanes. The trailing values are
    // non-zero: the first is the lane width minus one (7U, 15U, 31U, 63U) and
    // the second is always 32U.
    // NOTE(review): these presumably feed the fixture's mask/maskwidth to keep
    // the shift count in range -- confirm against SIMD_TUPLE and the test body.
    885 INSTANTIATE(
    886    ARCH, ARCH_POSTFIX(V256_V256U32), SIMD_TUPLE(v256_shl_8, 7U, 32U),
    887    SIMD_TUPLE(v256_shr_u8, 7U, 32U), SIMD_TUPLE(v256_shr_s8, 7U, 32U),
    888    SIMD_TUPLE(v256_shl_16, 15U, 32U), SIMD_TUPLE(v256_shr_u16, 15U, 32U),
    889    SIMD_TUPLE(v256_shr_s16, 15U, 32U), SIMD_TUPLE(v256_shl_32, 31U, 32U),
    890    SIMD_TUPLE(v256_shr_u32, 31U, 32U), SIMD_TUPLE(v256_shr_s32, 31U, 32U),
    891    SIMD_TUPLE(v256_shl_64, 63U, 32U), SIMD_TUPLE(v256_shr_u64, 63U, 32U),
    892    SIMD_TUPLE(v256_shr_s64, 63U, 32U));
    893 
    // V256_V256 parameter list (first part): abs_s8/abs_s16, padd_s16, the
    // unpacklo/unpackhi variants, imm_v256_shr_n_byte<1..31> and the first
    // eight imm_v256_shl_n_byte entries (<1..8>).
    // Every entry passes 0U, 0U as the two trailing SIMD_TUPLE values.
    // NOTE(review): continued in V256_V256_Part2 onwards -- presumably split to
    // cap the number of arguments per INSTANTIATE; confirm against the macro
    // definition.
    894 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256), SIMD_TUPLE(v256_abs_s8, 0U, 0U),
    895            SIMD_TUPLE(v256_abs_s16, 0U, 0U), SIMD_TUPLE(v256_padd_s16, 0U, 0U),
    896            SIMD_TUPLE(v256_unpacklo_u8_s16, 0U, 0U),
    897            SIMD_TUPLE(v256_unpacklo_s8_s16, 0U, 0U),
    898            SIMD_TUPLE(v256_unpacklo_u16_s32, 0U, 0U),
    899            SIMD_TUPLE(v256_unpacklo_s16_s32, 0U, 0U),
    900            SIMD_TUPLE(v256_unpackhi_u8_s16, 0U, 0U),
    901            SIMD_TUPLE(v256_unpackhi_s8_s16, 0U, 0U),
    902            SIMD_TUPLE(v256_unpackhi_u16_s32, 0U, 0U),
    903            SIMD_TUPLE(v256_unpackhi_s16_s32, 0U, 0U),
    904            SIMD_TUPLE(imm_v256_shr_n_byte<1>, 0U, 0U),
    905            SIMD_TUPLE(imm_v256_shr_n_byte<2>, 0U, 0U),
    906            SIMD_TUPLE(imm_v256_shr_n_byte<3>, 0U, 0U),
    907            SIMD_TUPLE(imm_v256_shr_n_byte<4>, 0U, 0U),
    908            SIMD_TUPLE(imm_v256_shr_n_byte<5>, 0U, 0U),
    909            SIMD_TUPLE(imm_v256_shr_n_byte<6>, 0U, 0U),
    910            SIMD_TUPLE(imm_v256_shr_n_byte<7>, 0U, 0U),
    911            SIMD_TUPLE(imm_v256_shr_n_byte<8>, 0U, 0U),
    912            SIMD_TUPLE(imm_v256_shr_n_byte<9>, 0U, 0U),
    913            SIMD_TUPLE(imm_v256_shr_n_byte<10>, 0U, 0U),
    914            SIMD_TUPLE(imm_v256_shr_n_byte<11>, 0U, 0U),
    915            SIMD_TUPLE(imm_v256_shr_n_byte<12>, 0U, 0U),
    916            SIMD_TUPLE(imm_v256_shr_n_byte<13>, 0U, 0U),
    917            SIMD_TUPLE(imm_v256_shr_n_byte<14>, 0U, 0U),
    918            SIMD_TUPLE(imm_v256_shr_n_byte<15>, 0U, 0U),
    919            SIMD_TUPLE(imm_v256_shr_n_byte<16>, 0U, 0U),
    920            SIMD_TUPLE(imm_v256_shr_n_byte<17>, 0U, 0U),
    921            SIMD_TUPLE(imm_v256_shr_n_byte<18>, 0U, 0U),
    922            SIMD_TUPLE(imm_v256_shr_n_byte<19>, 0U, 0U),
    923            SIMD_TUPLE(imm_v256_shr_n_byte<20>, 0U, 0U),
    924            SIMD_TUPLE(imm_v256_shr_n_byte<21>, 0U, 0U),
    925            SIMD_TUPLE(imm_v256_shr_n_byte<22>, 0U, 0U),
    926            SIMD_TUPLE(imm_v256_shr_n_byte<23>, 0U, 0U),
    927            SIMD_TUPLE(imm_v256_shr_n_byte<24>, 0U, 0U),
    928            SIMD_TUPLE(imm_v256_shr_n_byte<25>, 0U, 0U),
    929            SIMD_TUPLE(imm_v256_shr_n_byte<26>, 0U, 0U),
    930            SIMD_TUPLE(imm_v256_shr_n_byte<27>, 0U, 0U),
    931            SIMD_TUPLE(imm_v256_shr_n_byte<28>, 0U, 0U),
    932            SIMD_TUPLE(imm_v256_shr_n_byte<29>, 0U, 0U),
    933            SIMD_TUPLE(imm_v256_shr_n_byte<30>, 0U, 0U),
    934            SIMD_TUPLE(imm_v256_shr_n_byte<31>, 0U, 0U),
    935            SIMD_TUPLE(imm_v256_shl_n_byte<1>, 0U, 0U),
    936            SIMD_TUPLE(imm_v256_shl_n_byte<2>, 0U, 0U),
    937            SIMD_TUPLE(imm_v256_shl_n_byte<3>, 0U, 0U),
    938            SIMD_TUPLE(imm_v256_shl_n_byte<4>, 0U, 0U),
    939            SIMD_TUPLE(imm_v256_shl_n_byte<5>, 0U, 0U),
    940            SIMD_TUPLE(imm_v256_shl_n_byte<6>, 0U, 0U),
    941            SIMD_TUPLE(imm_v256_shl_n_byte<7>, 0U, 0U),
    942            SIMD_TUPLE(imm_v256_shl_n_byte<8>, 0U, 0U));
    943 
    // V256_V256_Part2 parameter list: imm_v256_shl_n_byte<9..31>, the 8-bit-lane
    // immediate shifts (shl_n_8, shr_n_u8, shr_n_s8, each <1..7>) and the first
    // six imm_v256_shl_n_16 entries (by 1, 2, 4, 6, 8, 10).
    // Every entry passes 0U, 0U as the two trailing SIMD_TUPLE values.
    944 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part2),
    945            SIMD_TUPLE(imm_v256_shl_n_byte<9>, 0U, 0U),
    946            SIMD_TUPLE(imm_v256_shl_n_byte<10>, 0U, 0U),
    947            SIMD_TUPLE(imm_v256_shl_n_byte<11>, 0U, 0U),
    948            SIMD_TUPLE(imm_v256_shl_n_byte<12>, 0U, 0U),
    949            SIMD_TUPLE(imm_v256_shl_n_byte<13>, 0U, 0U),
    950            SIMD_TUPLE(imm_v256_shl_n_byte<14>, 0U, 0U),
    951            SIMD_TUPLE(imm_v256_shl_n_byte<15>, 0U, 0U),
    952            SIMD_TUPLE(imm_v256_shl_n_byte<16>, 0U, 0U),
    953            SIMD_TUPLE(imm_v256_shl_n_byte<17>, 0U, 0U),
    954            SIMD_TUPLE(imm_v256_shl_n_byte<18>, 0U, 0U),
    955            SIMD_TUPLE(imm_v256_shl_n_byte<19>, 0U, 0U),
    956            SIMD_TUPLE(imm_v256_shl_n_byte<20>, 0U, 0U),
    957            SIMD_TUPLE(imm_v256_shl_n_byte<21>, 0U, 0U),
    958            SIMD_TUPLE(imm_v256_shl_n_byte<22>, 0U, 0U),
    959            SIMD_TUPLE(imm_v256_shl_n_byte<23>, 0U, 0U),
    960            SIMD_TUPLE(imm_v256_shl_n_byte<24>, 0U, 0U),
    961            SIMD_TUPLE(imm_v256_shl_n_byte<25>, 0U, 0U),
    962            SIMD_TUPLE(imm_v256_shl_n_byte<26>, 0U, 0U),
    963            SIMD_TUPLE(imm_v256_shl_n_byte<27>, 0U, 0U),
    964            SIMD_TUPLE(imm_v256_shl_n_byte<28>, 0U, 0U),
    965            SIMD_TUPLE(imm_v256_shl_n_byte<29>, 0U, 0U),
    966            SIMD_TUPLE(imm_v256_shl_n_byte<30>, 0U, 0U),
    967            SIMD_TUPLE(imm_v256_shl_n_byte<31>, 0U, 0U),
    968            SIMD_TUPLE(imm_v256_shl_n_8<1>, 0U, 0U),
    969            SIMD_TUPLE(imm_v256_shl_n_8<2>, 0U, 0U),
    970            SIMD_TUPLE(imm_v256_shl_n_8<3>, 0U, 0U),
    971            SIMD_TUPLE(imm_v256_shl_n_8<4>, 0U, 0U),
    972            SIMD_TUPLE(imm_v256_shl_n_8<5>, 0U, 0U),
    973            SIMD_TUPLE(imm_v256_shl_n_8<6>, 0U, 0U),
    974            SIMD_TUPLE(imm_v256_shl_n_8<7>, 0U, 0U),
    975            SIMD_TUPLE(imm_v256_shr_n_u8<1>, 0U, 0U),
    976            SIMD_TUPLE(imm_v256_shr_n_u8<2>, 0U, 0U),
    977            SIMD_TUPLE(imm_v256_shr_n_u8<3>, 0U, 0U),
    978            SIMD_TUPLE(imm_v256_shr_n_u8<4>, 0U, 0U),
    979            SIMD_TUPLE(imm_v256_shr_n_u8<5>, 0U, 0U),
    980            SIMD_TUPLE(imm_v256_shr_n_u8<6>, 0U, 0U),
    981            SIMD_TUPLE(imm_v256_shr_n_u8<7>, 0U, 0U),
    982            SIMD_TUPLE(imm_v256_shr_n_s8<1>, 0U, 0U),
    983            SIMD_TUPLE(imm_v256_shr_n_s8<2>, 0U, 0U),
    984            SIMD_TUPLE(imm_v256_shr_n_s8<3>, 0U, 0U),
    985            SIMD_TUPLE(imm_v256_shr_n_s8<4>, 0U, 0U),
    986            SIMD_TUPLE(imm_v256_shr_n_s8<5>, 0U, 0U),
    987            SIMD_TUPLE(imm_v256_shr_n_s8<6>, 0U, 0U),
    988            SIMD_TUPLE(imm_v256_shr_n_s8<7>, 0U, 0U),
    989            SIMD_TUPLE(imm_v256_shl_n_16<1>, 0U, 0U),
    990            SIMD_TUPLE(imm_v256_shl_n_16<2>, 0U, 0U),
    991            SIMD_TUPLE(imm_v256_shl_n_16<4>, 0U, 0U),
    992            SIMD_TUPLE(imm_v256_shl_n_16<6>, 0U, 0U),
    993            SIMD_TUPLE(imm_v256_shl_n_16<8>, 0U, 0U),
    994            SIMD_TUPLE(imm_v256_shl_n_16<10>, 0U, 0U));
    995 
    // V256_V256_Part3 parameter list: the last two imm_v256_shl_n_16 entries
    // (by 12 and 14), 16-bit-lane right shifts (shr_n_u16, shr_n_s16 by 1, 2, 4,
    // 6, ..., 14) and 32-bit-lane shifts (shl_n_32, shr_n_u32, shr_n_s32 by 1,
    // 4, 8, ..., 28).
    // Every entry passes 0U, 0U as the two trailing SIMD_TUPLE values.
    996 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part3),
    997            SIMD_TUPLE(imm_v256_shl_n_16<12>, 0U, 0U),
    998            SIMD_TUPLE(imm_v256_shl_n_16<14>, 0U, 0U),
    999            SIMD_TUPLE(imm_v256_shr_n_u16<1>, 0U, 0U),
   1000            SIMD_TUPLE(imm_v256_shr_n_u16<2>, 0U, 0U),
   1001            SIMD_TUPLE(imm_v256_shr_n_u16<4>, 0U, 0U),
   1002            SIMD_TUPLE(imm_v256_shr_n_u16<6>, 0U, 0U),
   1003            SIMD_TUPLE(imm_v256_shr_n_u16<8>, 0U, 0U),
   1004            SIMD_TUPLE(imm_v256_shr_n_u16<10>, 0U, 0U),
   1005            SIMD_TUPLE(imm_v256_shr_n_u16<12>, 0U, 0U),
   1006            SIMD_TUPLE(imm_v256_shr_n_u16<14>, 0U, 0U),
   1007            SIMD_TUPLE(imm_v256_shr_n_s16<1>, 0U, 0U),
   1008            SIMD_TUPLE(imm_v256_shr_n_s16<2>, 0U, 0U),
   1009            SIMD_TUPLE(imm_v256_shr_n_s16<4>, 0U, 0U),
   1010            SIMD_TUPLE(imm_v256_shr_n_s16<6>, 0U, 0U),
   1011            SIMD_TUPLE(imm_v256_shr_n_s16<8>, 0U, 0U),
   1012            SIMD_TUPLE(imm_v256_shr_n_s16<10>, 0U, 0U),
   1013            SIMD_TUPLE(imm_v256_shr_n_s16<12>, 0U, 0U),
   1014            SIMD_TUPLE(imm_v256_shr_n_s16<14>, 0U, 0U),
   1015            SIMD_TUPLE(imm_v256_shl_n_32<1>, 0U, 0U),
   1016            SIMD_TUPLE(imm_v256_shl_n_32<4>, 0U, 0U),
   1017            SIMD_TUPLE(imm_v256_shl_n_32<8>, 0U, 0U),
   1018            SIMD_TUPLE(imm_v256_shl_n_32<12>, 0U, 0U),
   1019            SIMD_TUPLE(imm_v256_shl_n_32<16>, 0U, 0U),
   1020            SIMD_TUPLE(imm_v256_shl_n_32<20>, 0U, 0U),
   1021            SIMD_TUPLE(imm_v256_shl_n_32<24>, 0U, 0U),
   1022            SIMD_TUPLE(imm_v256_shl_n_32<28>, 0U, 0U),
   1023            SIMD_TUPLE(imm_v256_shr_n_u32<1>, 0U, 0U),
   1024            SIMD_TUPLE(imm_v256_shr_n_u32<4>, 0U, 0U),
   1025            SIMD_TUPLE(imm_v256_shr_n_u32<8>, 0U, 0U),
   1026            SIMD_TUPLE(imm_v256_shr_n_u32<12>, 0U, 0U),
   1027            SIMD_TUPLE(imm_v256_shr_n_u32<16>, 0U, 0U),
   1028            SIMD_TUPLE(imm_v256_shr_n_u32<20>, 0U, 0U),
   1029            SIMD_TUPLE(imm_v256_shr_n_u32<24>, 0U, 0U),
   1030            SIMD_TUPLE(imm_v256_shr_n_u32<28>, 0U, 0U),
   1031            SIMD_TUPLE(imm_v256_shr_n_s32<1>, 0U, 0U),
   1032            SIMD_TUPLE(imm_v256_shr_n_s32<4>, 0U, 0U),
   1033            SIMD_TUPLE(imm_v256_shr_n_s32<8>, 0U, 0U),
   1034            SIMD_TUPLE(imm_v256_shr_n_s32<12>, 0U, 0U),
   1035            SIMD_TUPLE(imm_v256_shr_n_s32<16>, 0U, 0U),
   1036            SIMD_TUPLE(imm_v256_shr_n_s32<20>, 0U, 0U),
   1037            SIMD_TUPLE(imm_v256_shr_n_s32<24>, 0U, 0U),
   1038            SIMD_TUPLE(imm_v256_shr_n_s32<28>, 0U, 0U));
   1039 
   // V256_V256_Part4 parameter list: 64-bit-lane immediate shifts (shl_n_64,
   // shr_n_u64 and shr_n_s64, each by 1 and then every multiple of 4 up to 60),
   // followed by v256_padd_u8 as the final entry.
   // Every entry passes 0U, 0U as the two trailing SIMD_TUPLE values.
   1040 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part4),
   1041            SIMD_TUPLE(imm_v256_shl_n_64<1>, 0U, 0U),
   1042            SIMD_TUPLE(imm_v256_shl_n_64<4>, 0U, 0U),
   1043            SIMD_TUPLE(imm_v256_shl_n_64<8>, 0U, 0U),
   1044            SIMD_TUPLE(imm_v256_shl_n_64<12>, 0U, 0U),
   1045            SIMD_TUPLE(imm_v256_shl_n_64<16>, 0U, 0U),
   1046            SIMD_TUPLE(imm_v256_shl_n_64<20>, 0U, 0U),
   1047            SIMD_TUPLE(imm_v256_shl_n_64<24>, 0U, 0U),
   1048            SIMD_TUPLE(imm_v256_shl_n_64<28>, 0U, 0U),
   1049            SIMD_TUPLE(imm_v256_shl_n_64<32>, 0U, 0U),
   1050            SIMD_TUPLE(imm_v256_shl_n_64<36>, 0U, 0U),
   1051            SIMD_TUPLE(imm_v256_shl_n_64<40>, 0U, 0U),
   1052            SIMD_TUPLE(imm_v256_shl_n_64<44>, 0U, 0U),
   1053            SIMD_TUPLE(imm_v256_shl_n_64<48>, 0U, 0U),
   1054            SIMD_TUPLE(imm_v256_shl_n_64<52>, 0U, 0U),
   1055            SIMD_TUPLE(imm_v256_shl_n_64<56>, 0U, 0U),
   1056            SIMD_TUPLE(imm_v256_shl_n_64<60>, 0U, 0U),
   1057            SIMD_TUPLE(imm_v256_shr_n_u64<1>, 0U, 0U),
   1058            SIMD_TUPLE(imm_v256_shr_n_u64<4>, 0U, 0U),
   1059            SIMD_TUPLE(imm_v256_shr_n_u64<8>, 0U, 0U),
   1060            SIMD_TUPLE(imm_v256_shr_n_u64<12>, 0U, 0U),
   1061            SIMD_TUPLE(imm_v256_shr_n_u64<16>, 0U, 0U),
   1062            SIMD_TUPLE(imm_v256_shr_n_u64<20>, 0U, 0U),
   1063            SIMD_TUPLE(imm_v256_shr_n_u64<24>, 0U, 0U),
   1064            SIMD_TUPLE(imm_v256_shr_n_u64<28>, 0U, 0U),
   1065            SIMD_TUPLE(imm_v256_shr_n_u64<32>, 0U, 0U),
   1066            SIMD_TUPLE(imm_v256_shr_n_u64<36>, 0U, 0U),
   1067            SIMD_TUPLE(imm_v256_shr_n_u64<40>, 0U, 0U),
   1068            SIMD_TUPLE(imm_v256_shr_n_u64<44>, 0U, 0U),
   1069            SIMD_TUPLE(imm_v256_shr_n_u64<48>, 0U, 0U),
   1070            SIMD_TUPLE(imm_v256_shr_n_u64<52>, 0U, 0U),
   1071            SIMD_TUPLE(imm_v256_shr_n_u64<56>, 0U, 0U),
   1072            SIMD_TUPLE(imm_v256_shr_n_u64<60>, 0U, 0U),
   1073            SIMD_TUPLE(imm_v256_shr_n_s64<1>, 0U, 0U),
   1074            SIMD_TUPLE(imm_v256_shr_n_s64<4>, 0U, 0U),
   1075            SIMD_TUPLE(imm_v256_shr_n_s64<8>, 0U, 0U),
   1076            SIMD_TUPLE(imm_v256_shr_n_s64<12>, 0U, 0U),
   1077            SIMD_TUPLE(imm_v256_shr_n_s64<16>, 0U, 0U),
   1078            SIMD_TUPLE(imm_v256_shr_n_s64<20>, 0U, 0U),
   1079            SIMD_TUPLE(imm_v256_shr_n_s64<24>, 0U, 0U),
   1080            SIMD_TUPLE(imm_v256_shr_n_s64<28>, 0U, 0U),
   1081            SIMD_TUPLE(imm_v256_shr_n_s64<32>, 0U, 0U),
   1082            SIMD_TUPLE(imm_v256_shr_n_s64<36>, 0U, 0U),
   1083            SIMD_TUPLE(imm_v256_shr_n_s64<40>, 0U, 0U),
   1084            SIMD_TUPLE(imm_v256_shr_n_s64<44>, 0U, 0U),
   1085            SIMD_TUPLE(imm_v256_shr_n_s64<48>, 0U, 0U),
   1086            SIMD_TUPLE(imm_v256_shr_n_s64<52>, 0U, 0U),
   1087            SIMD_TUPLE(imm_v256_shr_n_s64<56>, 0U, 0U),
   1088            SIMD_TUPLE(imm_v256_shr_n_s64<60>, 0U, 0U),
   1089            SIMD_TUPLE(v256_padd_u8, 0U, 0U));
   1090 
   // V256_V256_Part5 parameter list: imm_v256_shr_n_word<1..15> followed by
   // imm_v256_shl_n_word<1..15>, all with 0U, 0U trailing values.
   1091 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part5),
   1092            SIMD_TUPLE(imm_v256_shr_n_word<1>, 0U, 0U),
   1093            SIMD_TUPLE(imm_v256_shr_n_word<2>, 0U, 0U),
   1094            SIMD_TUPLE(imm_v256_shr_n_word<3>, 0U, 0U),
   1095            SIMD_TUPLE(imm_v256_shr_n_word<4>, 0U, 0U),
   1096            SIMD_TUPLE(imm_v256_shr_n_word<5>, 0U, 0U),
   1097            SIMD_TUPLE(imm_v256_shr_n_word<6>, 0U, 0U),
   1098            SIMD_TUPLE(imm_v256_shr_n_word<7>, 0U, 0U),
   1099            SIMD_TUPLE(imm_v256_shr_n_word<8>, 0U, 0U),
   1100            SIMD_TUPLE(imm_v256_shr_n_word<9>, 0U, 0U),
   1101            SIMD_TUPLE(imm_v256_shr_n_word<10>, 0U, 0U),
   1102            SIMD_TUPLE(imm_v256_shr_n_word<11>, 0U, 0U),
   1103            SIMD_TUPLE(imm_v256_shr_n_word<12>, 0U, 0U),
   1104            SIMD_TUPLE(imm_v256_shr_n_word<13>, 0U, 0U),
   1105            SIMD_TUPLE(imm_v256_shr_n_word<14>, 0U, 0U),
   1106            SIMD_TUPLE(imm_v256_shr_n_word<15>, 0U, 0U),
   1107            SIMD_TUPLE(imm_v256_shl_n_word<1>, 0U, 0U),
   1108            SIMD_TUPLE(imm_v256_shl_n_word<2>, 0U, 0U),
   1109            SIMD_TUPLE(imm_v256_shl_n_word<3>, 0U, 0U),
   1110            SIMD_TUPLE(imm_v256_shl_n_word<4>, 0U, 0U),
   1111            SIMD_TUPLE(imm_v256_shl_n_word<5>, 0U, 0U),
   1112            SIMD_TUPLE(imm_v256_shl_n_word<6>, 0U, 0U),
   1113            SIMD_TUPLE(imm_v256_shl_n_word<7>, 0U, 0U),
   1114            SIMD_TUPLE(imm_v256_shl_n_word<8>, 0U, 0U),
   1115            SIMD_TUPLE(imm_v256_shl_n_word<9>, 0U, 0U),
   1116            SIMD_TUPLE(imm_v256_shl_n_word<10>, 0U, 0U),
   1117            SIMD_TUPLE(imm_v256_shl_n_word<11>, 0U, 0U),
   1118            SIMD_TUPLE(imm_v256_shl_n_word<12>, 0U, 0U),
   1119            SIMD_TUPLE(imm_v256_shl_n_word<13>, 0U, 0U),
   1120            SIMD_TUPLE(imm_v256_shl_n_word<14>, 0U, 0U),
   1121            SIMD_TUPLE(imm_v256_shl_n_word<15>, 0U, 0U));
   1122 
   // V256_V256V256V256 parameter list: v256_blend_8 (0U, 0U) and
   // v256_wideshuffle_8, which carries the non-zero trailing values 63U, 8U.
   // NOTE(review): 63U/8U presumably bound the shuffle indices via the
   // fixture's mask/maskwidth -- confirm against the test body.
   1123 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256V256V256),
   1124            SIMD_TUPLE(v256_blend_8, 0U, 0U),
   1125            SIMD_TUPLE(v256_wideshuffle_8, 63U, 8U));
   1126 
   // V256_U8 parameter list: the single entry v256_dup_8 with 0U, 0U.
   1127 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U8), SIMD_TUPLE(v256_dup_8, 0U, 0U));
   1128 
   // V256_U16 parameter list: the single entry v256_dup_16 with 0U, 0U.
   1129 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U16), SIMD_TUPLE(v256_dup_16, 0U, 0U));
   1130 
   // V256_U32 parameter list: the single entry v256_dup_32 with 0U, 0U.
   1131 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U32), SIMD_TUPLE(v256_dup_32, 0U, 0U));
   1132 
   // V256_U64 parameter list: the single entry v256_dup_64 with 0U, 0U.
   1133 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U64), SIMD_TUPLE(v256_dup_64, 0U, 0U));
   1134 
   // U32_V256 parameter list: v256_low_u32 and v256_movemask_8, both with
   // 0U, 0U trailing values.
   1135 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256), SIMD_TUPLE(v256_low_u32, 0U, 0U),
   1136            SIMD_TUPLE(v256_movemask_8, 0U, 0U));
   1137 
   // V64_V256 parameter list: the single entry v256_low_v64 with 0U, 0U.
   1138 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V256), SIMD_TUPLE(v256_low_v64, 0U, 0U));
   1139 
   1140 }  // namespace SIMD_NAMESPACE