tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

libintvector.h (37259B)


#ifndef __Vec_Intrin_H
#define __Vec_Intrin_H

#include <sys/types.h>

/* We include config.h here to ensure that the various feature-flags are
* properly brought into scope. Users can either run the configure script, or
* write a config.h themselves and put it under version control. */
#if defined(__has_include)
#if __has_include("config.h")
#include "config.h"
#endif
#endif

/* # DEBUGGING:
* ============
* It is possible to debug the current definitions by using libintvector_debug.h
* See the include at the bottom of the file. */

/* All-ones 64-bit mask when the low bit of x is set, all-zeros otherwise:
 * -(x & 1) is either -1 (0xFFFF...FF in two's complement) or 0. */
#define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1)
     21 
#if defined(__x86_64__) || defined(_M_X64)

#if defined(HACL_CAN_COMPILE_VEC128)

#include <emmintrin.h>
#include <tmmintrin.h>
#include <smmintrin.h>

/* 128-bit vector type for the x86-64 (SSE2/SSSE3/SSE4 + AES-NI) backend. */
typedef __m128i Lib_IntVector_Intrinsics_vec128;

/* AES-NI round primitives and carry-less multiplication (PCLMULQDQ). */
#define Lib_IntVector_Intrinsics_ni_aes_enc(x0, x1) \
   (_mm_aesenc_si128(x0, x1))

#define Lib_IntVector_Intrinsics_ni_aes_enc_last(x0, x1) \
   (_mm_aesenclast_si128(x0, x1))

#define Lib_IntVector_Intrinsics_ni_aes_keygen_assist(x0, x1) \
   (_mm_aeskeygenassist_si128(x0, x1))

#define Lib_IntVector_Intrinsics_ni_clmul(x0, x1, x2) \
   (_mm_clmulepi64_si128(x0, x1, x2))

/* Bitwise operations over the full 128 bits. */
#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \
   (_mm_xor_si128(x0, x1))

/* Lane-wise comparisons: each lane becomes all-ones on true, zero on false.
 * NOTE(review): the gt variants use the SIGNED comparison intrinsics. */
#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
   (_mm_cmpeq_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
   (_mm_cmpeq_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
   (_mm_cmpgt_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
   (_mm_cmpgt_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
   (_mm_or_si128(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
   (_mm_and_si128(x0, x1))

/* Bitwise complement, expressed as xor with an all-ones vector. */
#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
   (_mm_xor_si128(x0, _mm_set1_epi32(-1)))

/* Whole-vector shifts. x1 is a bit count that must be a multiple of 8:
 * the underlying intrinsics shift by whole bytes, hence (x1) / 8. */
#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \
   (_mm_slli_si128(x0, (x1) / 8))

#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \
   (_mm_srli_si128(x0, (x1) / 8))

/* Per-lane shifts by an immediate bit count. */
#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
   (_mm_slli_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
   (_mm_srli_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \
   (_mm_slli_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \
   (_mm_srli_epi32(x0, x1))

/* 32-bit lane rotations by 8/16/24 bits are pure byte permutations, so they
 * use PSHUFB lookup tables rather than shift+combine. */
#define Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) \
   (_mm_shuffle_epi8(x0, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3)))

#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) \
   (_mm_shuffle_epi8(x0, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)))

#define Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) \
   (_mm_shuffle_epi8(x0, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1)))

/* Rotate each 32-bit lane left by x1 bits (0 < x1 < 32); the byte-aligned
 * cases dispatch to the PSHUFB forms above, the rest to shift+xor. */
#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
   (((x1) == 8 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : ((x1) == 24 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) : _mm_xor_si128(_mm_slli_epi32(x0, x1), _mm_srli_epi32(x0, 32 - (x1)))))))

#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
   (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, 32 - (x1)))

/* Lane permutations (selectors x1..x4 pick source lanes). */
#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \
   (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x4, x3, x2, x1)))

#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \
   (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2 * x1 + 1, 2 * x1, 2 * x2 + 1, 2 * x2)))

/* Rotate lanes (not bits): result lane i = source lane (i + x1) mod count. */
#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
   (_mm_shuffle_epi32(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4)))

#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \
   (_mm_shuffle_epi32(x0, _MM_SHUFFLE((2 * x1 + 3) % 4, (2 * x1 + 2) % 4, (2 * x1 + 1) % 4, (2 * x1) % 4)))

/* Unaligned little-endian loads/stores (x86 is little-endian, so these are
 * plain loads/stores). */
#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \
   (_mm_loadu_si128((__m128i*)(x0)))

#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \
   (_mm_loadu_si128((__m128i*)(x0)))

#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \
   (_mm_storeu_si128((__m128i*)(x0), x1))

#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \
   (_mm_storeu_si128((__m128i*)(x0), x1))

/* Big-endian loads/stores: load then byte-swap per element width via PSHUFB. */
#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \
   (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))

#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \
   (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))

#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \
   (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))

#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \
   (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))))

#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \
   (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))))

#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \
   (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))))

/* Lane insert/extract; x2 (resp. x1) is an immediate lane index. */
#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \
   (_mm_insert_epi8(x0, x1, x2))

#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
   (_mm_insert_epi32(x0, x1, x2))

#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
   (_mm_insert_epi64(x0, x1, x2))

#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \
   (_mm_extract_epi8(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
   (_mm_extract_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
   (_mm_extract_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_zero \
   (_mm_setzero_si128())

/* Lane-wise modular arithmetic. */
#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
   (_mm_add_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
   (_mm_sub_epi64(x0, x1))

/* 32x32 -> 64-bit multiply of the LOW 32 bits of each 64-bit lane. */
#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
   (_mm_mul_epu32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
   (_mm_mul_epu32(x0, _mm_set1_epi64x(x1)))

#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
   (_mm_add_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \
   (_mm_sub_epi32(x0, x1))

/* 32x32 -> 32-bit multiply (low half of each product kept). */
#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \
   (_mm_mullo_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \
   (_mm_mullo_epi32(x0, _mm_set1_epi32(x1)))

/* Constructors: broadcast or explicit per-lane values (low lane first). */
#define Lib_IntVector_Intrinsics_vec128_load128(x) \
   ((__m128i)x)

#define Lib_IntVector_Intrinsics_vec128_load64(x) \
   (_mm_set1_epi64x(x)) /* hi lo */

#define Lib_IntVector_Intrinsics_vec128_load64s(x0, x1) \
   (_mm_set_epi64x(x1, x0)) /* hi lo */

#define Lib_IntVector_Intrinsics_vec128_load32(x) \
   (_mm_set1_epi32(x))

#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \
   (_mm_set_epi32(x3, x2, x1, x0)) /* hi lo */

/* Interleave (unpack) low/high halves of two vectors, per element width. */
#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \
   (_mm_unpacklo_epi32(x1, x2))

#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \
   (_mm_unpackhi_epi32(x1, x2))

#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \
   (_mm_unpacklo_epi64(x1, x2))

#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \
   (_mm_unpackhi_epi64(x1, x2))

#endif /* HACL_CAN_COMPILE_VEC128 */
    216 
#if defined(HACL_CAN_COMPILE_VEC256)

#include <immintrin.h>

/* 256-bit vector type for the x86-64 AVX2 backend. */
typedef __m256i Lib_IntVector_Intrinsics_vec256;

/* Lane-wise comparisons: each lane becomes all-ones on true, zero on false.
 * NOTE(review): the gt variants use the SIGNED comparison intrinsics. */
#define Lib_IntVector_Intrinsics_vec256_eq64(x0, x1) \
   (_mm256_cmpeq_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_eq32(x0, x1) \
   (_mm256_cmpeq_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_gt64(x0, x1) \
   (_mm256_cmpgt_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_gt32(x0, x1) \
   (_mm256_cmpgt_epi32(x0, x1))

/* Bitwise operations over the full 256 bits. */
#define Lib_IntVector_Intrinsics_vec256_xor(x0, x1) \
   (_mm256_xor_si256(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_or(x0, x1) \
   (_mm256_or_si256(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_and(x0, x1) \
   (_mm256_and_si256(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_lognot(x0) \
   (_mm256_xor_si256(x0, _mm256_set1_epi32(-1)))

/* Whole-vector shifts; x1 is in bits, must be a multiple of 8 (byte shift).
 * NOTE(review): _mm256_slli_si256/_mm256_srli_si256 shift each 128-bit lane
 * independently — bytes do not cross the lane boundary. */
#define Lib_IntVector_Intrinsics_vec256_shift_left(x0, x1) \
   (_mm256_slli_si256(x0, (x1) / 8))

#define Lib_IntVector_Intrinsics_vec256_shift_right(x0, x1) \
   (_mm256_srli_si256(x0, (x1) / 8))

/* Per-lane shifts by an immediate bit count. */
#define Lib_IntVector_Intrinsics_vec256_shift_left64(x0, x1) \
   (_mm256_slli_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_shift_right64(x0, x1) \
   (_mm256_srli_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_shift_left32(x0, x1) \
   (_mm256_slli_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_shift_right32(x0, x1) \
   (_mm256_srli_epi32(x0, x1))

/* Byte-aligned 32-bit lane rotations implemented as VPSHUFB byte shuffles
 * (the 16-byte pattern is repeated for both 128-bit lanes). */
#define Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3)))

#define Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)))

#define Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1, 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1)))

/* Rotate each 32-bit lane left by x1 bits; byte-aligned cases dispatch to
 * the shuffle forms above, the rest to shift+or. */
#define Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, x1) \
   ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) : (x1 == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) : _mm256_or_si256(_mm256_slli_epi32(x0, x1), _mm256_srli_epi32(x0, 32 - (x1)))))))

#define Lib_IntVector_Intrinsics_vec256_rotate_right32(x0, x1) \
   (Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, 32 - (x1)))

/* Byte-aligned 64-bit lane right-rotations as VPSHUFB byte shuffles. */
#define Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1, 8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1)))

#define Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2, 9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2)))

#define Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3, 10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3)))

#define Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4)))

#define Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5, 12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5)))

#define Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6, 13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6)))

#define Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) \
   (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7, 14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7)))

/* Rotate each 64-bit lane right by x1 bits; byte-aligned cases dispatch to
 * the shuffle forms above, the rest to shift+xor. */
#define Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, x1) \
   ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) : (x1 == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) : (x1 == 32 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) : (x1 == 40 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) : (x1 == 48 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) : (x1 == 56 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) : _mm256_xor_si256(_mm256_srli_epi64((x0), (x1)), _mm256_slli_epi64((x0), (64 - (x1))))))))))))

#define Lib_IntVector_Intrinsics_vec256_rotate_left64(x0, x1) \
   (Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, 64 - (x1)))

/* Lane permutations; selectors pick source lanes. */
#define Lib_IntVector_Intrinsics_vec256_shuffle64(x0, x1, x2, x3, x4) \
   (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(x4, x3, x2, x1)))

#define Lib_IntVector_Intrinsics_vec256_shuffle32(x0, x1, x2, x3, x4, x5, x6, x7, x8) \
   (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(x8, x7, x6, x5, x4, x3, x2, x1)))
    312 
    313 #define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes32(x0, x1) \
    314    (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32((x1 + 7) % 8, (x1 + 6) % 8, (x1 + 5) % 8, (x1 + 4) % 8, (x1 + 3 % 8), (x1 + 2) % 8, (x1 + 1) % 8, x1 % 8)))
    315 
/* Rotate the four 64-bit lanes right by x1 lane positions. */
#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(x0, x1) \
   (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4)))

/* Unaligned little-endian loads (plain loads on little-endian x86). */
#define Lib_IntVector_Intrinsics_vec256_load32_le(x0) \
   (_mm256_loadu_si256((__m256i*)(x0)))

#define Lib_IntVector_Intrinsics_vec256_load64_le(x0) \
   (_mm256_loadu_si256((__m256i*)(x0)))

/* Big-endian loads: load then byte-swap per element width via VPSHUFB. */
#define Lib_IntVector_Intrinsics_vec256_load32_be(x0) \
   (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))

#define Lib_IntVector_Intrinsics_vec256_load64_be(x0) \
   (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))

/* Unaligned stores; the _be variants byte-swap before storing. */
#define Lib_IntVector_Intrinsics_vec256_store32_le(x0, x1) \
   (_mm256_storeu_si256((__m256i*)(x0), x1))

#define Lib_IntVector_Intrinsics_vec256_store64_le(x0, x1) \
   (_mm256_storeu_si256((__m256i*)(x0), x1))

#define Lib_IntVector_Intrinsics_vec256_store32_be(x0, x1) \
   (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))))

#define Lib_IntVector_Intrinsics_vec256_store64_be(x0, x1) \
   (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))))

/* Lane insert/extract; the index argument is an immediate. */
#define Lib_IntVector_Intrinsics_vec256_insert8(x0, x1, x2) \
   (_mm256_insert_epi8(x0, x1, x2))

#define Lib_IntVector_Intrinsics_vec256_insert32(x0, x1, x2) \
   (_mm256_insert_epi32(x0, x1, x2))

#define Lib_IntVector_Intrinsics_vec256_insert64(x0, x1, x2) \
   (_mm256_insert_epi64(x0, x1, x2))

#define Lib_IntVector_Intrinsics_vec256_extract8(x0, x1) \
   (_mm256_extract_epi8(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_extract32(x0, x1) \
   (_mm256_extract_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_extract64(x0, x1) \
   (_mm256_extract_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_zero \
   (_mm256_setzero_si256())

/* Lane-wise modular arithmetic. */
#define Lib_IntVector_Intrinsics_vec256_add64(x0, x1) \
   (_mm256_add_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_sub64(x0, x1) \
   (_mm256_sub_epi64(x0, x1))

/* 32x32 -> 64-bit multiply of the LOW 32 bits of each 64-bit lane. */
#define Lib_IntVector_Intrinsics_vec256_mul64(x0, x1) \
   (_mm256_mul_epu32(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_smul64(x0, x1) \
   (_mm256_mul_epu32(x0, _mm256_set1_epi64x(x1)))

#define Lib_IntVector_Intrinsics_vec256_add32(x0, x1) \
   (_mm256_add_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_sub32(x0, x1) \
   (_mm256_sub_epi32(x0, x1))

/* 32x32 -> 32-bit multiply (low half of each product kept). */
#define Lib_IntVector_Intrinsics_vec256_mul32(x0, x1) \
   (_mm256_mullo_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec256_smul32(x0, x1) \
   (_mm256_mullo_epi32(x0, _mm256_set1_epi32(x1)))

/* Constructors: broadcast or explicit per-lane values (low lane first). */
#define Lib_IntVector_Intrinsics_vec256_load64(x1) \
   (_mm256_set1_epi64x(x1)) /* hi lo */

#define Lib_IntVector_Intrinsics_vec256_load64s(x0, x1, x2, x3) \
   (_mm256_set_epi64x(x3, x2, x1, x0)) /* hi lo */

#define Lib_IntVector_Intrinsics_vec256_load32(x) \
   (_mm256_set1_epi32(x))

#define Lib_IntVector_Intrinsics_vec256_load32s(x0, x1, x2, x3, x4, x5, x6, x7) \
   (_mm256_set_epi32(x7, x6, x5, x4, x3, x2, x1, x0)) /* hi lo */
    399 
    400 #define Lib_IntVector_Intrinsics_vec256_load128(x) \
    401    (_mm256_set_m128i((__m128i)x))
    402 
/* Combine two 128-bit vectors: x0 becomes the low half, x1 the high half. */
#define Lib_IntVector_Intrinsics_vec256_load128s(x0, x1) \
   (_mm256_set_m128i((__m128i)x1, (__m128i)x0))

/* Interleave (unpack) within each 128-bit lane, per element width. */
#define Lib_IntVector_Intrinsics_vec256_interleave_low32(x1, x2) \
   (_mm256_unpacklo_epi32(x1, x2))

#define Lib_IntVector_Intrinsics_vec256_interleave_high32(x1, x2) \
   (_mm256_unpackhi_epi32(x1, x2))

#define Lib_IntVector_Intrinsics_vec256_interleave_low64(x1, x2) \
   (_mm256_unpacklo_epi64(x1, x2))

#define Lib_IntVector_Intrinsics_vec256_interleave_high64(x1, x2) \
   (_mm256_unpackhi_epi64(x1, x2))

/* Combine the low (resp. high) 128-bit halves of two 256-bit vectors. */
#define Lib_IntVector_Intrinsics_vec256_interleave_low128(x1, x2) \
   (_mm256_permute2x128_si256(x1, x2, 0x20))

#define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \
   (_mm256_permute2x128_si256(x1, x2, 0x31))

#endif /* HACL_CAN_COMPILE_VEC256 */
    425 
#elif (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && !defined(__ARM_32BIT_STATE)

#if defined(HACL_CAN_COMPILE_VEC128)

#include <arm_neon.h>

/* 128-bit vector type for the AArch64 NEON backend; the canonical carrier
 * type is uint32x4_t and other element widths are vreinterpret-cast. */
typedef uint32x4_t Lib_IntVector_Intrinsics_vec128;

#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \
   (veorq_u32(x0, x1))
    436 
    437 #define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
    438    (vceqq_u32(x0, x1))
    439 
/* 32-bit lane-wise equality; each lane becomes all-ones or all-zeros. */
#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
   (vceqq_u32(x0, x1))

/* NOTE(review): unsigned comparison, unlike the signed _mm_cmpgt_epi32 used
 * by the x86 backend — confirm callers only compare values < 2^31. */
#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
   (vcgtq_u32(x0, x1))

/* Helpers: narrow the high (resp. low) 32 bits of each 64-bit lane into a
 * 64-bit-wide pair of 32-bit values. */
#define high32(x0) \
   (vmovn_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), 32)))

#define low32(x0) \
   (vmovn_u64(vreinterpretq_u64_u32(x0)))

/* 64-bit unsigned greater-than built from 32-bit compares:
 * hi(x0) > hi(x1), or hi equal and lo(x0) > lo(x1).
 * NOTE(review): vmovl_u32 zero-extends the 32-bit result masks, so a "true"
 * lane is 0x00000000FFFFFFFF, not all-ones as in _mm_cmpgt_epi64; also the
 * comparison is unsigned while the x86 one is signed — verify against
 * callers' expectations. */
#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
   (vreinterpretq_u32_u64(vmovl_u32(vorr_u32(vcgt_u32(high32(x0), high32(x1)), vand_u32(vceq_u32(high32(x0), high32(x1)), vcgt_u32(low32(x0), low32(x1)))))))

#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
   (vorrq_u32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
   (vandq_u32(x0, x1))

/* Bitwise complement. */
#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
   (vmvnq_u32(x0))

/* Whole-vector byte shifts (x1 in bits, multiple of 8).
 * NOTE(review): vdupq_n_u8(0) has type uint8x16_t where vextq_u32 expects
 * uint32x4_t, and the index is a byte count rather than a 32-bit-lane
 * count — these two macros look uncompilable/incorrect if instantiated;
 * they appear unused here. Verify before use. */
#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \
   (vextq_u32(x0, vdupq_n_u8(0), 16 - (x1) / 8))

#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \
   (vextq_u32(x0, vdupq_n_u8(0), (x1) / 8))

/* Per-lane shifts by an immediate bit count. */
#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
   (vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x0), x1)))

#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
   (vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), x1)))

#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \
   (vshlq_n_u32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \
   (vshrq_n_u32(x0, x1))

/* Rotation by 16 is a 16-bit element byte-reverse; other amounts use
 * shift + shift-right-insert (vsriq). */
#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x1) \
   (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1))))

#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
   (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), (x1)), (x0), 32 - (x1))))

#define Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x1) \
   (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1))))

#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
   (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), 32 - (x1)), (x0), (x1))))

/* Rotate lanes (not bits) using vext. */
#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
   (vextq_u32(x0, x0, x1))

/* NOTE(review): vextq_u64 expects uint64x2_t but x0 is uint32x4_t — missing
 * vreinterpret casts; looks uncompilable if instantiated. Verify. */
#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \
   (vextq_u64(x0, x0, x1))

/*
#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4)	\
 (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1,x2,x3,x4)))

#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \
 (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2)))
*/

/* Little-endian loads/stores (plain NEON loads/stores on a little-endian
 * AArch64 target). */
#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \
   (vld1q_u32((const uint32_t*)(x0)))

#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \
   (vld1q_u32((const uint32_t*)(x0)))

#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \
   (vst1q_u32((uint32_t*)(x0), (x1)))

#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \
   (vst1q_u32((uint32_t*)(x0), (x1)))

/*
#define Lib_IntVector_Intrinsics_vec128_load_be(x0)		\
 (     Lib_IntVector_Intrinsics_vec128 l = vrev64q_u8(vld1q_u32((uint32_t*)(x0)));

*/

/* Big-endian loads/stores: byte-reverse per element width with vrev. */
#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \
   (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0))))))

#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \
   (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0))))))

/*
#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1)	\
 (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))))
*/

#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \
   (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x1))))))

#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \
   (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(x1))))))
    542 
/* Lane insert/extract; the index argument is an immediate.
 * NOTE(review): insert8/extract8 apply u8 lane intrinsics directly to a
 * uint32x4_t operand without vreinterpret casts — looks uncompilable if
 * instantiated; they appear unused here. Verify before use. */
#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \
   (vsetq_lane_u8(x1, x0, x2))

#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
   (vsetq_lane_u32(x1, x0, x2))

#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
   (vreinterpretq_u32_u64(vsetq_lane_u64(x1, vreinterpretq_u64_u32(x0), x2)))

#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \
   (vgetq_lane_u8(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
   (vgetq_lane_u32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
   (vgetq_lane_u64(vreinterpretq_u64_u32(x0), x1))

/* The all-zero vector. */
#define Lib_IntVector_Intrinsics_vec128_zero \
   (vdupq_n_u32(0))
    563 
/* Lane-wise modular arithmetic, 64-bit lanes via vreinterpret. */
#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
   (vreinterpretq_u32_u64(vaddq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1))))

#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
   (vreinterpretq_u32_u64(vsubq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1))))

/* 32x32 -> 64-bit multiply of the LOW 32 bits of each 64-bit lane (the NEON
 * counterpart of _mm_mul_epu32): narrow each lane, then widening multiply. */
#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
   (vreinterpretq_u32_u64(vmull_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), vmovn_u64(vreinterpretq_u64_u32(x1)))))

/* Same, with a scalar second operand. */
#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
   (vreinterpretq_u32_u64(vmull_n_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), (uint32_t)x1)))

#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
   (vaddq_u32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \
   (vsubq_u32(x0, x1))
    581 
    582 #define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \
    583    (vmulq_lane_u32(x0, x1))
    584 
    585 #define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \
    586    (vmulq_lane_u32(x0, vdupq_n_u32(x1)))
    587 
/* Constructors: reinterpret a 128-bit value, or broadcast a scalar. */
#define Lib_IntVector_Intrinsics_vec128_load128(x) \
   ((uint32x4_t)(x))

#define Lib_IntVector_Intrinsics_vec128_load64(x) \
   (vreinterpretq_u32_u64(vdupq_n_u64(x))) /* hi lo */

#define Lib_IntVector_Intrinsics_vec128_load32(x) \
   (vdupq_n_u32(x)) /* hi lo */
    596 
    597 static inline Lib_IntVector_Intrinsics_vec128
    598 Lib_IntVector_Intrinsics_vec128_load64s(uint64_t x1, uint64_t x2)
    599 {
    600    const uint64_t a[2] = { x1, x2 };
    601    return vreinterpretq_u32_u64(vld1q_u64(a));
    602 }
    603 
    604 static inline Lib_IntVector_Intrinsics_vec128
    605 Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4)
    606 {
    607    const uint32_t a[4] = { x1, x2, x3, x4 };
    608    return vld1q_u32(a);
    609 }
    610 
/* Interleave (zip) the low/high halves of two vectors, per element width. */
#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \
   (vzip1q_u32(x1, x2))

#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \
   (vzip2q_u32(x1, x2))

#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \
   (vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2))))

#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \
   (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2))))

#endif                   /* HACL_CAN_COMPILE_VEC128 */
    624 
/* IBM z architecture */
#elif defined(__s390x__) /* this flag is for GCC only */

#if defined(HACL_CAN_COMPILE_VEC128)

#include <stdint.h>
#include <vecintrin.h>

/* The main vector 128 type
* We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char,
* unsigned int, unsigned long long: the compiler complains that the parameter
* combination is invalid. */
typedef unsigned char vector128_8 __attribute__((vector_size(16)));
typedef unsigned int vector128_32 __attribute__((vector_size(16)));
typedef unsigned long long vector128_64 __attribute__((vector_size(16)));

typedef vector128_8 Lib_IntVector_Intrinsics_vec128;
typedef vector128_8 vector128;

/* z is big-endian, so the little-endian loads byte-reverse per element
 * width with vec_revb; the big-endian load is a plain vector load.
 * NOTE(review): the dereference-through-cast assumes the target tolerates
 * this access pattern for possibly unaligned x — confirm for callers. */
#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \
   (vector128)((vector128_32)vec_revb(*((vector128_32*)(const uint8_t*)(x))))

#define Lib_IntVector_Intrinsics_vec128_load32_be(x) \
   (vector128)(*((vector128_32*)(const uint8_t*)(x)))

#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \
   (vector128)((vector128_64)vec_revb(*((vector128_64*)(const uint8_t*)(x))))
    652 
/* Store x1 at x0 in little-endian 32-bit element order (byte-reverse each
 * 32-bit element, since z is big-endian).
 * NOTE(review): writes through a `const uint8_t *` by casting away const —
 * UB if the pointee is genuinely const. Signature kept for compatibility;
 * confirm all callers pass writable buffers. */
static inline void
Lib_IntVector_Intrinsics_vec128_store32_le(const uint8_t *x0, vector128 x1)
{
   *((vector128_32 *)x0) = vec_revb((vector128_32)x1);
}
    658 
/* Store x1 at x0 in big-endian order (native on z: a plain vector store).
 * NOTE(review): casts away const from the destination pointer — see
 * store32_le above; confirm callers pass writable buffers. */
static inline void
Lib_IntVector_Intrinsics_vec128_store32_be(const uint8_t *x0, vector128 x1)
{
   *((vector128_32 *)x0) = (vector128_32)x1;
}
    664 
/* Store x1 at x0 in little-endian 64-bit element order (byte-reverse each
 * 64-bit element).
 * NOTE(review): casts away const from the destination pointer — see
 * store32_le above; confirm callers pass writable buffers. */
static inline void
Lib_IntVector_Intrinsics_vec128_store64_le(const uint8_t *x0, vector128 x1)
{
   *((vector128_64 *)x0) = vec_revb((vector128_64)x1);
}
    670 
/* Lane-wise modular addition via GNU vector arithmetic. */
#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
   ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1)))))

#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
   ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1)))))

#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
   ((vector128)(vec_and((vector128)(x0), (vector128)(x1))))

/* Lane-wise equality; each lane becomes all-ones or all-zeros. */
#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
   ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1)))))

#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
   ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1)))))

/* Lane extract/insert by index. */
#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
   ((unsigned int)(vec_extract((vector128_32)(x0), x1)))

#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
   ((unsigned long long)(vec_extract((vector128_64)(x0), x1)))

/* NOTE(review): these compare UNSIGNED element types, unlike the signed
 * _mm_cmpgt_* intrinsics used by the x86 backend — confirm callers. */
#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
   ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1)))))

#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
   ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1)))))

#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
   ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2)))

#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
   ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2)))

/* Interleave (merge) high/low halves of two vectors, per element width. */
#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \
   ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1))))

#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \
   ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1))))

#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \
   ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1))))

#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \
   ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1))))
    715 
/* Broadcast a 32-bit scalar to all four lanes. */
#define Lib_IntVector_Intrinsics_vec128_load32(x)                      \
   ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \
                                (unsigned int)(x), (unsigned int)(x) }))

/* Build a vector from four explicit 32-bit lane values. */
#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \
   ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) }))

/* Broadcast a 64-bit scalar to both lanes. */
#define Lib_IntVector_Intrinsics_vec128_load64(x) \
   ((vector128)((vector128_64)vec_load_pair((unsigned long long)(x), (unsigned long long)(x))))

/* Bitwise NOT: XOR with all-ones (vec_splat_u32(-1) sign-extends every lane
 * to 0xffffffff). */
#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
   ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1))))

/* 32x32 -> 64-bit unsigned multiply of one 32-bit lane per 64-bit half
 * (vec_mulo: the odd-numbered lanes under big-endian element numbering).
 * NOTE(review): intended to mirror _mm_mul_epu32 -- verify lane selection. */
#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
   ((vector128)(vec_mulo((vector128_32)(x0),         \
                         (vector128_32)(x1))))

/* Bitwise OR of the full 128 bits. */
#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
   ((vector128)(vec_or((vector128)(x0), (vector128)(x1))))
    735 
/* Rotate each 32-bit lane left by [x1] bits. */
#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
   ((vector128)(vec_rli((vector128_32)(x0), (unsigned long)(x1))))

/* Rotate right = rotate left by (32 - x1).
 * NOTE(review): for x1 == 0 this asks vec_rli for a rotate by 32 -- assumes
 * the count is reduced mod the lane width; confirm. */
#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
   (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1))))

/* Rotate the four 32-bit lanes right by [x1] lane positions (byte shuffle). */
#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
   ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), (x1 % 4) * 4)))

/* Logical shifts built from an element rotate plus a mask clearing the bits
 * rotated across the lane boundary.
 * NOTE(review): the scalar mask shift is UB for x1 equal to the lane width;
 * callers are assumed to pass counts strictly inside the lane width. */
#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1)                         \
   (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(x1)))) & \
    ((vector128)((vector128_64){ 0xffffffffffffffff << (x1), 0xffffffffffffffff << (x1) })))

#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1)                               \
   (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(64 - (x1))))) & \
    ((vector128)((vector128_64){ 0xffffffffffffffff >> (x1), 0xffffffffffffffff >> (x1) })))

#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1)                              \
   (((vector128)((vector128_32)vec_rli((vector128_32)(x0), (unsigned int)(32 - (x1))))) & \
    ((vector128)((vector128_32){ 0xffffffff >> (x1), 0xffffffff >> (x1),                  \
                                 0xffffffff >> (x1), 0xffffffff >> (x1) })))
    757 
/* Multiply (as in mul64) by the 32-bit scalar [x1], broadcast into a 64-bit
 * lane vector. Doesn't work with vec_splat_u64, hence the explicit literal. */
#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
   ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) }))))

/* Lane-wise 64-bit subtraction (wrapping). */
#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
   ((vector128)((vector128_64)(x0) - (vector128_64)(x1)))
    764 
    765 static inline vector128
    766 Lib_IntVector_Intrinsics_vec128_xor(vector128 x0, vector128 x1)
    767 {
    768    return ((vector128)(vec_xor((vector128)(x0), (vector128)(x1))));
    769 }
    770 
/* All-zero 128-bit vector. */
#define Lib_IntVector_Intrinsics_vec128_zero \
   ((vector128){})
    773 
    774 #endif /* HACL_CAN_COMPILE_VEC128 */
    775 
    776 #elif defined(__powerpc64__) // PowerPC 64 - this flag is for GCC only
    777 
    778 #if defined(HACL_CAN_COMPILE_VEC128)
    779 
    780 #include <altivec.h>
    781 #include <string.h> // for memcpy
    782 #include <stdint.h>
    783 
// The main vector 128 type.
// We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char,
// unsigned int, unsigned long long: the compiler complains that the parameter
// combination is invalid.
typedef vector unsigned char vector128_8;       // 16 x 8-bit lanes
typedef vector unsigned int vector128_32;       // 4 x 32-bit lanes
typedef vector unsigned long long vector128_64; // 2 x 64-bit lanes

// Canonical 128-bit vector type: a byte-lane view, cast to the views above.
typedef vector128_8 Lib_IntVector_Intrinsics_vec128;
typedef vector128_8 vector128;
    794 
// Unaligned 16-byte loads/stores via vec_xl/vec_xst. No byte swap is done.
// NOTE(review): without a swap these are little-endian only on ppc64le
// targets -- confirm this path is never built for big-endian powerpc64.
#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \
   ((vector128)((vector128_32)(vec_xl(0, (const unsigned int*)((const uint8_t*)(x))))))

#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \
   ((vector128)((vector128_64)(vec_xl(0, (const unsigned long long*)((const uint8_t*)(x))))))

#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \
   (vec_xst((vector128_32)(x1), 0, (unsigned int*)((uint8_t*)(x0))))

#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \
   (vec_xst((vector128_64)(x1), 0, (unsigned long long*)((uint8_t*)(x0))))
    806 
// Lane-wise 32-bit / 64-bit addition (wrapping).
#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
   ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1)))))

#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
   ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1)))))

// Bitwise AND of the full 128 bits.
#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
   ((vector128)(vec_and((vector128)(x0), (vector128)(x1))))

// Lane-wise equality: each lane becomes all-ones when equal, else all-zeros.
#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
   ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1)))))

#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
   ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1)))))

// Extract lane x1 as a scalar (x1 must be a constant lane index).
#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
   ((unsigned int)(vec_extract((vector128_32)(x0), x1)))

#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
   ((unsigned long long)(vec_extract((vector128_64)(x0), x1)))

// Lane-wise greater-than, producing an all-ones/all-zeros lane mask.
// The lane types are unsigned, so the comparison is unsigned.
#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
   ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1)))))

#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
   ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1)))))

// Insert scalar x1 into lane x2 of x0.
#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
   ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2)))

#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
   ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2)))

// Interleave lanes of the two vectors.
// NOTE(review): "high" maps to vec_mergel and "low" to vec_mergeh, mirroring
// the s390x section -- confirm against x86 unpackhi/unpacklo on ppc64le.
#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \
   ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1))))

#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \
   ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1))))

#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \
   ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1))))

#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \
   ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1))))
    851 
// Broadcast a 32-bit scalar to all four lanes.
#define Lib_IntVector_Intrinsics_vec128_load32(x)                      \
   ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \
                                (unsigned int)(x), (unsigned int)(x) }))

// Build a vector from four explicit 32-bit lane values.
#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \
   ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) }))

// Broadcast a 64-bit scalar to both lanes.
#define Lib_IntVector_Intrinsics_vec128_load64(x) \
   ((vector128)((vector128_64){ (unsigned long long)(x), (unsigned long long)(x) }))

// Bitwise NOT: XOR with all-ones (vec_splat_u32(-1) sign-extends every lane
// to 0xffffffff).
#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
   ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1))))

// 32x32 -> 64-bit unsigned multiply of one 32-bit lane per 64-bit half.
// NOTE(review): the s390x section uses vec_mulo; vec_mule is used here,
// presumably because GCC swaps even/odd element numbering on little-endian
// POWER -- confirm against _mm_mul_epu32 semantics.
#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
   ((vector128)(vec_mule((vector128_32)(x0),         \
                         (vector128_32)(x1))))

// Bitwise OR of the full 128 bits.
#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
   ((vector128)(vec_or((vector128)(x0), (vector128)(x1))))
    871 
// Rotate each 32-bit lane left by x1 bits (vec_rl takes a per-lane count vector).
#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
   ((vector128)(vec_rl((vector128_32)(x0), (vector128_32){ (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1) })))

// Rotate right = rotate left by (32 - x1).
// NOTE(review): for x1 == 0 this rotates by 32 -- assumes vec_rl reduces the
// count mod the lane width; confirm.
#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
   (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1))))

// Rotate the four 32-bit lanes right by x1 positions. vec_sld shifts left by
// bytes, so a right rotation by x1 lanes is a left shift by ((4 - x1) % 4) * 4 bytes.
#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
   ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), ((4 - (x1)) % 4) * 4)))

// Lane-wise 64-bit logical shifts (vec_sl/vec_sr take per-lane count vectors).
#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
   ((vector128)((vector128_64)vec_sl((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) })))

#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
   ((vector128)((vector128_64)vec_sr((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) })))

// Multiply (as in mul64) by the 32-bit scalar x1, broadcast into a 64-bit
// lane vector. Doesn't work with vec_splat_u64, hence the explicit literal.
#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
   ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) }))))

// Lane-wise 64-bit subtraction (wrapping).
#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
   ((vector128)((vector128_64)(x0) - (vector128_64)(x1)))

// Bitwise XOR of the full 128 bits.
#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \
   ((vector128)(vec_xor((vector128)(x0), (vector128)(x1))))

// All-zero 128-bit vector.
#define Lib_IntVector_Intrinsics_vec128_zero \
   ((vector128){})
    899 
    900 #endif /* HACL_CAN_COMPILE_VEC128 */
    901 
    902 #endif // PowerPC64
    903 
    904 // DEBUGGING:
    905 // If libintvector_debug.h exists, use it to debug the current implementations.
    906 // Note that some flags must be enabled for the debugging to be effective:
    907 // see libintvector_debug.h for more details.
    908 #if defined(__has_include)
    909 #if __has_include("libintvector_debug.h")
    910 #include "libintvector_debug.h"
    911 #endif
    912 #endif
    913 
    914 #endif // __Vec_Intrin_H