libintvector.h (37259B)
1 #ifndef __Vec_Intrin_H 2 #define __Vec_Intrin_H 3 4 #include <sys/types.h> 5 6 /* We include config.h here to ensure that the various feature-flags are 7 * properly brought into scope. Users can either run the configure script, or 8 * write a config.h themselves and put it under version control. */ 9 #if defined(__has_include) 10 #if __has_include("config.h") 11 #include "config.h" 12 #endif 13 #endif 14 15 /* # DEBUGGING: 16 * ============ 17 * It is possible to debug the current definitions by using libintvector_debug.h 18 * See the include at the bottom of the file. */ 19 20 #define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1) 21 22 #if defined(__x86_64__) || defined(_M_X64) 23 24 #if defined(HACL_CAN_COMPILE_VEC128) 25 26 #include <emmintrin.h> 27 #include <tmmintrin.h> 28 #include <smmintrin.h> 29 30 typedef __m128i Lib_IntVector_Intrinsics_vec128; 31 32 #define Lib_IntVector_Intrinsics_ni_aes_enc(x0, x1) \ 33 (_mm_aesenc_si128(x0, x1)) 34 35 #define Lib_IntVector_Intrinsics_ni_aes_enc_last(x0, x1) \ 36 (_mm_aesenclast_si128(x0, x1)) 37 38 #define Lib_IntVector_Intrinsics_ni_aes_keygen_assist(x0, x1) \ 39 (_mm_aeskeygenassist_si128(x0, x1)) 40 41 #define Lib_IntVector_Intrinsics_ni_clmul(x0, x1, x2) \ 42 (_mm_clmulepi64_si128(x0, x1, x2)) 43 44 #define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ 45 (_mm_xor_si128(x0, x1)) 46 47 #define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ 48 (_mm_cmpeq_epi64(x0, x1)) 49 50 #define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ 51 (_mm_cmpeq_epi32(x0, x1)) 52 53 #define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ 54 (_mm_cmpgt_epi64(x0, x1)) 55 56 #define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ 57 (_mm_cmpgt_epi32(x0, x1)) 58 59 #define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ 60 (_mm_or_si128(x0, x1)) 61 62 #define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ 63 (_mm_and_si128(x0, x1)) 64 65 #define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ 66 (_mm_xor_si128(x0, _mm_set1_epi32(-1))) 67 68 
/* Whole-vector shifts: the count x1 is in bits, _mm_slli_si128/_mm_srli_si128
 * shift by bytes, hence the division by 8. */
#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \
  (_mm_slli_si128(x0, (x1) / 8))

#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \
  (_mm_srli_si128(x0, (x1) / 8))

#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
  (_mm_slli_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
  (_mm_srli_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \
  (_mm_slli_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \
  (_mm_srli_epi32(x0, x1))

/* 32-bit lane rotations by multiples of 8 bits are pure byte permutations,
 * done with a single pshufb. */
#define Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) \
  (_mm_shuffle_epi8(x0, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3)))

#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) \
  (_mm_shuffle_epi8(x0, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)))

#define Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) \
  (_mm_shuffle_epi8(x0, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1)))

/* General rotation: use the pshufb fast paths for 8/16/24, otherwise combine
 * a left and a right shift. x1 is expected to be a compile-time constant. */
#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
  (((x1) == 8 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : ((x1) == 24 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) : _mm_xor_si128(_mm_slli_epi32(x0, x1), _mm_srli_epi32(x0, 32 - (x1)))))))

#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
  (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, 32 - (x1)))

#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \
  (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x4, x3, x2, x1)))

#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \
  (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2 * x1 + 1, 2 * x1, 2 * x2 + 1, 2 * x2)))

/* Rotate the vector lanes right by x1 positions (lane-granular, not bit-granular). */
#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
  (_mm_shuffle_epi32(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4)))

#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \
  (_mm_shuffle_epi32(x0, _MM_SHUFFLE((2 * x1 + 3) % 4, (2 * x1 + 2) % 4, (2 * x1 + 1) % 4, (2 * x1) % 4)))

/* Unaligned little-endian loads/stores; the _be variants byte-swap via pshufb. */
#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \
  (_mm_loadu_si128((__m128i*)(x0)))

#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \
  (_mm_loadu_si128((__m128i*)(x0)))

#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \
  (_mm_storeu_si128((__m128i*)(x0), x1))

#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \
  (_mm_storeu_si128((__m128i*)(x0), x1))

#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \
  (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))

#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \
  (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))

#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \
  (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))

#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \
  (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))))

#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \
  (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))))

#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \
  (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))))

/* Lane insertion/extraction; the lane index must be a compile-time constant. */
#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \
  (_mm_insert_epi8(x0, x1, x2))

#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
  (_mm_insert_epi32(x0, x1, x2))

#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
  (_mm_insert_epi64(x0, x1, x2))

#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \
  (_mm_extract_epi8(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
  (_mm_extract_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
  (_mm_extract_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_zero \
  (_mm_setzero_si128())

/* Lane-wise arithmetic.  mul64 is pmuludq: it multiplies the LOW 32 bits of
 * each 64-bit lane, producing full 64-bit products. */
#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
  (_mm_add_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
  (_mm_sub_epi64(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
  (_mm_mul_epu32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
  (_mm_mul_epu32(x0, _mm_set1_epi64x(x1)))

#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
  (_mm_add_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \
  (_mm_sub_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \
  (_mm_mullo_epi32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \
  (_mm_mullo_epi32(x0, _mm_set1_epi32(x1)))
#define Lib_IntVector_Intrinsics_vec128_load128(x) \ 189 ((__m128i)x) 190 191 #define Lib_IntVector_Intrinsics_vec128_load64(x) \ 192 (_mm_set1_epi64x(x)) /* hi lo */ 193 194 #define Lib_IntVector_Intrinsics_vec128_load64s(x0, x1) \ 195 (_mm_set_epi64x(x1, x0)) /* hi lo */ 196 197 #define Lib_IntVector_Intrinsics_vec128_load32(x) \ 198 (_mm_set1_epi32(x)) 199 200 #define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ 201 (_mm_set_epi32(x3, x2, x1, x0)) /* hi lo */ 202 203 #define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ 204 (_mm_unpacklo_epi32(x1, x2)) 205 206 #define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ 207 (_mm_unpackhi_epi32(x1, x2)) 208 209 #define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ 210 (_mm_unpacklo_epi64(x1, x2)) 211 212 #define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ 213 (_mm_unpackhi_epi64(x1, x2)) 214 215 #endif /* HACL_CAN_COMPILE_VEC128 */ 216 217 #if defined(HACL_CAN_COMPILE_VEC256) 218 219 #include <immintrin.h> 220 221 typedef __m256i Lib_IntVector_Intrinsics_vec256; 222 223 #define Lib_IntVector_Intrinsics_vec256_eq64(x0, x1) \ 224 (_mm256_cmpeq_epi64(x0, x1)) 225 226 #define Lib_IntVector_Intrinsics_vec256_eq32(x0, x1) \ 227 (_mm256_cmpeq_epi32(x0, x1)) 228 229 #define Lib_IntVector_Intrinsics_vec256_gt64(x0, x1) \ 230 (_mm256_cmpgt_epi64(x0, x1)) 231 232 #define Lib_IntVector_Intrinsics_vec256_gt32(x0, x1) \ 233 (_mm256_cmpgt_epi32(x0, x1)) 234 235 #define Lib_IntVector_Intrinsics_vec256_xor(x0, x1) \ 236 (_mm256_xor_si256(x0, x1)) 237 238 #define Lib_IntVector_Intrinsics_vec256_or(x0, x1) \ 239 (_mm256_or_si256(x0, x1)) 240 241 #define Lib_IntVector_Intrinsics_vec256_and(x0, x1) \ 242 (_mm256_and_si256(x0, x1)) 243 244 #define Lib_IntVector_Intrinsics_vec256_lognot(x0) \ 245 (_mm256_xor_si256(x0, _mm256_set1_epi32(-1))) 246 247 #define Lib_IntVector_Intrinsics_vec256_shift_left(x0, x1) \ 248 (_mm256_slli_si256(x0, (x1) / 8)) 249 250 #define 
Lib_IntVector_Intrinsics_vec256_shift_right(x0, x1) \ 251 (_mm256_srli_si256(x0, (x1) / 8)) 252 253 #define Lib_IntVector_Intrinsics_vec256_shift_left64(x0, x1) \ 254 (_mm256_slli_epi64(x0, x1)) 255 256 #define Lib_IntVector_Intrinsics_vec256_shift_right64(x0, x1) \ 257 (_mm256_srli_epi64(x0, x1)) 258 259 #define Lib_IntVector_Intrinsics_vec256_shift_left32(x0, x1) \ 260 (_mm256_slli_epi32(x0, x1)) 261 262 #define Lib_IntVector_Intrinsics_vec256_shift_right32(x0, x1) \ 263 (_mm256_srli_epi32(x0, x1)) 264 265 #define Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) \ 266 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))) 267 268 #define Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) \ 269 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))) 270 271 #define Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) \ 272 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1, 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1))) 273 274 #define Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, x1) \ 275 ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) : (x1 == 24 ? 
Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) : _mm256_or_si256(_mm256_slli_epi32(x0, x1), _mm256_srli_epi32(x0, 32 - (x1))))))) 276 277 #define Lib_IntVector_Intrinsics_vec256_rotate_right32(x0, x1) \ 278 (Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, 32 - (x1))) 279 280 #define Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) \ 281 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1, 8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1))) 282 283 #define Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) \ 284 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2, 9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2))) 285 286 #define Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) \ 287 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3, 10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3))) 288 289 #define Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) \ 290 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4))) 291 292 #define Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) \ 293 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5, 12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5))) 294 295 #define Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) \ 296 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6, 13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6))) 297 298 #define Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) \ 299 (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7, 14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7))) 300 301 #define Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, x1) \ 302 ((x1 == 8 ? 
Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) : (x1 == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) : (x1 == 32 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) : (x1 == 40 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) : (x1 == 48 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) : (x1 == 56 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) : _mm256_xor_si256(_mm256_srli_epi64((x0), (x1)), _mm256_slli_epi64((x0), (64 - (x1)))))))))))) 303 304 #define Lib_IntVector_Intrinsics_vec256_rotate_left64(x0, x1) \ 305 (Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, 64 - (x1))) 306 307 #define Lib_IntVector_Intrinsics_vec256_shuffle64(x0, x1, x2, x3, x4) \ 308 (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(x4, x3, x2, x1))) 309 310 #define Lib_IntVector_Intrinsics_vec256_shuffle32(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ 311 (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(x8, x7, x6, x5, x4, x3, x2, x1))) 312 313 #define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes32(x0, x1) \ 314 (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32((x1 + 7) % 8, (x1 + 6) % 8, (x1 + 5) % 8, (x1 + 4) % 8, (x1 + 3 % 8), (x1 + 2) % 8, (x1 + 1) % 8, x1 % 8))) 315 316 #define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(x0, x1) \ 317 (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4))) 318 319 #define Lib_IntVector_Intrinsics_vec256_load32_le(x0) \ 320 (_mm256_loadu_si256((__m256i*)(x0))) 321 322 #define Lib_IntVector_Intrinsics_vec256_load64_le(x0) \ 323 (_mm256_loadu_si256((__m256i*)(x0))) 324 325 #define Lib_IntVector_Intrinsics_vec256_load32_be(x0) \ 326 (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) 327 328 #define Lib_IntVector_Intrinsics_vec256_load64_be(x0) \ 329 
(_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) 330 331 #define Lib_IntVector_Intrinsics_vec256_store32_le(x0, x1) \ 332 (_mm256_storeu_si256((__m256i*)(x0), x1)) 333 334 #define Lib_IntVector_Intrinsics_vec256_store64_le(x0, x1) \ 335 (_mm256_storeu_si256((__m256i*)(x0), x1)) 336 337 #define Lib_IntVector_Intrinsics_vec256_store32_be(x0, x1) \ 338 (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) 339 340 #define Lib_IntVector_Intrinsics_vec256_store64_be(x0, x1) \ 341 (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) 342 343 #define Lib_IntVector_Intrinsics_vec256_insert8(x0, x1, x2) \ 344 (_mm256_insert_epi8(x0, x1, x2)) 345 346 #define Lib_IntVector_Intrinsics_vec256_insert32(x0, x1, x2) \ 347 (_mm256_insert_epi32(x0, x1, x2)) 348 349 #define Lib_IntVector_Intrinsics_vec256_insert64(x0, x1, x2) \ 350 (_mm256_insert_epi64(x0, x1, x2)) 351 352 #define Lib_IntVector_Intrinsics_vec256_extract8(x0, x1) \ 353 (_mm256_extract_epi8(x0, x1)) 354 355 #define Lib_IntVector_Intrinsics_vec256_extract32(x0, x1) \ 356 (_mm256_extract_epi32(x0, x1)) 357 358 #define Lib_IntVector_Intrinsics_vec256_extract64(x0, x1) \ 359 (_mm256_extract_epi64(x0, x1)) 360 361 #define Lib_IntVector_Intrinsics_vec256_zero \ 362 (_mm256_setzero_si256()) 363 364 #define Lib_IntVector_Intrinsics_vec256_add64(x0, x1) \ 365 (_mm256_add_epi64(x0, x1)) 366 367 #define Lib_IntVector_Intrinsics_vec256_sub64(x0, x1) \ 368 (_mm256_sub_epi64(x0, x1)) 369 370 #define Lib_IntVector_Intrinsics_vec256_mul64(x0, x1) \ 371 (_mm256_mul_epu32(x0, x1)) 372 373 #define Lib_IntVector_Intrinsics_vec256_smul64(x0, x1) 
\ 374 (_mm256_mul_epu32(x0, _mm256_set1_epi64x(x1))) 375 376 #define Lib_IntVector_Intrinsics_vec256_add32(x0, x1) \ 377 (_mm256_add_epi32(x0, x1)) 378 379 #define Lib_IntVector_Intrinsics_vec256_sub32(x0, x1) \ 380 (_mm256_sub_epi32(x0, x1)) 381 382 #define Lib_IntVector_Intrinsics_vec256_mul32(x0, x1) \ 383 (_mm256_mullo_epi32(x0, x1)) 384 385 #define Lib_IntVector_Intrinsics_vec256_smul32(x0, x1) \ 386 (_mm256_mullo_epi32(x0, _mm256_set1_epi32(x1))) 387 388 #define Lib_IntVector_Intrinsics_vec256_load64(x1) \ 389 (_mm256_set1_epi64x(x1)) /* hi lo */ 390 391 #define Lib_IntVector_Intrinsics_vec256_load64s(x0, x1, x2, x3) \ 392 (_mm256_set_epi64x(x3, x2, x1, x0)) /* hi lo */ 393 394 #define Lib_IntVector_Intrinsics_vec256_load32(x) \ 395 (_mm256_set1_epi32(x)) 396 397 #define Lib_IntVector_Intrinsics_vec256_load32s(x0, x1, x2, x3, x4, x5, x6, x7) \ 398 (_mm256_set_epi32(x7, x6, x5, x4, x3, x2, x1, x0)) /* hi lo */ 399 400 #define Lib_IntVector_Intrinsics_vec256_load128(x) \ 401 (_mm256_set_m128i((__m128i)x)) 402 403 #define Lib_IntVector_Intrinsics_vec256_load128s(x0, x1) \ 404 (_mm256_set_m128i((__m128i)x1, (__m128i)x0)) 405 406 #define Lib_IntVector_Intrinsics_vec256_interleave_low32(x1, x2) \ 407 (_mm256_unpacklo_epi32(x1, x2)) 408 409 #define Lib_IntVector_Intrinsics_vec256_interleave_high32(x1, x2) \ 410 (_mm256_unpackhi_epi32(x1, x2)) 411 412 #define Lib_IntVector_Intrinsics_vec256_interleave_low64(x1, x2) \ 413 (_mm256_unpacklo_epi64(x1, x2)) 414 415 #define Lib_IntVector_Intrinsics_vec256_interleave_high64(x1, x2) \ 416 (_mm256_unpackhi_epi64(x1, x2)) 417 418 #define Lib_IntVector_Intrinsics_vec256_interleave_low128(x1, x2) \ 419 (_mm256_permute2x128_si256(x1, x2, 0x20)) 420 421 #define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \ 422 (_mm256_permute2x128_si256(x1, x2, 0x31)) 423 424 #endif /* HACL_CAN_COMPILE_VEC256 */ 425 426 #elif (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && 
!defined(__ARM_32BIT_STATE) 427 428 #if defined(HACL_CAN_COMPILE_VEC128) 429 430 #include <arm_neon.h> 431 432 typedef uint32x4_t Lib_IntVector_Intrinsics_vec128; 433 434 #define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ 435 (veorq_u32(x0, x1)) 436 437 #define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ 438 (vceqq_u32(x0, x1)) 439 440 #define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ 441 (vceqq_u32(x0, x1)) 442 443 #define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ 444 (vcgtq_u32(x0, x1)) 445 446 #define high32(x0) \ 447 (vmovn_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), 32))) 448 449 #define low32(x0) \ 450 (vmovn_u64(vreinterpretq_u64_u32(x0))) 451 452 #define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ 453 (vreinterpretq_u32_u64(vmovl_u32(vorr_u32(vcgt_u32(high32(x0), high32(x1)), vand_u32(vceq_u32(high32(x0), high32(x1)), vcgt_u32(low32(x0), low32(x1))))))) 454 455 #define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ 456 (vorrq_u32(x0, x1)) 457 458 #define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ 459 (vandq_u32(x0, x1)) 460 461 #define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ 462 (vmvnq_u32(x0)) 463 464 #define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ 465 (vextq_u32(x0, vdupq_n_u8(0), 16 - (x1) / 8)) 466 467 #define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ 468 (vextq_u32(x0, vdupq_n_u8(0), (x1) / 8)) 469 470 #define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ 471 (vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x0), x1))) 472 473 #define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ 474 (vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), x1))) 475 476 #define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ 477 (vshlq_n_u32(x0, x1)) 478 479 #define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ 480 (vshrq_n_u32(x0, x1)) 481 482 #define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x1) \ 483 (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1)))) 
/* General 32-bit rotation: rev16 fast path for 16, otherwise shift-insert. */
#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
  (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), (x1)), (x0), 32 - (x1))))

#define Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x1) \
  (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1))))

#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
  (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), 32 - (x1)), (x0), (x1))))

#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
  (vextq_u32(x0, x0, x1))

/* FIX: vextq_u64 operates on uint64x2_t, but the vector type here is
 * uint32x4_t — reinterpret in and out. */
#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \
  (vreinterpretq_u32_u64(vextq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x0), x1)))

/*
#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \
  (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1,x2,x3,x4)))

#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \
  (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2)))
*/

/* Little-endian loads/stores; the _be variants byte-reverse each lane. */
#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \
  (vld1q_u32((const uint32_t*)(x0)))

#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \
  (vld1q_u32((const uint32_t*)(x0)))

#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \
  (vst1q_u32((uint32_t*)(x0), (x1)))

#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \
  (vst1q_u32((uint32_t*)(x0), (x1)))

/*
#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \
  ( Lib_IntVector_Intrinsics_vec128 l = vrev64q_u8(vld1q_u32((uint32_t*)(x0)));

*/

#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \
  (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0))))))

#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \
  (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0))))))

/*
#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \
  (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))))
*/

#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \
  (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x1))))))

#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \
  (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(x1))))))

/* FIX: vsetq_lane_u8 / vgetq_lane_u8 operate on uint8x16_t; reinterpret the
 * uint32x4_t vector so the 8-bit lane accessors type-check. */
#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \
  (vreinterpretq_u32_u8(vsetq_lane_u8(x1, vreinterpretq_u8_u32(x0), x2)))

#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
  (vsetq_lane_u32(x1, x0, x2))

#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
  (vreinterpretq_u32_u64(vsetq_lane_u64(x1, vreinterpretq_u64_u32(x0), x2)))

#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \
  (vgetq_lane_u8(vreinterpretq_u8_u32(x0), x1))

#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
  (vgetq_lane_u32(x0, x1))

#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
  (vgetq_lane_u64(vreinterpretq_u64_u32(x0), x1))

#define Lib_IntVector_Intrinsics_vec128_zero \
  (vdupq_n_u32(0))

/* 64-bit lane arithmetic via reinterpretation of the uint32x4_t vector.
 * mul64 multiplies the low 32 bits of each 64-bit lane (matching pmuludq). */
#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
  (vreinterpretq_u32_u64(vaddq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1))))

#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
  (vreinterpretq_u32_u64(vsubq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1))))

#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
  (vreinterpretq_u32_u64(vmull_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), vmovn_u64(vreinterpretq_u64_u32(x1)))))

#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
  (vreinterpretq_u32_u64(vmull_n_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), (uint32_t)x1)))

#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
  (vaddq_u32(x0, x1))
578 579 #define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ 580 (vsubq_u32(x0, x1)) 581 582 #define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ 583 (vmulq_lane_u32(x0, x1)) 584 585 #define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ 586 (vmulq_lane_u32(x0, vdupq_n_u32(x1))) 587 588 #define Lib_IntVector_Intrinsics_vec128_load128(x) \ 589 ((uint32x4_t)(x)) 590 591 #define Lib_IntVector_Intrinsics_vec128_load64(x) \ 592 (vreinterpretq_u32_u64(vdupq_n_u64(x))) /* hi lo */ 593 594 #define Lib_IntVector_Intrinsics_vec128_load32(x) \ 595 (vdupq_n_u32(x)) /* hi lo */ 596 597 static inline Lib_IntVector_Intrinsics_vec128 598 Lib_IntVector_Intrinsics_vec128_load64s(uint64_t x1, uint64_t x2) 599 { 600 const uint64_t a[2] = { x1, x2 }; 601 return vreinterpretq_u32_u64(vld1q_u64(a)); 602 } 603 604 static inline Lib_IntVector_Intrinsics_vec128 605 Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) 606 { 607 const uint32_t a[4] = { x1, x2, x3, x4 }; 608 return vld1q_u32(a); 609 } 610 611 #define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ 612 (vzip1q_u32(x1, x2)) 613 614 #define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ 615 (vzip2q_u32(x1, x2)) 616 617 #define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ 618 (vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) 619 620 #define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ 621 (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) 622 623 #endif /* HACL_CAN_COMPILE_VEC128 */ 624 625 /* IBM z architecture */ 626 #elif defined(__s390x__) /* this flag is for GCC only */ 627 628 #if defined(HACL_CAN_COMPILE_VEC128) 629 630 #include <stdint.h> 631 #include <vecintrin.h> 632 633 /* The main vector 128 type 634 * We can't use uint8_t, uint32_t, uint64_t... 
instead of unsigned char, 635 * unsigned int, unsigned long long: the compiler complains that the parameter 636 * combination is invalid. */ 637 typedef unsigned char vector128_8 __attribute__((vector_size(16))); 638 typedef unsigned int vector128_32 __attribute__((vector_size(16))); 639 typedef unsigned long long vector128_64 __attribute__((vector_size(16))); 640 641 typedef vector128_8 Lib_IntVector_Intrinsics_vec128; 642 typedef vector128_8 vector128; 643 644 #define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ 645 (vector128)((vector128_32)vec_revb(*((vector128_32*)(const uint8_t*)(x)))) 646 647 #define Lib_IntVector_Intrinsics_vec128_load32_be(x) \ 648 (vector128)(*((vector128_32*)(const uint8_t*)(x))) 649 650 #define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ 651 (vector128)((vector128_64)vec_revb(*((vector128_64*)(const uint8_t*)(x)))) 652 653 static inline void 654 Lib_IntVector_Intrinsics_vec128_store32_le(const uint8_t *x0, vector128 x1) 655 { 656 *((vector128_32 *)x0) = vec_revb((vector128_32)x1); 657 } 658 659 static inline void 660 Lib_IntVector_Intrinsics_vec128_store32_be(const uint8_t *x0, vector128 x1) 661 { 662 *((vector128_32 *)x0) = (vector128_32)x1; 663 } 664 665 static inline void 666 Lib_IntVector_Intrinsics_vec128_store64_le(const uint8_t *x0, vector128 x1) 667 { 668 *((vector128_64 *)x0) = vec_revb((vector128_64)x1); 669 } 670 671 #define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ 672 ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) 673 674 #define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ 675 ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) 676 677 #define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ 678 ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) 679 680 #define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ 681 ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) 682 683 #define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ 684 
((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) 685 686 #define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ 687 ((unsigned int)(vec_extract((vector128_32)(x0), x1))) 688 689 #define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ 690 ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) 691 692 #define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ 693 ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) 694 695 #define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ 696 ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) 697 698 #define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ 699 ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) 700 701 #define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ 702 ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) 703 704 #define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ 705 ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) 706 707 #define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ 708 ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) 709 710 #define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ 711 ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) 712 713 #define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ 714 ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) 715 716 #define Lib_IntVector_Intrinsics_vec128_load32(x) \ 717 ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \ 718 (unsigned int)(x), (unsigned int)(x) })) 719 720 #define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ 721 ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) 722 723 #define Lib_IntVector_Intrinsics_vec128_load64(x) 
\ 724 ((vector128)((vector128_64)vec_load_pair((unsigned long long)(x), (unsigned long long)(x)))) 725 726 #define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ 727 ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) 728 729 #define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ 730 ((vector128)(vec_mulo((vector128_32)(x0), \ 731 (vector128_32)(x1)))) 732 733 #define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ 734 ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) 735 736 #define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ 737 ((vector128)(vec_rli((vector128_32)(x0), (unsigned long)(x1)))) 738 739 #define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ 740 (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) 741 742 #define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ 743 ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), (x1 % 4) * 4))) 744 745 #define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ 746 (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(x1)))) & \ 747 ((vector128)((vector128_64){ 0xffffffffffffffff << (x1), 0xffffffffffffffff << (x1) }))) 748 749 #define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ 750 (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(64 - (x1))))) & \ 751 ((vector128)((vector128_64){ 0xffffffffffffffff >> (x1), 0xffffffffffffffff >> (x1) }))) 752 753 #define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ 754 (((vector128)((vector128_32)vec_rli((vector128_32)(x0), (unsigned int)(32 - (x1))))) & \ 755 ((vector128)((vector128_32){ 0xffffffff >> (x1), 0xffffffff >> (x1), \ 756 0xffffffff >> (x1), 0xffffffff >> (x1) }))) 757 758 /* Doesn't work with vec_splat_u64 */ 759 #define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ 760 ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) 761 762 #define 
Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ 763 ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) 764 765 static inline vector128 766 Lib_IntVector_Intrinsics_vec128_xor(vector128 x0, vector128 x1) 767 { 768 return ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))); 769 } 770 771 #define Lib_IntVector_Intrinsics_vec128_zero \ 772 ((vector128){}) 773 774 #endif /* HACL_CAN_COMPILE_VEC128 */ 775 776 #elif defined(__powerpc64__) // PowerPC 64 - this flag is for GCC only 777 778 #if defined(HACL_CAN_COMPILE_VEC128) 779 780 #include <altivec.h> 781 #include <string.h> // for memcpy 782 #include <stdint.h> 783 784 // The main vector 128 type 785 // We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, 786 // unsigned int, unsigned long long: the compiler complains that the parameter 787 // combination is invalid. 788 typedef vector unsigned char vector128_8; 789 typedef vector unsigned int vector128_32; 790 typedef vector unsigned long long vector128_64; 791 792 typedef vector128_8 Lib_IntVector_Intrinsics_vec128; 793 typedef vector128_8 vector128; 794 795 #define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ 796 ((vector128)((vector128_32)(vec_xl(0, (const unsigned int*)((const uint8_t*)(x)))))) 797 798 #define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ 799 ((vector128)((vector128_64)(vec_xl(0, (const unsigned long long*)((const uint8_t*)(x)))))) 800 801 #define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ 802 (vec_xst((vector128_32)(x1), 0, (unsigned int*)((uint8_t*)(x0)))) 803 804 #define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ 805 (vec_xst((vector128_64)(x1), 0, (unsigned long long*)((uint8_t*)(x0)))) 806 807 #define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ 808 ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) 809 810 #define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ 811 ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) 812 813 #define 
Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ 814 ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) 815 816 #define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ 817 ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) 818 819 #define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ 820 ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) 821 822 #define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ 823 ((unsigned int)(vec_extract((vector128_32)(x0), x1))) 824 825 #define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ 826 ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) 827 828 #define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ 829 ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) 830 831 #define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ 832 ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) 833 834 #define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ 835 ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) 836 837 #define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ 838 ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) 839 840 #define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ 841 ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) 842 843 #define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ 844 ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) 845 846 #define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ 847 ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) 848 849 #define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ 850 ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) 851 852 #define Lib_IntVector_Intrinsics_vec128_load32(x) \ 853 ((vector128)((vector128_32){ (unsigned int)(x), 
(unsigned int)(x), \ 854 (unsigned int)(x), (unsigned int)(x) })) 855 856 #define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ 857 ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) 858 859 #define Lib_IntVector_Intrinsics_vec128_load64(x) \ 860 ((vector128)((vector128_64){ (unsigned long long)(x), (unsigned long long)(x) })) 861 862 #define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ 863 ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) 864 865 #define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ 866 ((vector128)(vec_mule((vector128_32)(x0), \ 867 (vector128_32)(x1)))) 868 869 #define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ 870 ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) 871 872 #define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ 873 ((vector128)(vec_rl((vector128_32)(x0), (vector128_32){ (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1) }))) 874 875 #define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ 876 (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) 877 878 #define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ 879 ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), ((4 - (x1)) % 4) * 4))) 880 881 #define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ 882 ((vector128)((vector128_64)vec_sl((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) 883 884 #define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ 885 ((vector128)((vector128_64)vec_sr((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) 886 887 // Doesn't work with vec_splat_u64 888 #define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ 889 ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) 890 891 #define 
Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ 892 ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) 893 894 #define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ 895 ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))) 896 897 #define Lib_IntVector_Intrinsics_vec128_zero \ 898 ((vector128){}) 899 900 #endif /* HACL_CAN_COMPILE_VEC128 */ 901 902 #endif // PowerPC64 903 904 // DEBUGGING: 905 // If libintvector_debug.h exists, use it to debug the current implementations. 906 // Note that some flags must be enabled for the debugging to be effective: 907 // see libintvector_debug.h for more details. 908 #if defined(__has_include) 909 #if __has_include("libintvector_debug.h") 910 #include "libintvector_debug.h" 911 #endif 912 #endif 913 914 #endif // __Vec_Intrin_H