/* Hacl_Chacha20Poly1305_256.c (69988B) */
1 /* MIT License 2 * 3 * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation 4 * Copyright (c) 2022-2023 HACL* Contributors 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 */ 24 25 #include "Hacl_Chacha20Poly1305_256.h" 26 27 #include "internal/Hacl_Poly1305_256.h" 28 #include "internal/Hacl_Krmllib.h" 29 #include "libintvector.h" 30 31 static inline void 32 poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text) 33 { 34 uint32_t n = len / (uint32_t)16U; 35 uint32_t r = len % (uint32_t)16U; 36 uint8_t *blocks = text; 37 uint8_t *rem = text + n * (uint32_t)16U; 38 Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U; 39 Lib_IntVector_Intrinsics_vec256 *acc0 = ctx; 40 uint32_t sz_block = (uint32_t)64U; 41 uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block; 42 uint8_t *t00 = blocks; 43 if (len0 > (uint32_t)0U) { 44 uint32_t bs = (uint32_t)64U; 45 uint8_t *text0 = t00; 46 Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0); 47 uint32_t len1 = len0 - bs; 48 uint8_t *text1 = t00 + bs; 49 uint32_t nb = len1 / bs; 50 for (uint32_t i = (uint32_t)0U; i < nb; i++) { 51 uint8_t *block = text1 + i * bs; 52 KRML_PRE_ALIGN(32) 53 Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; 54 Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); 55 Lib_IntVector_Intrinsics_vec256 56 hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); 57 Lib_IntVector_Intrinsics_vec256 58 mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); 59 Lib_IntVector_Intrinsics_vec256 60 m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); 61 Lib_IntVector_Intrinsics_vec256 62 m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); 63 Lib_IntVector_Intrinsics_vec256 64 m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); 65 Lib_IntVector_Intrinsics_vec256 66 m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); 67 Lib_IntVector_Intrinsics_vec256 68 m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); 69 Lib_IntVector_Intrinsics_vec256 70 t010 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); 71 Lib_IntVector_Intrinsics_vec256 72 t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); 73 Lib_IntVector_Intrinsics_vec256 74 t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U); 75 Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260); 76 Lib_IntVector_Intrinsics_vec256 77 t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U); 78 Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260); 79 Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260); 80 Lib_IntVector_Intrinsics_vec256 81 t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U); 82 Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260); 83 Lib_IntVector_Intrinsics_vec256 84 o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); 85 Lib_IntVector_Intrinsics_vec256 o00 = o5; 86 Lib_IntVector_Intrinsics_vec256 o11 = o10; 87 Lib_IntVector_Intrinsics_vec256 o21 = o20; 88 Lib_IntVector_Intrinsics_vec256 o31 = o30; 89 Lib_IntVector_Intrinsics_vec256 o41 = o40; 90 e[0U] = o00; 91 e[1U] = o11; 92 e[2U] = o21; 93 e[3U] = o31; 94 e[4U] = o41; 95 uint64_t b = (uint64_t)0x1000000U; 96 Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); 97 Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; 98 e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); 99 Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U; 100 Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U; 101 Lib_IntVector_Intrinsics_vec256 r0 = rn[0U]; 102 Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; 103 Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; 104 Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; 105 Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; 106 Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U]; 107 Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U]; 108 
Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U]; 109 Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U]; 110 Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U]; 111 Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U]; 112 Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U]; 113 Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U]; 114 Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U]; 115 Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10); 116 Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); 117 Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); 118 Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); 119 Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); 120 Lib_IntVector_Intrinsics_vec256 121 a01 = 122 Lib_IntVector_Intrinsics_vec256_add64(a0, 123 Lib_IntVector_Intrinsics_vec256_mul64(r54, f110)); 124 Lib_IntVector_Intrinsics_vec256 125 a11 = 126 Lib_IntVector_Intrinsics_vec256_add64(a1, 127 Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); 128 Lib_IntVector_Intrinsics_vec256 129 a21 = 130 Lib_IntVector_Intrinsics_vec256_add64(a2, 131 Lib_IntVector_Intrinsics_vec256_mul64(r1, f110)); 132 Lib_IntVector_Intrinsics_vec256 133 a31 = 134 Lib_IntVector_Intrinsics_vec256_add64(a3, 135 Lib_IntVector_Intrinsics_vec256_mul64(r2, f110)); 136 Lib_IntVector_Intrinsics_vec256 137 a41 = 138 Lib_IntVector_Intrinsics_vec256_add64(a4, 139 Lib_IntVector_Intrinsics_vec256_mul64(r3, f110)); 140 Lib_IntVector_Intrinsics_vec256 141 a02 = 142 Lib_IntVector_Intrinsics_vec256_add64(a01, 143 Lib_IntVector_Intrinsics_vec256_mul64(r53, f120)); 144 Lib_IntVector_Intrinsics_vec256 145 a12 = 146 Lib_IntVector_Intrinsics_vec256_add64(a11, 147 Lib_IntVector_Intrinsics_vec256_mul64(r54, f120)); 148 Lib_IntVector_Intrinsics_vec256 149 a22 = 150 Lib_IntVector_Intrinsics_vec256_add64(a21, 151 Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); 152 
Lib_IntVector_Intrinsics_vec256 153 a32 = 154 Lib_IntVector_Intrinsics_vec256_add64(a31, 155 Lib_IntVector_Intrinsics_vec256_mul64(r1, f120)); 156 Lib_IntVector_Intrinsics_vec256 157 a42 = 158 Lib_IntVector_Intrinsics_vec256_add64(a41, 159 Lib_IntVector_Intrinsics_vec256_mul64(r2, f120)); 160 Lib_IntVector_Intrinsics_vec256 161 a03 = 162 Lib_IntVector_Intrinsics_vec256_add64(a02, 163 Lib_IntVector_Intrinsics_vec256_mul64(r52, f130)); 164 Lib_IntVector_Intrinsics_vec256 165 a13 = 166 Lib_IntVector_Intrinsics_vec256_add64(a12, 167 Lib_IntVector_Intrinsics_vec256_mul64(r53, f130)); 168 Lib_IntVector_Intrinsics_vec256 169 a23 = 170 Lib_IntVector_Intrinsics_vec256_add64(a22, 171 Lib_IntVector_Intrinsics_vec256_mul64(r54, f130)); 172 Lib_IntVector_Intrinsics_vec256 173 a33 = 174 Lib_IntVector_Intrinsics_vec256_add64(a32, 175 Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); 176 Lib_IntVector_Intrinsics_vec256 177 a43 = 178 Lib_IntVector_Intrinsics_vec256_add64(a42, 179 Lib_IntVector_Intrinsics_vec256_mul64(r1, f130)); 180 Lib_IntVector_Intrinsics_vec256 181 a04 = 182 Lib_IntVector_Intrinsics_vec256_add64(a03, 183 Lib_IntVector_Intrinsics_vec256_mul64(r51, f140)); 184 Lib_IntVector_Intrinsics_vec256 185 a14 = 186 Lib_IntVector_Intrinsics_vec256_add64(a13, 187 Lib_IntVector_Intrinsics_vec256_mul64(r52, f140)); 188 Lib_IntVector_Intrinsics_vec256 189 a24 = 190 Lib_IntVector_Intrinsics_vec256_add64(a23, 191 Lib_IntVector_Intrinsics_vec256_mul64(r53, f140)); 192 Lib_IntVector_Intrinsics_vec256 193 a34 = 194 Lib_IntVector_Intrinsics_vec256_add64(a33, 195 Lib_IntVector_Intrinsics_vec256_mul64(r54, f140)); 196 Lib_IntVector_Intrinsics_vec256 197 a44 = 198 Lib_IntVector_Intrinsics_vec256_add64(a43, 199 Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); 200 Lib_IntVector_Intrinsics_vec256 t01 = a04; 201 Lib_IntVector_Intrinsics_vec256 t1 = a14; 202 Lib_IntVector_Intrinsics_vec256 t2 = a24; 203 Lib_IntVector_Intrinsics_vec256 t3 = a34; 204 Lib_IntVector_Intrinsics_vec256 t4 = a44; 
205 Lib_IntVector_Intrinsics_vec256 206 mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); 207 Lib_IntVector_Intrinsics_vec256 208 z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); 209 Lib_IntVector_Intrinsics_vec256 210 z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); 211 Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); 212 Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); 213 Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); 214 Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); 215 Lib_IntVector_Intrinsics_vec256 216 z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); 217 Lib_IntVector_Intrinsics_vec256 218 z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); 219 Lib_IntVector_Intrinsics_vec256 220 t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); 221 Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); 222 Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); 223 Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); 224 Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); 225 Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); 226 Lib_IntVector_Intrinsics_vec256 227 z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); 228 Lib_IntVector_Intrinsics_vec256 229 z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); 230 Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); 231 Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); 232 Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); 233 
Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); 234 Lib_IntVector_Intrinsics_vec256 235 z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); 236 Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); 237 Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); 238 Lib_IntVector_Intrinsics_vec256 o01 = x02; 239 Lib_IntVector_Intrinsics_vec256 o12 = x12; 240 Lib_IntVector_Intrinsics_vec256 o22 = x21; 241 Lib_IntVector_Intrinsics_vec256 o32 = x32; 242 Lib_IntVector_Intrinsics_vec256 o42 = x42; 243 acc0[0U] = o01; 244 acc0[1U] = o12; 245 acc0[2U] = o22; 246 acc0[3U] = o32; 247 acc0[4U] = o42; 248 Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U]; 249 Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U]; 250 Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U]; 251 Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U]; 252 Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U]; 253 Lib_IntVector_Intrinsics_vec256 f20 = e[0U]; 254 Lib_IntVector_Intrinsics_vec256 f21 = e[1U]; 255 Lib_IntVector_Intrinsics_vec256 f22 = e[2U]; 256 Lib_IntVector_Intrinsics_vec256 f23 = e[3U]; 257 Lib_IntVector_Intrinsics_vec256 f24 = e[4U]; 258 Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20); 259 Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21); 260 Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22); 261 Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23); 262 Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24); 263 acc0[0U] = o0; 264 acc0[1U] = o1; 265 acc0[2U] = o2; 266 acc0[3U] = o3; 267 acc0[4U] = o4; 268 } 269 Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0); 270 } 271 uint32_t len1 = n * (uint32_t)16U - len0; 272 uint8_t *t10 = blocks + len0; 273 uint32_t nb = len1 / (uint32_t)16U; 274 uint32_t rem1 = len1 % 
(uint32_t)16U; 275 for (uint32_t i = (uint32_t)0U; i < nb; i++) { 276 uint8_t *block = t10 + i * (uint32_t)16U; 277 KRML_PRE_ALIGN(32) 278 Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; 279 uint64_t u0 = load64_le(block); 280 uint64_t lo = u0; 281 uint64_t u = load64_le(block + (uint32_t)8U); 282 uint64_t hi = u; 283 Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); 284 Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); 285 Lib_IntVector_Intrinsics_vec256 286 f010 = 287 Lib_IntVector_Intrinsics_vec256_and(f0, 288 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); 289 Lib_IntVector_Intrinsics_vec256 290 f110 = 291 Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, 292 (uint32_t)26U), 293 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); 294 Lib_IntVector_Intrinsics_vec256 295 f20 = 296 Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, 297 (uint32_t)52U), 298 Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, 299 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), 300 (uint32_t)12U)); 301 Lib_IntVector_Intrinsics_vec256 302 f30 = 303 Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, 304 (uint32_t)14U), 305 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); 306 Lib_IntVector_Intrinsics_vec256 307 f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); 308 Lib_IntVector_Intrinsics_vec256 f01 = f010; 309 Lib_IntVector_Intrinsics_vec256 f111 = f110; 310 Lib_IntVector_Intrinsics_vec256 f2 = f20; 311 Lib_IntVector_Intrinsics_vec256 f3 = f30; 312 Lib_IntVector_Intrinsics_vec256 f41 = f40; 313 e[0U] = f01; 314 e[1U] = f111; 315 e[2U] = f2; 316 e[3U] = f3; 317 e[4U] = f41; 318 uint64_t b = (uint64_t)0x1000000U; 319 Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); 
320 Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; 321 e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); 322 Lib_IntVector_Intrinsics_vec256 *r1 = pre0; 323 Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; 324 Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; 325 Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; 326 Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; 327 Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; 328 Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; 329 Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; 330 Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; 331 Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; 332 Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; 333 Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; 334 Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; 335 Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; 336 Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; 337 Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; 338 Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; 339 Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; 340 Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; 341 Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; 342 Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; 343 Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); 344 Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); 345 Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); 346 Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); 347 Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); 348 Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); 349 Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); 350 Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); 351 Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); 352 
Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); 353 Lib_IntVector_Intrinsics_vec256 354 a03 = 355 Lib_IntVector_Intrinsics_vec256_add64(a02, 356 Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); 357 Lib_IntVector_Intrinsics_vec256 358 a13 = 359 Lib_IntVector_Intrinsics_vec256_add64(a12, 360 Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); 361 Lib_IntVector_Intrinsics_vec256 362 a23 = 363 Lib_IntVector_Intrinsics_vec256_add64(a22, 364 Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); 365 Lib_IntVector_Intrinsics_vec256 366 a33 = 367 Lib_IntVector_Intrinsics_vec256_add64(a32, 368 Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); 369 Lib_IntVector_Intrinsics_vec256 370 a43 = 371 Lib_IntVector_Intrinsics_vec256_add64(a42, 372 Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); 373 Lib_IntVector_Intrinsics_vec256 374 a04 = 375 Lib_IntVector_Intrinsics_vec256_add64(a03, 376 Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); 377 Lib_IntVector_Intrinsics_vec256 378 a14 = 379 Lib_IntVector_Intrinsics_vec256_add64(a13, 380 Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); 381 Lib_IntVector_Intrinsics_vec256 382 a24 = 383 Lib_IntVector_Intrinsics_vec256_add64(a23, 384 Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); 385 Lib_IntVector_Intrinsics_vec256 386 a34 = 387 Lib_IntVector_Intrinsics_vec256_add64(a33, 388 Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); 389 Lib_IntVector_Intrinsics_vec256 390 a44 = 391 Lib_IntVector_Intrinsics_vec256_add64(a43, 392 Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); 393 Lib_IntVector_Intrinsics_vec256 394 a05 = 395 Lib_IntVector_Intrinsics_vec256_add64(a04, 396 Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); 397 Lib_IntVector_Intrinsics_vec256 398 a15 = 399 Lib_IntVector_Intrinsics_vec256_add64(a14, 400 Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); 401 Lib_IntVector_Intrinsics_vec256 402 a25 = 403 Lib_IntVector_Intrinsics_vec256_add64(a24, 404 Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); 405 
Lib_IntVector_Intrinsics_vec256 406 a35 = 407 Lib_IntVector_Intrinsics_vec256_add64(a34, 408 Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); 409 Lib_IntVector_Intrinsics_vec256 410 a45 = 411 Lib_IntVector_Intrinsics_vec256_add64(a44, 412 Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); 413 Lib_IntVector_Intrinsics_vec256 414 a06 = 415 Lib_IntVector_Intrinsics_vec256_add64(a05, 416 Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); 417 Lib_IntVector_Intrinsics_vec256 418 a16 = 419 Lib_IntVector_Intrinsics_vec256_add64(a15, 420 Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); 421 Lib_IntVector_Intrinsics_vec256 422 a26 = 423 Lib_IntVector_Intrinsics_vec256_add64(a25, 424 Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); 425 Lib_IntVector_Intrinsics_vec256 426 a36 = 427 Lib_IntVector_Intrinsics_vec256_add64(a35, 428 Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); 429 Lib_IntVector_Intrinsics_vec256 430 a46 = 431 Lib_IntVector_Intrinsics_vec256_add64(a45, 432 Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); 433 Lib_IntVector_Intrinsics_vec256 t01 = a06; 434 Lib_IntVector_Intrinsics_vec256 t11 = a16; 435 Lib_IntVector_Intrinsics_vec256 t2 = a26; 436 Lib_IntVector_Intrinsics_vec256 t3 = a36; 437 Lib_IntVector_Intrinsics_vec256 t4 = a46; 438 Lib_IntVector_Intrinsics_vec256 439 mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); 440 Lib_IntVector_Intrinsics_vec256 441 z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); 442 Lib_IntVector_Intrinsics_vec256 443 z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); 444 Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); 445 Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); 446 Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); 447 Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); 448 Lib_IntVector_Intrinsics_vec256 449 z01 = 
Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); 450 Lib_IntVector_Intrinsics_vec256 451 z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); 452 Lib_IntVector_Intrinsics_vec256 453 t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); 454 Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); 455 Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); 456 Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); 457 Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); 458 Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); 459 Lib_IntVector_Intrinsics_vec256 460 z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); 461 Lib_IntVector_Intrinsics_vec256 462 z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); 463 Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); 464 Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); 465 Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); 466 Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); 467 Lib_IntVector_Intrinsics_vec256 468 z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); 469 Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); 470 Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); 471 Lib_IntVector_Intrinsics_vec256 o0 = x02; 472 Lib_IntVector_Intrinsics_vec256 o1 = x12; 473 Lib_IntVector_Intrinsics_vec256 o2 = x21; 474 Lib_IntVector_Intrinsics_vec256 o3 = x32; 475 Lib_IntVector_Intrinsics_vec256 o4 = x42; 476 acc0[0U] = o0; 477 acc0[1U] = o1; 478 acc0[2U] = o2; 479 acc0[3U] = o3; 480 acc0[4U] = o4; 481 } 482 if (rem1 > (uint32_t)0U) { 483 uint8_t 
*last = t10 + nb * (uint32_t)16U; 484 KRML_PRE_ALIGN(32) 485 Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; 486 uint8_t tmp[16U] = { 0U }; 487 memcpy(tmp, last, rem1 * sizeof(uint8_t)); 488 uint64_t u0 = load64_le(tmp); 489 uint64_t lo = u0; 490 uint64_t u = load64_le(tmp + (uint32_t)8U); 491 uint64_t hi = u; 492 Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); 493 Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); 494 Lib_IntVector_Intrinsics_vec256 495 f010 = 496 Lib_IntVector_Intrinsics_vec256_and(f0, 497 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); 498 Lib_IntVector_Intrinsics_vec256 499 f110 = 500 Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, 501 (uint32_t)26U), 502 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); 503 Lib_IntVector_Intrinsics_vec256 504 f20 = 505 Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, 506 (uint32_t)52U), 507 Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, 508 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), 509 (uint32_t)12U)); 510 Lib_IntVector_Intrinsics_vec256 511 f30 = 512 Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, 513 (uint32_t)14U), 514 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); 515 Lib_IntVector_Intrinsics_vec256 516 f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); 517 Lib_IntVector_Intrinsics_vec256 f01 = f010; 518 Lib_IntVector_Intrinsics_vec256 f111 = f110; 519 Lib_IntVector_Intrinsics_vec256 f2 = f20; 520 Lib_IntVector_Intrinsics_vec256 f3 = f30; 521 Lib_IntVector_Intrinsics_vec256 f4 = f40; 522 e[0U] = f01; 523 e[1U] = f111; 524 e[2U] = f2; 525 e[3U] = f3; 526 e[4U] = f4; 527 uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; 528 Lib_IntVector_Intrinsics_vec256 mask = 
Lib_IntVector_Intrinsics_vec256_load64(b); 529 Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; 530 e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask); 531 Lib_IntVector_Intrinsics_vec256 *r1 = pre0; 532 Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; 533 Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; 534 Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; 535 Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; 536 Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; 537 Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; 538 Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; 539 Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; 540 Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; 541 Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; 542 Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; 543 Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; 544 Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; 545 Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; 546 Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; 547 Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; 548 Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; 549 Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; 550 Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; 551 Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; 552 Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); 553 Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); 554 Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); 555 Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); 556 Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); 557 Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); 558 Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); 559 Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); 560 
Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); 561 Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); 562 Lib_IntVector_Intrinsics_vec256 563 a03 = 564 Lib_IntVector_Intrinsics_vec256_add64(a02, 565 Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); 566 Lib_IntVector_Intrinsics_vec256 567 a13 = 568 Lib_IntVector_Intrinsics_vec256_add64(a12, 569 Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); 570 Lib_IntVector_Intrinsics_vec256 571 a23 = 572 Lib_IntVector_Intrinsics_vec256_add64(a22, 573 Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); 574 Lib_IntVector_Intrinsics_vec256 575 a33 = 576 Lib_IntVector_Intrinsics_vec256_add64(a32, 577 Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); 578 Lib_IntVector_Intrinsics_vec256 579 a43 = 580 Lib_IntVector_Intrinsics_vec256_add64(a42, 581 Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); 582 Lib_IntVector_Intrinsics_vec256 583 a04 = 584 Lib_IntVector_Intrinsics_vec256_add64(a03, 585 Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); 586 Lib_IntVector_Intrinsics_vec256 587 a14 = 588 Lib_IntVector_Intrinsics_vec256_add64(a13, 589 Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); 590 Lib_IntVector_Intrinsics_vec256 591 a24 = 592 Lib_IntVector_Intrinsics_vec256_add64(a23, 593 Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); 594 Lib_IntVector_Intrinsics_vec256 595 a34 = 596 Lib_IntVector_Intrinsics_vec256_add64(a33, 597 Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); 598 Lib_IntVector_Intrinsics_vec256 599 a44 = 600 Lib_IntVector_Intrinsics_vec256_add64(a43, 601 Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); 602 Lib_IntVector_Intrinsics_vec256 603 a05 = 604 Lib_IntVector_Intrinsics_vec256_add64(a04, 605 Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); 606 Lib_IntVector_Intrinsics_vec256 607 a15 = 608 Lib_IntVector_Intrinsics_vec256_add64(a14, 609 Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); 610 Lib_IntVector_Intrinsics_vec256 611 a25 = 612 
Lib_IntVector_Intrinsics_vec256_add64(a24, 613 Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); 614 Lib_IntVector_Intrinsics_vec256 615 a35 = 616 Lib_IntVector_Intrinsics_vec256_add64(a34, 617 Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); 618 Lib_IntVector_Intrinsics_vec256 619 a45 = 620 Lib_IntVector_Intrinsics_vec256_add64(a44, 621 Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); 622 Lib_IntVector_Intrinsics_vec256 623 a06 = 624 Lib_IntVector_Intrinsics_vec256_add64(a05, 625 Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); 626 Lib_IntVector_Intrinsics_vec256 627 a16 = 628 Lib_IntVector_Intrinsics_vec256_add64(a15, 629 Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); 630 Lib_IntVector_Intrinsics_vec256 631 a26 = 632 Lib_IntVector_Intrinsics_vec256_add64(a25, 633 Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); 634 Lib_IntVector_Intrinsics_vec256 635 a36 = 636 Lib_IntVector_Intrinsics_vec256_add64(a35, 637 Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); 638 Lib_IntVector_Intrinsics_vec256 639 a46 = 640 Lib_IntVector_Intrinsics_vec256_add64(a45, 641 Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); 642 Lib_IntVector_Intrinsics_vec256 t01 = a06; 643 Lib_IntVector_Intrinsics_vec256 t11 = a16; 644 Lib_IntVector_Intrinsics_vec256 t2 = a26; 645 Lib_IntVector_Intrinsics_vec256 t3 = a36; 646 Lib_IntVector_Intrinsics_vec256 t4 = a46; 647 Lib_IntVector_Intrinsics_vec256 648 mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); 649 Lib_IntVector_Intrinsics_vec256 650 z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); 651 Lib_IntVector_Intrinsics_vec256 652 z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); 653 Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); 654 Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); 655 Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); 656 Lib_IntVector_Intrinsics_vec256 x4 
= Lib_IntVector_Intrinsics_vec256_add64(t4, z1); 657 Lib_IntVector_Intrinsics_vec256 658 z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); 659 Lib_IntVector_Intrinsics_vec256 660 z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); 661 Lib_IntVector_Intrinsics_vec256 662 t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); 663 Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); 664 Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); 665 Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); 666 Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); 667 Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); 668 Lib_IntVector_Intrinsics_vec256 669 z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); 670 Lib_IntVector_Intrinsics_vec256 671 z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); 672 Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); 673 Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); 674 Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); 675 Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); 676 Lib_IntVector_Intrinsics_vec256 677 z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); 678 Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); 679 Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); 680 Lib_IntVector_Intrinsics_vec256 o0 = x02; 681 Lib_IntVector_Intrinsics_vec256 o1 = x12; 682 Lib_IntVector_Intrinsics_vec256 o2 = x21; 683 Lib_IntVector_Intrinsics_vec256 o3 = x32; 684 Lib_IntVector_Intrinsics_vec256 o4 = x42; 685 acc0[0U] = o0; 686 acc0[1U] = o1; 687 acc0[2U] = 
o2; 688 acc0[3U] = o3; 689 acc0[4U] = o4; 690 } 691 uint8_t tmp[16U] = { 0U }; 692 memcpy(tmp, rem, r * sizeof(uint8_t)); 693 if (r > (uint32_t)0U) { 694 Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; 695 Lib_IntVector_Intrinsics_vec256 *acc = ctx; 696 KRML_PRE_ALIGN(32) 697 Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; 698 uint64_t u0 = load64_le(tmp); 699 uint64_t lo = u0; 700 uint64_t u = load64_le(tmp + (uint32_t)8U); 701 uint64_t hi = u; 702 Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); 703 Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); 704 Lib_IntVector_Intrinsics_vec256 705 f010 = 706 Lib_IntVector_Intrinsics_vec256_and(f0, 707 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); 708 Lib_IntVector_Intrinsics_vec256 709 f110 = 710 Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, 711 (uint32_t)26U), 712 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); 713 Lib_IntVector_Intrinsics_vec256 714 f20 = 715 Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, 716 (uint32_t)52U), 717 Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, 718 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), 719 (uint32_t)12U)); 720 Lib_IntVector_Intrinsics_vec256 721 f30 = 722 Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, 723 (uint32_t)14U), 724 Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); 725 Lib_IntVector_Intrinsics_vec256 726 f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); 727 Lib_IntVector_Intrinsics_vec256 f01 = f010; 728 Lib_IntVector_Intrinsics_vec256 f111 = f110; 729 Lib_IntVector_Intrinsics_vec256 f2 = f20; 730 Lib_IntVector_Intrinsics_vec256 f3 = f30; 731 Lib_IntVector_Intrinsics_vec256 f41 = f40; 732 e[0U] = f01; 733 e[1U] = f111; 734 e[2U] = f2; 735 e[3U] = f3; 
736 e[4U] = f41; 737 uint64_t b = (uint64_t)0x1000000U; 738 Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); 739 Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; 740 e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); 741 Lib_IntVector_Intrinsics_vec256 *r1 = pre; 742 Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; 743 Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; 744 Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; 745 Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; 746 Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; 747 Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; 748 Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; 749 Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; 750 Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; 751 Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; 752 Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; 753 Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; 754 Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; 755 Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; 756 Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; 757 Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; 758 Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; 759 Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; 760 Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; 761 Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; 762 Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); 763 Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); 764 Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); 765 Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); 766 Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); 767 Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); 768 Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); 769 Lib_IntVector_Intrinsics_vec256 a22 = 
Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); 770 Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); 771 Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); 772 Lib_IntVector_Intrinsics_vec256 773 a03 = 774 Lib_IntVector_Intrinsics_vec256_add64(a02, 775 Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); 776 Lib_IntVector_Intrinsics_vec256 777 a13 = 778 Lib_IntVector_Intrinsics_vec256_add64(a12, 779 Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); 780 Lib_IntVector_Intrinsics_vec256 781 a23 = 782 Lib_IntVector_Intrinsics_vec256_add64(a22, 783 Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); 784 Lib_IntVector_Intrinsics_vec256 785 a33 = 786 Lib_IntVector_Intrinsics_vec256_add64(a32, 787 Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); 788 Lib_IntVector_Intrinsics_vec256 789 a43 = 790 Lib_IntVector_Intrinsics_vec256_add64(a42, 791 Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); 792 Lib_IntVector_Intrinsics_vec256 793 a04 = 794 Lib_IntVector_Intrinsics_vec256_add64(a03, 795 Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); 796 Lib_IntVector_Intrinsics_vec256 797 a14 = 798 Lib_IntVector_Intrinsics_vec256_add64(a13, 799 Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); 800 Lib_IntVector_Intrinsics_vec256 801 a24 = 802 Lib_IntVector_Intrinsics_vec256_add64(a23, 803 Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); 804 Lib_IntVector_Intrinsics_vec256 805 a34 = 806 Lib_IntVector_Intrinsics_vec256_add64(a33, 807 Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); 808 Lib_IntVector_Intrinsics_vec256 809 a44 = 810 Lib_IntVector_Intrinsics_vec256_add64(a43, 811 Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); 812 Lib_IntVector_Intrinsics_vec256 813 a05 = 814 Lib_IntVector_Intrinsics_vec256_add64(a04, 815 Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); 816 Lib_IntVector_Intrinsics_vec256 817 a15 = 818 Lib_IntVector_Intrinsics_vec256_add64(a14, 819 Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); 820 
Lib_IntVector_Intrinsics_vec256 821 a25 = 822 Lib_IntVector_Intrinsics_vec256_add64(a24, 823 Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); 824 Lib_IntVector_Intrinsics_vec256 825 a35 = 826 Lib_IntVector_Intrinsics_vec256_add64(a34, 827 Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); 828 Lib_IntVector_Intrinsics_vec256 829 a45 = 830 Lib_IntVector_Intrinsics_vec256_add64(a44, 831 Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); 832 Lib_IntVector_Intrinsics_vec256 833 a06 = 834 Lib_IntVector_Intrinsics_vec256_add64(a05, 835 Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); 836 Lib_IntVector_Intrinsics_vec256 837 a16 = 838 Lib_IntVector_Intrinsics_vec256_add64(a15, 839 Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); 840 Lib_IntVector_Intrinsics_vec256 841 a26 = 842 Lib_IntVector_Intrinsics_vec256_add64(a25, 843 Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); 844 Lib_IntVector_Intrinsics_vec256 845 a36 = 846 Lib_IntVector_Intrinsics_vec256_add64(a35, 847 Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); 848 Lib_IntVector_Intrinsics_vec256 849 a46 = 850 Lib_IntVector_Intrinsics_vec256_add64(a45, 851 Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); 852 Lib_IntVector_Intrinsics_vec256 t0 = a06; 853 Lib_IntVector_Intrinsics_vec256 t1 = a16; 854 Lib_IntVector_Intrinsics_vec256 t2 = a26; 855 Lib_IntVector_Intrinsics_vec256 t3 = a36; 856 Lib_IntVector_Intrinsics_vec256 t4 = a46; 857 Lib_IntVector_Intrinsics_vec256 858 mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); 859 Lib_IntVector_Intrinsics_vec256 860 z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); 861 Lib_IntVector_Intrinsics_vec256 862 z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); 863 Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); 864 Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); 865 Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, 
z0); 866 Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); 867 Lib_IntVector_Intrinsics_vec256 868 z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); 869 Lib_IntVector_Intrinsics_vec256 870 z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); 871 Lib_IntVector_Intrinsics_vec256 872 t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); 873 Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); 874 Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); 875 Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); 876 Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); 877 Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); 878 Lib_IntVector_Intrinsics_vec256 879 z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); 880 Lib_IntVector_Intrinsics_vec256 881 z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); 882 Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); 883 Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); 884 Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); 885 Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); 886 Lib_IntVector_Intrinsics_vec256 887 z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); 888 Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); 889 Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); 890 Lib_IntVector_Intrinsics_vec256 o0 = x02; 891 Lib_IntVector_Intrinsics_vec256 o1 = x12; 892 Lib_IntVector_Intrinsics_vec256 o2 = x21; 893 Lib_IntVector_Intrinsics_vec256 o3 = x32; 894 Lib_IntVector_Intrinsics_vec256 o4 = x42; 895 
  acc[0U] = o0;
    acc[1U] = o1;
    acc[2U] = o2;
    acc[3U] = o3;
    acc[4U] = o4;
    return;
  }
}

/* One-shot Poly1305 MAC over the AEAD transcript, as used by ChaCha20-Poly1305:
 * absorb the (zero-padded) associated data, then the (zero-padded) ciphertext,
 * then a final 16-byte block holding le64(aadlen) || le64(mlen), and emit the tag.
 *
 * k    -- 32-byte one-time Poly1305 key (r || s).
 * aad  -- aadlen bytes of associated data (skipped entirely when aadlen == 0).
 * m    -- mlen bytes of ciphertext to authenticate (skipped when mlen == 0).
 * out  -- 16-byte output tag.
 *
 * ctx layout: ctx[0..4] is the accumulator, ctx+5 the precomputed key powers
 * (presumably r-limbs at pre[0..4] and 5*r-limbs at pre+5 for the mod 2^130-5
 * wrap-around — standard radix-2^26 layout; confirm against poly1305_init). */
static inline void
poly1305_do_256(
  uint8_t *k,
  uint32_t aadlen,
  uint8_t *aad,
  uint32_t mlen,
  uint8_t *m,
  uint8_t *out)
{
  KRML_PRE_ALIGN(32)
  Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U };
  uint8_t block[16U] = { 0U };
  Hacl_Poly1305_256_poly1305_init(ctx, k);
  if (aadlen != (uint32_t)0U) {
    poly1305_padded_256(ctx, aadlen, aad);
  }
  if (mlen != (uint32_t)0U) {
    poly1305_padded_256(ctx, mlen, m);
  }
  /* Final block: little-endian lengths of aad and message, as per the
   * ChaCha20-Poly1305 AEAD construction. */
  store64_le(block, (uint64_t)aadlen);
  store64_le(block + (uint32_t)8U, (uint64_t)mlen);
  Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
  Lib_IntVector_Intrinsics_vec256 *acc = ctx;
  KRML_PRE_ALIGN(32)
  Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
  /* Load the 16-byte length block as two little-endian 64-bit words. */
  uint64_t u0 = load64_le(block);
  uint64_t lo = u0;
  uint64_t u = load64_le(block + (uint32_t)8U);
  uint64_t hi = u;
  Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
  Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
  /* Split the 128-bit block into five 26-bit limbs e[0..4] (radix 2^26). */
  Lib_IntVector_Intrinsics_vec256
  f010 =
    Lib_IntVector_Intrinsics_vec256_and(f0,
      Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
  Lib_IntVector_Intrinsics_vec256
  f110 =
    Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
        (uint32_t)26U),
      Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
  Lib_IntVector_Intrinsics_vec256
  f20 =
    Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
        (uint32_t)52U),
      Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
          Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
        (uint32_t)12U));
  Lib_IntVector_Intrinsics_vec256
  f30 =
    Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
        (uint32_t)14U),
      Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
  Lib_IntVector_Intrinsics_vec256
  f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
  Lib_IntVector_Intrinsics_vec256 f01 = f010;
  Lib_IntVector_Intrinsics_vec256 f111 = f110;
  Lib_IntVector_Intrinsics_vec256 f2 = f20;
  Lib_IntVector_Intrinsics_vec256 f3 = f30;
  Lib_IntVector_Intrinsics_vec256 f41 = f40;
  e[0U] = f01;
  e[1U] = f111;
  e[2U] = f2;
  e[3U] = f3;
  e[4U] = f41;
  /* Set the 2^128 bit on the block (Poly1305 pads every full block with a
   * high 1 bit); 0x1000000 is 2^24 = bit 128 within limb 4 (4*26 = 104). */
  uint64_t b = (uint64_t)0x1000000U;
  Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
  Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
  e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
  Lib_IntVector_Intrinsics_vec256 *r = pre;
  Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
  Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
  Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
  Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
  Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
  Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
  Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
  Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
  Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
  Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
  Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
  Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
  Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
  Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
  Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
  Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
  Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
  Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
  Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
  Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
  /* acc += block, then acc *= r, limb by limb (schoolbook product with the
   * r5 limbs folding the mod 2^130-5 reduction into the multiply). */
  Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
  Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
  Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
  Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
  Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
  Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
  Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
  Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
  Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
  Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
  Lib_IntVector_Intrinsics_vec256
  a03 =
    Lib_IntVector_Intrinsics_vec256_add64(a02,
      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
  Lib_IntVector_Intrinsics_vec256
  a13 =
    Lib_IntVector_Intrinsics_vec256_add64(a12,
      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
  Lib_IntVector_Intrinsics_vec256
  a23 =
    Lib_IntVector_Intrinsics_vec256_add64(a22,
      Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
  Lib_IntVector_Intrinsics_vec256
  a33 =
    Lib_IntVector_Intrinsics_vec256_add64(a32,
      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
  Lib_IntVector_Intrinsics_vec256
  a43 =
    Lib_IntVector_Intrinsics_vec256_add64(a42,
      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
  Lib_IntVector_Intrinsics_vec256
  a04 =
    Lib_IntVector_Intrinsics_vec256_add64(a03,
      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
  Lib_IntVector_Intrinsics_vec256
  a14 =
    Lib_IntVector_Intrinsics_vec256_add64(a13,
      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
  Lib_IntVector_Intrinsics_vec256
  a24 =
    Lib_IntVector_Intrinsics_vec256_add64(a23,
      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
  Lib_IntVector_Intrinsics_vec256
  a34 =
    Lib_IntVector_Intrinsics_vec256_add64(a33,
      Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
  Lib_IntVector_Intrinsics_vec256
  a44 =
    Lib_IntVector_Intrinsics_vec256_add64(a43,
      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
  Lib_IntVector_Intrinsics_vec256
  a05 =
    Lib_IntVector_Intrinsics_vec256_add64(a04,
      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
  Lib_IntVector_Intrinsics_vec256
  a15 =
    Lib_IntVector_Intrinsics_vec256_add64(a14,
      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
  Lib_IntVector_Intrinsics_vec256
  a25 =
    Lib_IntVector_Intrinsics_vec256_add64(a24,
      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
  Lib_IntVector_Intrinsics_vec256
  a35 =
    Lib_IntVector_Intrinsics_vec256_add64(a34,
      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
  Lib_IntVector_Intrinsics_vec256
  a45 =
    Lib_IntVector_Intrinsics_vec256_add64(a44,
      Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
  Lib_IntVector_Intrinsics_vec256
  a06 =
    Lib_IntVector_Intrinsics_vec256_add64(a05,
      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
  Lib_IntVector_Intrinsics_vec256
  a16 =
    Lib_IntVector_Intrinsics_vec256_add64(a15,
      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
  Lib_IntVector_Intrinsics_vec256
  a26 =
    Lib_IntVector_Intrinsics_vec256_add64(a25,
      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
  Lib_IntVector_Intrinsics_vec256
  a36 =
    Lib_IntVector_Intrinsics_vec256_add64(a35,
      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
  Lib_IntVector_Intrinsics_vec256
  a46 =
    Lib_IntVector_Intrinsics_vec256_add64(a45,
      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
  Lib_IntVector_Intrinsics_vec256 t0 = a06;
  Lib_IntVector_Intrinsics_vec256 t1 = a16;
  Lib_IntVector_Intrinsics_vec256 t2 = a26;
  Lib_IntVector_Intrinsics_vec256 t3 = a36;
  Lib_IntVector_Intrinsics_vec256 t4 = a46;
  /* Carry propagation: bring every limb back under 2^26 (interleaved
   * carries from limbs 0 and 3; the limb-4 carry is multiplied by 5 via
   * z12 = z11 + (z11 << 2) before folding into limb 0). */
  Lib_IntVector_Intrinsics_vec256
  mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
  Lib_IntVector_Intrinsics_vec256
  z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
  Lib_IntVector_Intrinsics_vec256
  z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
  Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
  Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
  Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
  Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
  Lib_IntVector_Intrinsics_vec256
  z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
  Lib_IntVector_Intrinsics_vec256
  z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
  Lib_IntVector_Intrinsics_vec256
  t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
  Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
  Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
  Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
  Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
  Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
  Lib_IntVector_Intrinsics_vec256
  z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
  Lib_IntVector_Intrinsics_vec256
  z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
  Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
  Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
  Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
  Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
  Lib_IntVector_Intrinsics_vec256
  z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
  Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
  Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
  Lib_IntVector_Intrinsics_vec256 o0 = x02;
  Lib_IntVector_Intrinsics_vec256 o1 = x12;
  Lib_IntVector_Intrinsics_vec256 o2 = x21;
  Lib_IntVector_Intrinsics_vec256 o3 = x32;
  Lib_IntVector_Intrinsics_vec256 o4 = x42;
  acc[0U] = o0;
  acc[1U] = o1;
  acc[2U] = o2;
  acc[3U] = o3;
  acc[4U] = o4;
  /* Finalize: reduce the accumulator and add the s half of the key. */
  Hacl_Poly1305_256_poly1305_finish(out, k, ctx);
}

/**
Encrypt a message `m` with key `k`.

The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.

@param k Pointer to 32 bytes of memory where the AEAD key is read from.
@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
@param aadlen Length of the associated data.
@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.

@param mlen Length of the message.
@param m Pointer to `mlen` bytes of memory where the message is read from.
@param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
@param mac Pointer to 16 bytes of memory where the mac is written to.
1149 */ 1150 void 1151 Hacl_Chacha20Poly1305_256_aead_encrypt( 1152 uint8_t *k, 1153 uint8_t *n, 1154 uint32_t aadlen, 1155 uint8_t *aad, 1156 uint32_t mlen, 1157 uint8_t *m, 1158 uint8_t *cipher, 1159 uint8_t *mac) 1160 { 1161 Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U); 1162 uint8_t tmp[64U] = { 0U }; 1163 Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); 1164 uint8_t *key = tmp; 1165 poly1305_do_256(key, aadlen, aad, mlen, cipher, mac); 1166 } 1167 1168 /** 1169 Decrypt a ciphertext `cipher` with key `k`. 1170 1171 The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption. 1172 Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory. 1173 1174 If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0. 1175 If decryption fails, the array `m` remains unchanged and the function returns the error code 1. 1176 1177 @param k Pointer to 32 bytes of memory where the AEAD key is read from. 1178 @param n Pointer to 12 bytes of memory where the AEAD nonce is read from. 1179 @param aadlen Length of the associated data. 1180 @param aad Pointer to `aadlen` bytes of memory where the associated data is read from. 1181 1182 @param mlen Length of the ciphertext. 1183 @param m Pointer to `mlen` bytes of memory where the message is written to. 1184 @param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from. 1185 @param mac Pointer to 16 bytes of memory where the mac is read from. 1186 1187 @returns 0 on succeess; 1 on failure. 
*/
uint32_t
Hacl_Chacha20Poly1305_256_aead_decrypt(
  uint8_t *k,
  uint8_t *n,
  uint32_t aadlen,
  uint8_t *aad,
  uint32_t mlen,
  uint8_t *m,
  uint8_t *cipher,
  uint8_t *mac)
{
  uint8_t computed_mac[16U] = { 0U };
  /* Re-derive the one-time Poly1305 key: ChaCha20 keystream block at
   * counter 0 (encrypting 64 zero bytes in place yields the keystream). */
  uint8_t tmp[64U] = { 0U };
  Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
  uint8_t *key = tmp;
  /* Recompute the expected tag over aad and the ciphertext. */
  poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac);
  /* Constant-time tag comparison: fold the per-byte equality masks
   * (0xFF if equal, 0x00 otherwise) over all 16 bytes without branching,
   * so timing does not leak which byte first differs. */
  uint8_t res = (uint8_t)255U;
  KRML_MAYBE_FOR16(i,
    (uint32_t)0U,
    (uint32_t)16U,
    (uint32_t)1U,
    uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
    res = uu____0 & res;);
  uint8_t z = res;
  if (z == (uint8_t)255U) {
    /* Tag verified: decrypt into `m` (keystream from counter 1) and report success. */
    Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U);
    return (uint32_t)0U;
  }
  /* Authentication failure: `m` is left unchanged. */
  return (uint32_t)1U;
}