tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Hacl_Chacha20Poly1305_256.c (69988B)


      1 /* MIT License
      2 *
      3 * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
      4 * Copyright (c) 2022-2023 HACL* Contributors
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a copy
      7 * of this software and associated documentation files (the "Software"), to deal
      8 * in the Software without restriction, including without limitation the rights
      9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10 * copies of the Software, and to permit persons to whom the Software is
     11 * furnished to do so, subject to the following conditions:
     12 *
     13 * The above copyright notice and this permission notice shall be included in all
     14 * copies or substantial portions of the Software.
     15 *
     16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     22 * SOFTWARE.
     23 */
     24 
     25 #include "Hacl_Chacha20Poly1305_256.h"
     26 
     27 #include "internal/Hacl_Poly1305_256.h"
     28 #include "internal/Hacl_Krmllib.h"
     29 #include "libintvector.h"
     30 
     31 static inline void
     32 poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text)
     33 {
     34    uint32_t n = len / (uint32_t)16U;
     35    uint32_t r = len % (uint32_t)16U;
     36    uint8_t *blocks = text;
     37    uint8_t *rem = text + n * (uint32_t)16U;
     38    Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U;
     39    Lib_IntVector_Intrinsics_vec256 *acc0 = ctx;
     40    uint32_t sz_block = (uint32_t)64U;
     41    uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block;
     42    uint8_t *t00 = blocks;
     43    if (len0 > (uint32_t)0U) {
     44        uint32_t bs = (uint32_t)64U;
     45        uint8_t *text0 = t00;
     46        Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0);
     47        uint32_t len1 = len0 - bs;
     48        uint8_t *text1 = t00 + bs;
     49        uint32_t nb = len1 / bs;
     50        for (uint32_t i = (uint32_t)0U; i < nb; i++) {
     51            uint8_t *block = text1 + i * bs;
     52            KRML_PRE_ALIGN(32)
     53            Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
     54            Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block);
     55            Lib_IntVector_Intrinsics_vec256
     56                hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U);
     57            Lib_IntVector_Intrinsics_vec256
     58                mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
     59            Lib_IntVector_Intrinsics_vec256
     60                m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi);
     61            Lib_IntVector_Intrinsics_vec256
     62                m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
     63            Lib_IntVector_Intrinsics_vec256
     64                m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
     65            Lib_IntVector_Intrinsics_vec256
     66                m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
     67            Lib_IntVector_Intrinsics_vec256
     68                m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
     69            Lib_IntVector_Intrinsics_vec256
     70                t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
     71            Lib_IntVector_Intrinsics_vec256
     72                t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
     73            Lib_IntVector_Intrinsics_vec256
     74                t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U);
     75            Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260);
     76            Lib_IntVector_Intrinsics_vec256
     77                t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U);
     78            Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260);
     79            Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260);
     80            Lib_IntVector_Intrinsics_vec256
     81                t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U);
     82            Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260);
     83            Lib_IntVector_Intrinsics_vec256
     84                o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U);
     85            Lib_IntVector_Intrinsics_vec256 o00 = o5;
     86            Lib_IntVector_Intrinsics_vec256 o11 = o10;
     87            Lib_IntVector_Intrinsics_vec256 o21 = o20;
     88            Lib_IntVector_Intrinsics_vec256 o31 = o30;
     89            Lib_IntVector_Intrinsics_vec256 o41 = o40;
     90            e[0U] = o00;
     91            e[1U] = o11;
     92            e[2U] = o21;
     93            e[3U] = o31;
     94            e[4U] = o41;
     95            uint64_t b = (uint64_t)0x1000000U;
     96            Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
     97            Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
     98            e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
     99            Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U;
    100            Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U;
    101            Lib_IntVector_Intrinsics_vec256 r0 = rn[0U];
    102            Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
    103            Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
    104            Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
    105            Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
    106            Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U];
    107            Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U];
    108            Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U];
    109            Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U];
    110            Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U];
    111            Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U];
    112            Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U];
    113            Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U];
    114            Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U];
    115            Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10);
    116            Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
    117            Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
    118            Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
    119            Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
    120            Lib_IntVector_Intrinsics_vec256
    121                a01 =
    122                    Lib_IntVector_Intrinsics_vec256_add64(a0,
    123                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f110));
    124            Lib_IntVector_Intrinsics_vec256
    125                a11 =
    126                    Lib_IntVector_Intrinsics_vec256_add64(a1,
    127                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
    128            Lib_IntVector_Intrinsics_vec256
    129                a21 =
    130                    Lib_IntVector_Intrinsics_vec256_add64(a2,
    131                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f110));
    132            Lib_IntVector_Intrinsics_vec256
    133                a31 =
    134                    Lib_IntVector_Intrinsics_vec256_add64(a3,
    135                                                          Lib_IntVector_Intrinsics_vec256_mul64(r2, f110));
    136            Lib_IntVector_Intrinsics_vec256
    137                a41 =
    138                    Lib_IntVector_Intrinsics_vec256_add64(a4,
    139                                                          Lib_IntVector_Intrinsics_vec256_mul64(r3, f110));
    140            Lib_IntVector_Intrinsics_vec256
    141                a02 =
    142                    Lib_IntVector_Intrinsics_vec256_add64(a01,
    143                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f120));
    144            Lib_IntVector_Intrinsics_vec256
    145                a12 =
    146                    Lib_IntVector_Intrinsics_vec256_add64(a11,
    147                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f120));
    148            Lib_IntVector_Intrinsics_vec256
    149                a22 =
    150                    Lib_IntVector_Intrinsics_vec256_add64(a21,
    151                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
    152            Lib_IntVector_Intrinsics_vec256
    153                a32 =
    154                    Lib_IntVector_Intrinsics_vec256_add64(a31,
    155                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f120));
    156            Lib_IntVector_Intrinsics_vec256
    157                a42 =
    158                    Lib_IntVector_Intrinsics_vec256_add64(a41,
    159                                                          Lib_IntVector_Intrinsics_vec256_mul64(r2, f120));
    160            Lib_IntVector_Intrinsics_vec256
    161                a03 =
    162                    Lib_IntVector_Intrinsics_vec256_add64(a02,
    163                                                          Lib_IntVector_Intrinsics_vec256_mul64(r52, f130));
    164            Lib_IntVector_Intrinsics_vec256
    165                a13 =
    166                    Lib_IntVector_Intrinsics_vec256_add64(a12,
    167                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f130));
    168            Lib_IntVector_Intrinsics_vec256
    169                a23 =
    170                    Lib_IntVector_Intrinsics_vec256_add64(a22,
    171                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f130));
    172            Lib_IntVector_Intrinsics_vec256
    173                a33 =
    174                    Lib_IntVector_Intrinsics_vec256_add64(a32,
    175                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
    176            Lib_IntVector_Intrinsics_vec256
    177                a43 =
    178                    Lib_IntVector_Intrinsics_vec256_add64(a42,
    179                                                          Lib_IntVector_Intrinsics_vec256_mul64(r1, f130));
    180            Lib_IntVector_Intrinsics_vec256
    181                a04 =
    182                    Lib_IntVector_Intrinsics_vec256_add64(a03,
    183                                                          Lib_IntVector_Intrinsics_vec256_mul64(r51, f140));
    184            Lib_IntVector_Intrinsics_vec256
    185                a14 =
    186                    Lib_IntVector_Intrinsics_vec256_add64(a13,
    187                                                          Lib_IntVector_Intrinsics_vec256_mul64(r52, f140));
    188            Lib_IntVector_Intrinsics_vec256
    189                a24 =
    190                    Lib_IntVector_Intrinsics_vec256_add64(a23,
    191                                                          Lib_IntVector_Intrinsics_vec256_mul64(r53, f140));
    192            Lib_IntVector_Intrinsics_vec256
    193                a34 =
    194                    Lib_IntVector_Intrinsics_vec256_add64(a33,
    195                                                          Lib_IntVector_Intrinsics_vec256_mul64(r54, f140));
    196            Lib_IntVector_Intrinsics_vec256
    197                a44 =
    198                    Lib_IntVector_Intrinsics_vec256_add64(a43,
    199                                                          Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
    200            Lib_IntVector_Intrinsics_vec256 t01 = a04;
    201            Lib_IntVector_Intrinsics_vec256 t1 = a14;
    202            Lib_IntVector_Intrinsics_vec256 t2 = a24;
    203            Lib_IntVector_Intrinsics_vec256 t3 = a34;
    204            Lib_IntVector_Intrinsics_vec256 t4 = a44;
    205            Lib_IntVector_Intrinsics_vec256
    206                mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
    207            Lib_IntVector_Intrinsics_vec256
    208                z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
    209            Lib_IntVector_Intrinsics_vec256
    210                z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
    211            Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
    212            Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
    213            Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
    214            Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
    215            Lib_IntVector_Intrinsics_vec256
    216                z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
    217            Lib_IntVector_Intrinsics_vec256
    218                z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
    219            Lib_IntVector_Intrinsics_vec256
    220                t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
    221            Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
    222            Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
    223            Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
    224            Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
    225            Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
    226            Lib_IntVector_Intrinsics_vec256
    227                z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
    228            Lib_IntVector_Intrinsics_vec256
    229                z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
    230            Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
    231            Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
    232            Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
    233            Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
    234            Lib_IntVector_Intrinsics_vec256
    235                z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
    236            Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
    237            Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
    238            Lib_IntVector_Intrinsics_vec256 o01 = x02;
    239            Lib_IntVector_Intrinsics_vec256 o12 = x12;
    240            Lib_IntVector_Intrinsics_vec256 o22 = x21;
    241            Lib_IntVector_Intrinsics_vec256 o32 = x32;
    242            Lib_IntVector_Intrinsics_vec256 o42 = x42;
    243            acc0[0U] = o01;
    244            acc0[1U] = o12;
    245            acc0[2U] = o22;
    246            acc0[3U] = o32;
    247            acc0[4U] = o42;
    248            Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U];
    249            Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U];
    250            Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U];
    251            Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U];
    252            Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U];
    253            Lib_IntVector_Intrinsics_vec256 f20 = e[0U];
    254            Lib_IntVector_Intrinsics_vec256 f21 = e[1U];
    255            Lib_IntVector_Intrinsics_vec256 f22 = e[2U];
    256            Lib_IntVector_Intrinsics_vec256 f23 = e[3U];
    257            Lib_IntVector_Intrinsics_vec256 f24 = e[4U];
    258            Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20);
    259            Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21);
    260            Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22);
    261            Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23);
    262            Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24);
    263            acc0[0U] = o0;
    264            acc0[1U] = o1;
    265            acc0[2U] = o2;
    266            acc0[3U] = o3;
    267            acc0[4U] = o4;
    268        }
    269        Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0);
    270    }
    271    uint32_t len1 = n * (uint32_t)16U - len0;
    272    uint8_t *t10 = blocks + len0;
    273    uint32_t nb = len1 / (uint32_t)16U;
    274    uint32_t rem1 = len1 % (uint32_t)16U;
    275    for (uint32_t i = (uint32_t)0U; i < nb; i++) {
    276        uint8_t *block = t10 + i * (uint32_t)16U;
    277        KRML_PRE_ALIGN(32)
    278        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
    279        uint64_t u0 = load64_le(block);
    280        uint64_t lo = u0;
    281        uint64_t u = load64_le(block + (uint32_t)8U);
    282        uint64_t hi = u;
    283        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
    284        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
    285        Lib_IntVector_Intrinsics_vec256
    286            f010 =
    287                Lib_IntVector_Intrinsics_vec256_and(f0,
    288                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    289        Lib_IntVector_Intrinsics_vec256
    290            f110 =
    291                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
    292                                                                                                  (uint32_t)26U),
    293                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    294        Lib_IntVector_Intrinsics_vec256
    295            f20 =
    296                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
    297                                                                                                 (uint32_t)52U),
    298                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
    299                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
    300                                                                                                (uint32_t)12U));
    301        Lib_IntVector_Intrinsics_vec256
    302            f30 =
    303                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
    304                                                                                                  (uint32_t)14U),
    305                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    306        Lib_IntVector_Intrinsics_vec256
    307            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
    308        Lib_IntVector_Intrinsics_vec256 f01 = f010;
    309        Lib_IntVector_Intrinsics_vec256 f111 = f110;
    310        Lib_IntVector_Intrinsics_vec256 f2 = f20;
    311        Lib_IntVector_Intrinsics_vec256 f3 = f30;
    312        Lib_IntVector_Intrinsics_vec256 f41 = f40;
    313        e[0U] = f01;
    314        e[1U] = f111;
    315        e[2U] = f2;
    316        e[3U] = f3;
    317        e[4U] = f41;
    318        uint64_t b = (uint64_t)0x1000000U;
    319        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
    320        Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
    321        e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
    322        Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
    323        Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
    324        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
    325        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
    326        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
    327        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
    328        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
    329        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
    330        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
    331        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
    332        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
    333        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
    334        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
    335        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
    336        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
    337        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
    338        Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
    339        Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
    340        Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
    341        Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
    342        Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
    343        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
    344        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
    345        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
    346        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
    347        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
    348        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
    349        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
    350        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
    351        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
    352        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
    353        Lib_IntVector_Intrinsics_vec256
    354            a03 =
    355                Lib_IntVector_Intrinsics_vec256_add64(a02,
    356                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
    357        Lib_IntVector_Intrinsics_vec256
    358            a13 =
    359                Lib_IntVector_Intrinsics_vec256_add64(a12,
    360                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
    361        Lib_IntVector_Intrinsics_vec256
    362            a23 =
    363                Lib_IntVector_Intrinsics_vec256_add64(a22,
    364                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
    365        Lib_IntVector_Intrinsics_vec256
    366            a33 =
    367                Lib_IntVector_Intrinsics_vec256_add64(a32,
    368                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
    369        Lib_IntVector_Intrinsics_vec256
    370            a43 =
    371                Lib_IntVector_Intrinsics_vec256_add64(a42,
    372                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
    373        Lib_IntVector_Intrinsics_vec256
    374            a04 =
    375                Lib_IntVector_Intrinsics_vec256_add64(a03,
    376                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
    377        Lib_IntVector_Intrinsics_vec256
    378            a14 =
    379                Lib_IntVector_Intrinsics_vec256_add64(a13,
    380                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
    381        Lib_IntVector_Intrinsics_vec256
    382            a24 =
    383                Lib_IntVector_Intrinsics_vec256_add64(a23,
    384                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
    385        Lib_IntVector_Intrinsics_vec256
    386            a34 =
    387                Lib_IntVector_Intrinsics_vec256_add64(a33,
    388                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
    389        Lib_IntVector_Intrinsics_vec256
    390            a44 =
    391                Lib_IntVector_Intrinsics_vec256_add64(a43,
    392                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
    393        Lib_IntVector_Intrinsics_vec256
    394            a05 =
    395                Lib_IntVector_Intrinsics_vec256_add64(a04,
    396                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
    397        Lib_IntVector_Intrinsics_vec256
    398            a15 =
    399                Lib_IntVector_Intrinsics_vec256_add64(a14,
    400                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
    401        Lib_IntVector_Intrinsics_vec256
    402            a25 =
    403                Lib_IntVector_Intrinsics_vec256_add64(a24,
    404                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
    405        Lib_IntVector_Intrinsics_vec256
    406            a35 =
    407                Lib_IntVector_Intrinsics_vec256_add64(a34,
    408                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
    409        Lib_IntVector_Intrinsics_vec256
    410            a45 =
    411                Lib_IntVector_Intrinsics_vec256_add64(a44,
    412                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
    413        Lib_IntVector_Intrinsics_vec256
    414            a06 =
    415                Lib_IntVector_Intrinsics_vec256_add64(a05,
    416                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
    417        Lib_IntVector_Intrinsics_vec256
    418            a16 =
    419                Lib_IntVector_Intrinsics_vec256_add64(a15,
    420                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
    421        Lib_IntVector_Intrinsics_vec256
    422            a26 =
    423                Lib_IntVector_Intrinsics_vec256_add64(a25,
    424                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
    425        Lib_IntVector_Intrinsics_vec256
    426            a36 =
    427                Lib_IntVector_Intrinsics_vec256_add64(a35,
    428                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
    429        Lib_IntVector_Intrinsics_vec256
    430            a46 =
    431                Lib_IntVector_Intrinsics_vec256_add64(a45,
    432                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
    433        Lib_IntVector_Intrinsics_vec256 t01 = a06;
    434        Lib_IntVector_Intrinsics_vec256 t11 = a16;
    435        Lib_IntVector_Intrinsics_vec256 t2 = a26;
    436        Lib_IntVector_Intrinsics_vec256 t3 = a36;
    437        Lib_IntVector_Intrinsics_vec256 t4 = a46;
    438        Lib_IntVector_Intrinsics_vec256
    439            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
    440        Lib_IntVector_Intrinsics_vec256
    441            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
    442        Lib_IntVector_Intrinsics_vec256
    443            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
    444        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
    445        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
    446        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
    447        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
    448        Lib_IntVector_Intrinsics_vec256
    449            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
    450        Lib_IntVector_Intrinsics_vec256
    451            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
    452        Lib_IntVector_Intrinsics_vec256
    453            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
    454        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
    455        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
    456        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
    457        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
    458        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
    459        Lib_IntVector_Intrinsics_vec256
    460            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
    461        Lib_IntVector_Intrinsics_vec256
    462            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
    463        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
    464        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
    465        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
    466        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
    467        Lib_IntVector_Intrinsics_vec256
    468            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
    469        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
    470        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
    471        Lib_IntVector_Intrinsics_vec256 o0 = x02;
    472        Lib_IntVector_Intrinsics_vec256 o1 = x12;
    473        Lib_IntVector_Intrinsics_vec256 o2 = x21;
    474        Lib_IntVector_Intrinsics_vec256 o3 = x32;
    475        Lib_IntVector_Intrinsics_vec256 o4 = x42;
    476        acc0[0U] = o0;
    477        acc0[1U] = o1;
    478        acc0[2U] = o2;
    479        acc0[3U] = o3;
    480        acc0[4U] = o4;
    481    }
    482    if (rem1 > (uint32_t)0U) {
    483        uint8_t *last = t10 + nb * (uint32_t)16U;
    484        KRML_PRE_ALIGN(32)
    485        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
    486        uint8_t tmp[16U] = { 0U };
    487        memcpy(tmp, last, rem1 * sizeof(uint8_t));
    488        uint64_t u0 = load64_le(tmp);
    489        uint64_t lo = u0;
    490        uint64_t u = load64_le(tmp + (uint32_t)8U);
    491        uint64_t hi = u;
    492        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
    493        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
    494        Lib_IntVector_Intrinsics_vec256
    495            f010 =
    496                Lib_IntVector_Intrinsics_vec256_and(f0,
    497                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    498        Lib_IntVector_Intrinsics_vec256
    499            f110 =
    500                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
    501                                                                                                  (uint32_t)26U),
    502                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    503        Lib_IntVector_Intrinsics_vec256
    504            f20 =
    505                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
    506                                                                                                 (uint32_t)52U),
    507                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
    508                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
    509                                                                                                (uint32_t)12U));
    510        Lib_IntVector_Intrinsics_vec256
    511            f30 =
    512                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
    513                                                                                                  (uint32_t)14U),
    514                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    515        Lib_IntVector_Intrinsics_vec256
    516            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
    517        Lib_IntVector_Intrinsics_vec256 f01 = f010;
    518        Lib_IntVector_Intrinsics_vec256 f111 = f110;
    519        Lib_IntVector_Intrinsics_vec256 f2 = f20;
    520        Lib_IntVector_Intrinsics_vec256 f3 = f30;
    521        Lib_IntVector_Intrinsics_vec256 f4 = f40;
    522        e[0U] = f01;
    523        e[1U] = f111;
    524        e[2U] = f2;
    525        e[3U] = f3;
    526        e[4U] = f4;
    527        uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
    528        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
    529        Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
    530        e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask);
    531        Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
    532        Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
    533        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
    534        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
    535        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
    536        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
    537        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
    538        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
    539        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
    540        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
    541        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
    542        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
    543        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
    544        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
    545        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
    546        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
    547        Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
    548        Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
    549        Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
    550        Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
    551        Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
    552        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
    553        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
    554        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
    555        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
    556        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
    557        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
    558        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
    559        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
    560        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
    561        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
    562        Lib_IntVector_Intrinsics_vec256
    563            a03 =
    564                Lib_IntVector_Intrinsics_vec256_add64(a02,
    565                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
    566        Lib_IntVector_Intrinsics_vec256
    567            a13 =
    568                Lib_IntVector_Intrinsics_vec256_add64(a12,
    569                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
    570        Lib_IntVector_Intrinsics_vec256
    571            a23 =
    572                Lib_IntVector_Intrinsics_vec256_add64(a22,
    573                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
    574        Lib_IntVector_Intrinsics_vec256
    575            a33 =
    576                Lib_IntVector_Intrinsics_vec256_add64(a32,
    577                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
    578        Lib_IntVector_Intrinsics_vec256
    579            a43 =
    580                Lib_IntVector_Intrinsics_vec256_add64(a42,
    581                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
    582        Lib_IntVector_Intrinsics_vec256
    583            a04 =
    584                Lib_IntVector_Intrinsics_vec256_add64(a03,
    585                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
    586        Lib_IntVector_Intrinsics_vec256
    587            a14 =
    588                Lib_IntVector_Intrinsics_vec256_add64(a13,
    589                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
    590        Lib_IntVector_Intrinsics_vec256
    591            a24 =
    592                Lib_IntVector_Intrinsics_vec256_add64(a23,
    593                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
    594        Lib_IntVector_Intrinsics_vec256
    595            a34 =
    596                Lib_IntVector_Intrinsics_vec256_add64(a33,
    597                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
    598        Lib_IntVector_Intrinsics_vec256
    599            a44 =
    600                Lib_IntVector_Intrinsics_vec256_add64(a43,
    601                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
    602        Lib_IntVector_Intrinsics_vec256
    603            a05 =
    604                Lib_IntVector_Intrinsics_vec256_add64(a04,
    605                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
    606        Lib_IntVector_Intrinsics_vec256
    607            a15 =
    608                Lib_IntVector_Intrinsics_vec256_add64(a14,
    609                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
    610        Lib_IntVector_Intrinsics_vec256
    611            a25 =
    612                Lib_IntVector_Intrinsics_vec256_add64(a24,
    613                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
    614        Lib_IntVector_Intrinsics_vec256
    615            a35 =
    616                Lib_IntVector_Intrinsics_vec256_add64(a34,
    617                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
    618        Lib_IntVector_Intrinsics_vec256
    619            a45 =
    620                Lib_IntVector_Intrinsics_vec256_add64(a44,
    621                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
    622        Lib_IntVector_Intrinsics_vec256
    623            a06 =
    624                Lib_IntVector_Intrinsics_vec256_add64(a05,
    625                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
    626        Lib_IntVector_Intrinsics_vec256
    627            a16 =
    628                Lib_IntVector_Intrinsics_vec256_add64(a15,
    629                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
    630        Lib_IntVector_Intrinsics_vec256
    631            a26 =
    632                Lib_IntVector_Intrinsics_vec256_add64(a25,
    633                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
    634        Lib_IntVector_Intrinsics_vec256
    635            a36 =
    636                Lib_IntVector_Intrinsics_vec256_add64(a35,
    637                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
    638        Lib_IntVector_Intrinsics_vec256
    639            a46 =
    640                Lib_IntVector_Intrinsics_vec256_add64(a45,
    641                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
    642        Lib_IntVector_Intrinsics_vec256 t01 = a06;
    643        Lib_IntVector_Intrinsics_vec256 t11 = a16;
    644        Lib_IntVector_Intrinsics_vec256 t2 = a26;
    645        Lib_IntVector_Intrinsics_vec256 t3 = a36;
    646        Lib_IntVector_Intrinsics_vec256 t4 = a46;
    647        Lib_IntVector_Intrinsics_vec256
    648            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
    649        Lib_IntVector_Intrinsics_vec256
    650            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
    651        Lib_IntVector_Intrinsics_vec256
    652            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
    653        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
    654        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
    655        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
    656        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
    657        Lib_IntVector_Intrinsics_vec256
    658            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
    659        Lib_IntVector_Intrinsics_vec256
    660            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
    661        Lib_IntVector_Intrinsics_vec256
    662            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
    663        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
    664        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
    665        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
    666        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
    667        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
    668        Lib_IntVector_Intrinsics_vec256
    669            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
    670        Lib_IntVector_Intrinsics_vec256
    671            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
    672        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
    673        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
    674        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
    675        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
    676        Lib_IntVector_Intrinsics_vec256
    677            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
    678        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
    679        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
    680        Lib_IntVector_Intrinsics_vec256 o0 = x02;
    681        Lib_IntVector_Intrinsics_vec256 o1 = x12;
    682        Lib_IntVector_Intrinsics_vec256 o2 = x21;
    683        Lib_IntVector_Intrinsics_vec256 o3 = x32;
    684        Lib_IntVector_Intrinsics_vec256 o4 = x42;
    685        acc0[0U] = o0;
    686        acc0[1U] = o1;
    687        acc0[2U] = o2;
    688        acc0[3U] = o3;
    689        acc0[4U] = o4;
    690    }
    691    uint8_t tmp[16U] = { 0U };
    692    memcpy(tmp, rem, r * sizeof(uint8_t));
    693    if (r > (uint32_t)0U) {
    694        Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
    695        Lib_IntVector_Intrinsics_vec256 *acc = ctx;
    696        KRML_PRE_ALIGN(32)
    697        Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
    698        uint64_t u0 = load64_le(tmp);
    699        uint64_t lo = u0;
    700        uint64_t u = load64_le(tmp + (uint32_t)8U);
    701        uint64_t hi = u;
    702        Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
    703        Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
    704        Lib_IntVector_Intrinsics_vec256
    705            f010 =
    706                Lib_IntVector_Intrinsics_vec256_and(f0,
    707                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    708        Lib_IntVector_Intrinsics_vec256
    709            f110 =
    710                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
    711                                                                                                  (uint32_t)26U),
    712                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    713        Lib_IntVector_Intrinsics_vec256
    714            f20 =
    715                Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
    716                                                                                                 (uint32_t)52U),
    717                                                   Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
    718                                                                                                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
    719                                                                                                (uint32_t)12U));
    720        Lib_IntVector_Intrinsics_vec256
    721            f30 =
    722                Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
    723                                                                                                  (uint32_t)14U),
    724                                                    Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    725        Lib_IntVector_Intrinsics_vec256
    726            f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
    727        Lib_IntVector_Intrinsics_vec256 f01 = f010;
    728        Lib_IntVector_Intrinsics_vec256 f111 = f110;
    729        Lib_IntVector_Intrinsics_vec256 f2 = f20;
    730        Lib_IntVector_Intrinsics_vec256 f3 = f30;
    731        Lib_IntVector_Intrinsics_vec256 f41 = f40;
    732        e[0U] = f01;
    733        e[1U] = f111;
    734        e[2U] = f2;
    735        e[3U] = f3;
    736        e[4U] = f41;
    737        uint64_t b = (uint64_t)0x1000000U;
    738        Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
    739        Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
    740        e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
    741        Lib_IntVector_Intrinsics_vec256 *r1 = pre;
    742        Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
    743        Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
    744        Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
    745        Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
    746        Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
    747        Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
    748        Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
    749        Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
    750        Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
    751        Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
    752        Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
    753        Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
    754        Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
    755        Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
    756        Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
    757        Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
    758        Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
    759        Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
    760        Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
    761        Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
    762        Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
    763        Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
    764        Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
    765        Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
    766        Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
    767        Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
    768        Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
    769        Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
    770        Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
    771        Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
    772        Lib_IntVector_Intrinsics_vec256
    773            a03 =
    774                Lib_IntVector_Intrinsics_vec256_add64(a02,
    775                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
    776        Lib_IntVector_Intrinsics_vec256
    777            a13 =
    778                Lib_IntVector_Intrinsics_vec256_add64(a12,
    779                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
    780        Lib_IntVector_Intrinsics_vec256
    781            a23 =
    782                Lib_IntVector_Intrinsics_vec256_add64(a22,
    783                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
    784        Lib_IntVector_Intrinsics_vec256
    785            a33 =
    786                Lib_IntVector_Intrinsics_vec256_add64(a32,
    787                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
    788        Lib_IntVector_Intrinsics_vec256
    789            a43 =
    790                Lib_IntVector_Intrinsics_vec256_add64(a42,
    791                                                      Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
    792        Lib_IntVector_Intrinsics_vec256
    793            a04 =
    794                Lib_IntVector_Intrinsics_vec256_add64(a03,
    795                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
    796        Lib_IntVector_Intrinsics_vec256
    797            a14 =
    798                Lib_IntVector_Intrinsics_vec256_add64(a13,
    799                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
    800        Lib_IntVector_Intrinsics_vec256
    801            a24 =
    802                Lib_IntVector_Intrinsics_vec256_add64(a23,
    803                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
    804        Lib_IntVector_Intrinsics_vec256
    805            a34 =
    806                Lib_IntVector_Intrinsics_vec256_add64(a33,
    807                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
    808        Lib_IntVector_Intrinsics_vec256
    809            a44 =
    810                Lib_IntVector_Intrinsics_vec256_add64(a43,
    811                                                      Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
    812        Lib_IntVector_Intrinsics_vec256
    813            a05 =
    814                Lib_IntVector_Intrinsics_vec256_add64(a04,
    815                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
    816        Lib_IntVector_Intrinsics_vec256
    817            a15 =
    818                Lib_IntVector_Intrinsics_vec256_add64(a14,
    819                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
    820        Lib_IntVector_Intrinsics_vec256
    821            a25 =
    822                Lib_IntVector_Intrinsics_vec256_add64(a24,
    823                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
    824        Lib_IntVector_Intrinsics_vec256
    825            a35 =
    826                Lib_IntVector_Intrinsics_vec256_add64(a34,
    827                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
    828        Lib_IntVector_Intrinsics_vec256
    829            a45 =
    830                Lib_IntVector_Intrinsics_vec256_add64(a44,
    831                                                      Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
    832        Lib_IntVector_Intrinsics_vec256
    833            a06 =
    834                Lib_IntVector_Intrinsics_vec256_add64(a05,
    835                                                      Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
    836        Lib_IntVector_Intrinsics_vec256
    837            a16 =
    838                Lib_IntVector_Intrinsics_vec256_add64(a15,
    839                                                      Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
    840        Lib_IntVector_Intrinsics_vec256
    841            a26 =
    842                Lib_IntVector_Intrinsics_vec256_add64(a25,
    843                                                      Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
    844        Lib_IntVector_Intrinsics_vec256
    845            a36 =
    846                Lib_IntVector_Intrinsics_vec256_add64(a35,
    847                                                      Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
    848        Lib_IntVector_Intrinsics_vec256
    849            a46 =
    850                Lib_IntVector_Intrinsics_vec256_add64(a45,
    851                                                      Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
    852        Lib_IntVector_Intrinsics_vec256 t0 = a06;
    853        Lib_IntVector_Intrinsics_vec256 t1 = a16;
    854        Lib_IntVector_Intrinsics_vec256 t2 = a26;
    855        Lib_IntVector_Intrinsics_vec256 t3 = a36;
    856        Lib_IntVector_Intrinsics_vec256 t4 = a46;
    857        Lib_IntVector_Intrinsics_vec256
    858            mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
    859        Lib_IntVector_Intrinsics_vec256
    860            z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
    861        Lib_IntVector_Intrinsics_vec256
    862            z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
    863        Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
    864        Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
    865        Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
    866        Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
    867        Lib_IntVector_Intrinsics_vec256
    868            z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
    869        Lib_IntVector_Intrinsics_vec256
    870            z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
    871        Lib_IntVector_Intrinsics_vec256
    872            t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
    873        Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
    874        Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
    875        Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
    876        Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
    877        Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
    878        Lib_IntVector_Intrinsics_vec256
    879            z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
    880        Lib_IntVector_Intrinsics_vec256
    881            z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
    882        Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
    883        Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
    884        Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
    885        Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
    886        Lib_IntVector_Intrinsics_vec256
    887            z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
    888        Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
    889        Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
    890        Lib_IntVector_Intrinsics_vec256 o0 = x02;
    891        Lib_IntVector_Intrinsics_vec256 o1 = x12;
    892        Lib_IntVector_Intrinsics_vec256 o2 = x21;
    893        Lib_IntVector_Intrinsics_vec256 o3 = x32;
    894        Lib_IntVector_Intrinsics_vec256 o4 = x42;
    895        acc[0U] = o0;
    896        acc[1U] = o1;
    897        acc[2U] = o2;
    898        acc[3U] = o3;
    899        acc[4U] = o4;
    900        return;
    901    }
    902 }
    903 
    904 static inline void
    905 poly1305_do_256(
    906    uint8_t *k,
    907    uint32_t aadlen,
    908    uint8_t *aad,
    909    uint32_t mlen,
    910    uint8_t *m,
    911    uint8_t *out)
    912 {
    913    KRML_PRE_ALIGN(32)
    914    Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U };
    915    uint8_t block[16U] = { 0U };
    916    Hacl_Poly1305_256_poly1305_init(ctx, k);
    917    if (aadlen != (uint32_t)0U) {
    918        poly1305_padded_256(ctx, aadlen, aad);
    919    }
    920    if (mlen != (uint32_t)0U) {
    921        poly1305_padded_256(ctx, mlen, m);
    922    }
    923    store64_le(block, (uint64_t)aadlen);
    924    store64_le(block + (uint32_t)8U, (uint64_t)mlen);
    925    Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
    926    Lib_IntVector_Intrinsics_vec256 *acc = ctx;
    927    KRML_PRE_ALIGN(32)
    928    Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
    929    uint64_t u0 = load64_le(block);
    930    uint64_t lo = u0;
    931    uint64_t u = load64_le(block + (uint32_t)8U);
    932    uint64_t hi = u;
    933    Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
    934    Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
    935    Lib_IntVector_Intrinsics_vec256
    936        f010 =
    937            Lib_IntVector_Intrinsics_vec256_and(f0,
    938                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    939    Lib_IntVector_Intrinsics_vec256
    940        f110 =
    941            Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
    942                                                                                              (uint32_t)26U),
    943                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    944    Lib_IntVector_Intrinsics_vec256
    945        f20 =
    946            Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
    947                                                                                             (uint32_t)52U),
    948                                               Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
    949                                                                                                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
    950                                                                                            (uint32_t)12U));
    951    Lib_IntVector_Intrinsics_vec256
    952        f30 =
    953            Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
    954                                                                                              (uint32_t)14U),
    955                                                Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
    956    Lib_IntVector_Intrinsics_vec256
    957        f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
    958    Lib_IntVector_Intrinsics_vec256 f01 = f010;
    959    Lib_IntVector_Intrinsics_vec256 f111 = f110;
    960    Lib_IntVector_Intrinsics_vec256 f2 = f20;
    961    Lib_IntVector_Intrinsics_vec256 f3 = f30;
    962    Lib_IntVector_Intrinsics_vec256 f41 = f40;
    963    e[0U] = f01;
    964    e[1U] = f111;
    965    e[2U] = f2;
    966    e[3U] = f3;
    967    e[4U] = f41;
    968    uint64_t b = (uint64_t)0x1000000U;
    969    Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
    970    Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
    971    e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
    972    Lib_IntVector_Intrinsics_vec256 *r = pre;
    973    Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
    974    Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
    975    Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
    976    Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
    977    Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
    978    Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
    979    Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
    980    Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
    981    Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
    982    Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
    983    Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
    984    Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
    985    Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
    986    Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
    987    Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
    988    Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
    989    Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
    990    Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
    991    Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
    992    Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
    993    Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
    994    Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
    995    Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
    996    Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
    997    Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
    998    Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
    999    Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
   1000    Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
   1001    Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
   1002    Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
   1003    Lib_IntVector_Intrinsics_vec256
   1004        a03 =
   1005            Lib_IntVector_Intrinsics_vec256_add64(a02,
   1006                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
   1007    Lib_IntVector_Intrinsics_vec256
   1008        a13 =
   1009            Lib_IntVector_Intrinsics_vec256_add64(a12,
   1010                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
   1011    Lib_IntVector_Intrinsics_vec256
   1012        a23 =
   1013            Lib_IntVector_Intrinsics_vec256_add64(a22,
   1014                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
   1015    Lib_IntVector_Intrinsics_vec256
   1016        a33 =
   1017            Lib_IntVector_Intrinsics_vec256_add64(a32,
   1018                                                  Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
   1019    Lib_IntVector_Intrinsics_vec256
   1020        a43 =
   1021            Lib_IntVector_Intrinsics_vec256_add64(a42,
   1022                                                  Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
   1023    Lib_IntVector_Intrinsics_vec256
   1024        a04 =
   1025            Lib_IntVector_Intrinsics_vec256_add64(a03,
   1026                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
   1027    Lib_IntVector_Intrinsics_vec256
   1028        a14 =
   1029            Lib_IntVector_Intrinsics_vec256_add64(a13,
   1030                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
   1031    Lib_IntVector_Intrinsics_vec256
   1032        a24 =
   1033            Lib_IntVector_Intrinsics_vec256_add64(a23,
   1034                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
   1035    Lib_IntVector_Intrinsics_vec256
   1036        a34 =
   1037            Lib_IntVector_Intrinsics_vec256_add64(a33,
   1038                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
   1039    Lib_IntVector_Intrinsics_vec256
   1040        a44 =
   1041            Lib_IntVector_Intrinsics_vec256_add64(a43,
   1042                                                  Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
   1043    Lib_IntVector_Intrinsics_vec256
   1044        a05 =
   1045            Lib_IntVector_Intrinsics_vec256_add64(a04,
   1046                                                  Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
   1047    Lib_IntVector_Intrinsics_vec256
   1048        a15 =
   1049            Lib_IntVector_Intrinsics_vec256_add64(a14,
   1050                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
   1051    Lib_IntVector_Intrinsics_vec256
   1052        a25 =
   1053            Lib_IntVector_Intrinsics_vec256_add64(a24,
   1054                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
   1055    Lib_IntVector_Intrinsics_vec256
   1056        a35 =
   1057            Lib_IntVector_Intrinsics_vec256_add64(a34,
   1058                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
   1059    Lib_IntVector_Intrinsics_vec256
   1060        a45 =
   1061            Lib_IntVector_Intrinsics_vec256_add64(a44,
   1062                                                  Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
   1063    Lib_IntVector_Intrinsics_vec256
   1064        a06 =
   1065            Lib_IntVector_Intrinsics_vec256_add64(a05,
   1066                                                  Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
   1067    Lib_IntVector_Intrinsics_vec256
   1068        a16 =
   1069            Lib_IntVector_Intrinsics_vec256_add64(a15,
   1070                                                  Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
   1071    Lib_IntVector_Intrinsics_vec256
   1072        a26 =
   1073            Lib_IntVector_Intrinsics_vec256_add64(a25,
   1074                                                  Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
   1075    Lib_IntVector_Intrinsics_vec256
   1076        a36 =
   1077            Lib_IntVector_Intrinsics_vec256_add64(a35,
   1078                                                  Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
   1079    Lib_IntVector_Intrinsics_vec256
   1080        a46 =
   1081            Lib_IntVector_Intrinsics_vec256_add64(a45,
   1082                                                  Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
   1083    Lib_IntVector_Intrinsics_vec256 t0 = a06;
   1084    Lib_IntVector_Intrinsics_vec256 t1 = a16;
   1085    Lib_IntVector_Intrinsics_vec256 t2 = a26;
   1086    Lib_IntVector_Intrinsics_vec256 t3 = a36;
   1087    Lib_IntVector_Intrinsics_vec256 t4 = a46;
   1088    Lib_IntVector_Intrinsics_vec256
   1089        mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
   1090    Lib_IntVector_Intrinsics_vec256
   1091        z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
   1092    Lib_IntVector_Intrinsics_vec256
   1093        z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
   1094    Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
   1095    Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
   1096    Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
   1097    Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
   1098    Lib_IntVector_Intrinsics_vec256
   1099        z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
   1100    Lib_IntVector_Intrinsics_vec256
   1101        z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
   1102    Lib_IntVector_Intrinsics_vec256
   1103        t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
   1104    Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
   1105    Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
   1106    Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
   1107    Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
   1108    Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
   1109    Lib_IntVector_Intrinsics_vec256
   1110        z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
   1111    Lib_IntVector_Intrinsics_vec256
   1112        z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
   1113    Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
   1114    Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
   1115    Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
   1116    Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
   1117    Lib_IntVector_Intrinsics_vec256
   1118        z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
   1119    Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
   1120    Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
   1121    Lib_IntVector_Intrinsics_vec256 o0 = x02;
   1122    Lib_IntVector_Intrinsics_vec256 o1 = x12;
   1123    Lib_IntVector_Intrinsics_vec256 o2 = x21;
   1124    Lib_IntVector_Intrinsics_vec256 o3 = x32;
   1125    Lib_IntVector_Intrinsics_vec256 o4 = x42;
   1126    acc[0U] = o0;
   1127    acc[1U] = o1;
   1128    acc[2U] = o2;
   1129    acc[3U] = o3;
   1130    acc[4U] = o4;
   1131    Hacl_Poly1305_256_poly1305_finish(out, k, ctx);
   1132 }
   1133 
   1134 /**
   1135 Encrypt a message `m` with key `k`.
   1136 
   1137 The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
   1138 Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
   1139 
   1140 @param k Pointer to 32 bytes of memory where the AEAD key is read from.
   1141 @param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
   1142 @param aadlen Length of the associated data.
   1143 @param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
   1144 
   1145 @param mlen Length of the message.
   1146 @param m Pointer to `mlen` bytes of memory where the message is read from.
   1147 @param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
   1148 @param mac Pointer to 16 bytes of memory where the mac is written to.
   1149 */
   1150 void
   1151 Hacl_Chacha20Poly1305_256_aead_encrypt(
   1152    uint8_t *k,
   1153    uint8_t *n,
   1154    uint32_t aadlen,
   1155    uint8_t *aad,
   1156    uint32_t mlen,
   1157    uint8_t *m,
   1158    uint8_t *cipher,
   1159    uint8_t *mac)
   1160 {
   1161    Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U);
   1162    uint8_t tmp[64U] = { 0U };
   1163    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
   1164    uint8_t *key = tmp;
   1165    poly1305_do_256(key, aadlen, aad, mlen, cipher, mac);
   1166 }
   1167 
   1168 /**
   1169 Decrypt a ciphertext `cipher` with key `k`.
   1170 
   1171 The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
   1172 Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
   1173 
   1174 If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0.
   1175 If decryption fails, the array `m` remains unchanged and the function returns the error code 1.
   1176 
   1177 @param k Pointer to 32 bytes of memory where the AEAD key is read from.
   1178 @param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
   1179 @param aadlen Length of the associated data.
   1180 @param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
   1181 
   1182 @param mlen Length of the ciphertext.
   1183 @param m Pointer to `mlen` bytes of memory where the message is written to.
   1184 @param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from.
   1185 @param mac Pointer to 16 bytes of memory where the mac is read from.
   1186 
   1187 @returns 0 on succeess; 1 on failure.
   1188 */
   1189 uint32_t
   1190 Hacl_Chacha20Poly1305_256_aead_decrypt(
   1191    uint8_t *k,
   1192    uint8_t *n,
   1193    uint32_t aadlen,
   1194    uint8_t *aad,
   1195    uint32_t mlen,
   1196    uint8_t *m,
   1197    uint8_t *cipher,
   1198    uint8_t *mac)
   1199 {
   1200    uint8_t computed_mac[16U] = { 0U };
   1201    uint8_t tmp[64U] = { 0U };
   1202    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
   1203    uint8_t *key = tmp;
   1204    poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac);
   1205    uint8_t res = (uint8_t)255U;
   1206    KRML_MAYBE_FOR16(i,
   1207                     (uint32_t)0U,
   1208                     (uint32_t)16U,
   1209                     (uint32_t)1U,
   1210                     uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
   1211                     res = uu____0 & res;);
   1212    uint8_t z = res;
   1213    if (z == (uint8_t)255U) {
   1214        Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U);
   1215        return (uint32_t)0U;
   1216    }
   1217    return (uint32_t)1U;
   1218 }