tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Hacl_P521.c (59334B)


      1 /* MIT License
      2 *
      3 * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
      4 * Copyright (c) 2022-2023 HACL* Contributors
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a copy
      7 * of this software and associated documentation files (the "Software"), to deal
      8 * in the Software without restriction, including without limitation the rights
      9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10 * copies of the Software, and to permit persons to whom the Software is
     11 * furnished to do so, subject to the following conditions:
     12 *
     13 * The above copyright notice and this permission notice shall be included in all
     14 * copies or substantial portions of the Software.
     15 *
     16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     22 * SOFTWARE.
     23 */
     24 
     25 #include "Hacl_P521.h"
     26 
     27 #include "internal/Hacl_Krmllib.h"
     28 #include "internal/Hacl_Bignum_Base.h"
     29 
     30 static inline uint64_t
     31 bn_is_eq_mask(uint64_t *x, uint64_t *y)
     32 {
     33    uint64_t mask = 0xFFFFFFFFFFFFFFFFULL;
     34    KRML_MAYBE_FOR9(i,
     35                    0U,
     36                    9U,
     37                    1U,
     38                    uint64_t uu____0 = FStar_UInt64_eq_mask(x[i], y[i]);
     39                    mask = uu____0 & mask;);
     40    uint64_t mask1 = mask;
     41    return mask1;
     42 }
     43 
     44 static inline void
     45 bn_cmovznz(uint64_t *a, uint64_t b, uint64_t *c, uint64_t *d)
     46 {
     47    uint64_t mask = ~FStar_UInt64_eq_mask(b, 0ULL);
     48    KRML_MAYBE_FOR9(i,
     49                    0U,
     50                    9U,
     51                    1U,
     52                    uint64_t *os = a;
     53                    uint64_t uu____0 = c[i];
     54                    uint64_t x = uu____0 ^ (mask & (d[i] ^ uu____0));
     55                    os[i] = x;);
     56 }
     57 
     58 static inline void
     59 bn_add_mod(uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d)
     60 {
     61    uint64_t c10 = 0ULL;
     62    KRML_MAYBE_FOR2(i,
     63                    0U,
     64                    2U,
     65                    1U,
     66                    uint64_t t1 = c[4U * i];
     67                    uint64_t t20 = d[4U * i];
     68                    uint64_t *res_i0 = a + 4U * i;
     69                    c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t1, t20, res_i0);
     70                    uint64_t t10 = c[4U * i + 1U];
     71                    uint64_t t21 = d[4U * i + 1U];
     72                    uint64_t *res_i1 = a + 4U * i + 1U;
     73                    c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t10, t21, res_i1);
     74                    uint64_t t11 = c[4U * i + 2U];
     75                    uint64_t t22 = d[4U * i + 2U];
     76                    uint64_t *res_i2 = a + 4U * i + 2U;
     77                    c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t11, t22, res_i2);
     78                    uint64_t t12 = c[4U * i + 3U];
     79                    uint64_t t2 = d[4U * i + 3U];
     80                    uint64_t *res_i = a + 4U * i + 3U;
     81                    c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t12, t2, res_i););
     82    {
     83        uint64_t t1 = c[8U];
     84        uint64_t t2 = d[8U];
     85        uint64_t *res_i = a + 8U;
     86        c10 = Lib_IntTypes_Intrinsics_add_carry_u64(c10, t1, t2, res_i);
     87    }
     88    uint64_t c0 = c10;
     89    uint64_t tmp[9U] = { 0U };
     90    uint64_t c1 = 0ULL;
     91    KRML_MAYBE_FOR2(i,
     92                    0U,
     93                    2U,
     94                    1U,
     95                    uint64_t t1 = a[4U * i];
     96                    uint64_t t20 = b[4U * i];
     97                    uint64_t *res_i0 = tmp + 4U * i;
     98                    c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0);
     99                    uint64_t t10 = a[4U * i + 1U];
    100                    uint64_t t21 = b[4U * i + 1U];
    101                    uint64_t *res_i1 = tmp + 4U * i + 1U;
    102                    c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1);
    103                    uint64_t t11 = a[4U * i + 2U];
    104                    uint64_t t22 = b[4U * i + 2U];
    105                    uint64_t *res_i2 = tmp + 4U * i + 2U;
    106                    c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2);
    107                    uint64_t t12 = a[4U * i + 3U];
    108                    uint64_t t2 = b[4U * i + 3U];
    109                    uint64_t *res_i = tmp + 4U * i + 3U;
    110                    c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i););
    111    {
    112        uint64_t t1 = a[8U];
    113        uint64_t t2 = b[8U];
    114        uint64_t *res_i = tmp + 8U;
    115        c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i);
    116    }
    117    uint64_t c11 = c1;
    118    uint64_t c2 = c0 - c11;
    119    KRML_MAYBE_FOR9(i,
    120                    0U,
    121                    9U,
    122                    1U,
    123                    uint64_t *os = a;
    124                    uint64_t x = (c2 & a[i]) | (~c2 & tmp[i]);
    125                    os[i] = x;);
    126 }
    127 
    128 static inline uint64_t
    129 bn_sub(uint64_t *a, uint64_t *b, uint64_t *c)
    130 {
    131    uint64_t c1 = 0ULL;
    132    KRML_MAYBE_FOR2(i,
    133                    0U,
    134                    2U,
    135                    1U,
    136                    uint64_t t1 = b[4U * i];
    137                    uint64_t t20 = c[4U * i];
    138                    uint64_t *res_i0 = a + 4U * i;
    139                    c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0);
    140                    uint64_t t10 = b[4U * i + 1U];
    141                    uint64_t t21 = c[4U * i + 1U];
    142                    uint64_t *res_i1 = a + 4U * i + 1U;
    143                    c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1);
    144                    uint64_t t11 = b[4U * i + 2U];
    145                    uint64_t t22 = c[4U * i + 2U];
    146                    uint64_t *res_i2 = a + 4U * i + 2U;
    147                    c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2);
    148                    uint64_t t12 = b[4U * i + 3U];
    149                    uint64_t t2 = c[4U * i + 3U];
    150                    uint64_t *res_i = a + 4U * i + 3U;
    151                    c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i););
    152    {
    153        uint64_t t1 = b[8U];
    154        uint64_t t2 = c[8U];
    155        uint64_t *res_i = a + 8U;
    156        c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i);
    157    }
    158    uint64_t c10 = c1;
    159    return c10;
    160 }
    161 
    162 static inline void
    163 bn_sub_mod(uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d)
    164 {
    165    uint64_t c10 = 0ULL;
    166    KRML_MAYBE_FOR2(i,
    167                    0U,
    168                    2U,
    169                    1U,
    170                    uint64_t t1 = c[4U * i];
    171                    uint64_t t20 = d[4U * i];
    172                    uint64_t *res_i0 = a + 4U * i;
    173                    c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t1, t20, res_i0);
    174                    uint64_t t10 = c[4U * i + 1U];
    175                    uint64_t t21 = d[4U * i + 1U];
    176                    uint64_t *res_i1 = a + 4U * i + 1U;
    177                    c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t10, t21, res_i1);
    178                    uint64_t t11 = c[4U * i + 2U];
    179                    uint64_t t22 = d[4U * i + 2U];
    180                    uint64_t *res_i2 = a + 4U * i + 2U;
    181                    c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t11, t22, res_i2);
    182                    uint64_t t12 = c[4U * i + 3U];
    183                    uint64_t t2 = d[4U * i + 3U];
    184                    uint64_t *res_i = a + 4U * i + 3U;
    185                    c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t12, t2, res_i););
    186    {
    187        uint64_t t1 = c[8U];
    188        uint64_t t2 = d[8U];
    189        uint64_t *res_i = a + 8U;
    190        c10 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c10, t1, t2, res_i);
    191    }
    192    uint64_t c0 = c10;
    193    uint64_t tmp[9U] = { 0U };
    194    uint64_t c1 = 0ULL;
    195    KRML_MAYBE_FOR2(i,
    196                    0U,
    197                    2U,
    198                    1U,
    199                    uint64_t t1 = a[4U * i];
    200                    uint64_t t20 = b[4U * i];
    201                    uint64_t *res_i0 = tmp + 4U * i;
    202                    c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t1, t20, res_i0);
    203                    uint64_t t10 = a[4U * i + 1U];
    204                    uint64_t t21 = b[4U * i + 1U];
    205                    uint64_t *res_i1 = tmp + 4U * i + 1U;
    206                    c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t10, t21, res_i1);
    207                    uint64_t t11 = a[4U * i + 2U];
    208                    uint64_t t22 = b[4U * i + 2U];
    209                    uint64_t *res_i2 = tmp + 4U * i + 2U;
    210                    c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t11, t22, res_i2);
    211                    uint64_t t12 = a[4U * i + 3U];
    212                    uint64_t t2 = b[4U * i + 3U];
    213                    uint64_t *res_i = tmp + 4U * i + 3U;
    214                    c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t12, t2, res_i););
    215    {
    216        uint64_t t1 = a[8U];
    217        uint64_t t2 = b[8U];
    218        uint64_t *res_i = tmp + 8U;
    219        c1 = Lib_IntTypes_Intrinsics_add_carry_u64(c1, t1, t2, res_i);
    220    }
    221    uint64_t c11 = c1;
    222    KRML_MAYBE_UNUSED_VAR(c11);
    223    uint64_t c2 = 0ULL - c0;
    224    KRML_MAYBE_FOR9(i,
    225                    0U,
    226                    9U,
    227                    1U,
    228                    uint64_t *os = a;
    229                    uint64_t x = (c2 & tmp[i]) | (~c2 & a[i]);
    230                    os[i] = x;);
    231 }
    232 
    233 static inline void
    234 bn_mul(uint64_t *a, uint64_t *b, uint64_t *c)
    235 {
    236    memset(a, 0U, 18U * sizeof(uint64_t));
    237    KRML_MAYBE_FOR9(
    238        i0,
    239        0U,
    240        9U,
    241        1U,
    242        uint64_t bj = c[i0];
    243        uint64_t *res_j = a + i0;
    244        uint64_t c1 = 0ULL;
    245        KRML_MAYBE_FOR2(i,
    246                        0U,
    247                        2U,
    248                        1U,
    249                        uint64_t a_i = b[4U * i];
    250                        uint64_t *res_i0 = res_j + 4U * i;
    251                        c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c1, res_i0);
    252                        uint64_t a_i0 = b[4U * i + 1U];
    253                        uint64_t *res_i1 = res_j + 4U * i + 1U;
    254                        c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c1, res_i1);
    255                        uint64_t a_i1 = b[4U * i + 2U];
    256                        uint64_t *res_i2 = res_j + 4U * i + 2U;
    257                        c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c1, res_i2);
    258                        uint64_t a_i2 = b[4U * i + 3U];
    259                        uint64_t *res_i = res_j + 4U * i + 3U;
    260                        c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c1, res_i););
    261        {
    262            uint64_t a_i = b[8U];
    263            uint64_t *res_i = res_j + 8U;
    264            c1 = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c1, res_i);
    265        } uint64_t r = c1;
    266        a[9U + i0] = r;);
    267 }
    268 
    269 static inline void
    270 bn_sqr(uint64_t *a, uint64_t *b)
    271 {
    272    memset(a, 0U, 18U * sizeof(uint64_t));
    273    KRML_MAYBE_FOR9(
    274        i0,
    275        0U,
    276        9U,
    277        1U,
    278        uint64_t *ab = b;
    279        uint64_t a_j = b[i0];
    280        uint64_t *res_j = a + i0;
    281        uint64_t c = 0ULL;
    282        for (uint32_t i = 0U; i < i0 / 4U; i++) {
    283            uint64_t a_i = ab[4U * i];
    284            uint64_t *res_i0 = res_j + 4U * i;
    285            c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i0);
    286            uint64_t a_i0 = ab[4U * i + 1U];
    287            uint64_t *res_i1 = res_j + 4U * i + 1U;
    288            c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c, res_i1);
    289            uint64_t a_i1 = ab[4U * i + 2U];
    290            uint64_t *res_i2 = res_j + 4U * i + 2U;
    291            c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c, res_i2);
    292            uint64_t a_i2 = ab[4U * i + 3U];
    293            uint64_t *res_i = res_j + 4U * i + 3U;
    294            c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c, res_i);
    295        } for (uint32_t i = i0 / 4U * 4U; i < i0; i++) {
    296            uint64_t a_i = ab[i];
    297            uint64_t *res_i = res_j + i;
    298            c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i);
    299        } uint64_t r = c;
    300        a[i0 + i0] = r;);
    301    uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(18U, a, a, a);
    302    KRML_MAYBE_UNUSED_VAR(c0);
    303    uint64_t tmp[18U] = { 0U };
    304    KRML_MAYBE_FOR9(i,
    305                    0U,
    306                    9U,
    307                    1U,
    308                    FStar_UInt128_uint128 res = FStar_UInt128_mul_wide(b[i], b[i]);
    309                    uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res, 64U));
    310                    uint64_t lo = FStar_UInt128_uint128_to_uint64(res);
    311                    tmp[2U * i] = lo;
    312                    tmp[2U * i + 1U] = hi;);
    313    uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(18U, a, tmp, a);
    314    KRML_MAYBE_UNUSED_VAR(c1);
    315 }
    316 
    317 static inline void
    318 bn_to_bytes_be(uint8_t *a, uint64_t *b)
    319 {
    320    uint8_t tmp[72U] = { 0U };
    321    KRML_MAYBE_FOR9(i, 0U, 9U, 1U, store64_be(tmp + i * 8U, b[9U - i - 1U]););
    322    memcpy(a, tmp + 6U, 66U * sizeof(uint8_t));
    323 }
    324 
    325 static inline void
    326 bn_from_bytes_be(uint64_t *a, uint8_t *b)
    327 {
    328    uint8_t tmp[72U] = { 0U };
    329    memcpy(tmp + 6U, b, 66U * sizeof(uint8_t));
    330    KRML_MAYBE_FOR9(i,
    331                    0U,
    332                    9U,
    333                    1U,
    334                    uint64_t *os = a;
    335                    uint64_t u = load64_be(tmp + (9U - i - 1U) * 8U);
    336                    uint64_t x = u;
    337                    os[i] = x;);
    338 }
    339 
    340 static inline void
    341 p521_make_prime(uint64_t *n)
    342 {
    343    n[0U] = 0xffffffffffffffffULL;
    344    n[1U] = 0xffffffffffffffffULL;
    345    n[2U] = 0xffffffffffffffffULL;
    346    n[3U] = 0xffffffffffffffffULL;
    347    n[4U] = 0xffffffffffffffffULL;
    348    n[5U] = 0xffffffffffffffffULL;
    349    n[6U] = 0xffffffffffffffffULL;
    350    n[7U] = 0xffffffffffffffffULL;
    351    n[8U] = 0x1ffULL;
    352 }
    353 
    354 static inline void
    355 p521_make_order(uint64_t *n)
    356 {
    357    n[0U] = 0xbb6fb71e91386409ULL;
    358    n[1U] = 0x3bb5c9b8899c47aeULL;
    359    n[2U] = 0x7fcc0148f709a5d0ULL;
    360    n[3U] = 0x51868783bf2f966bULL;
    361    n[4U] = 0xfffffffffffffffaULL;
    362    n[5U] = 0xffffffffffffffffULL;
    363    n[6U] = 0xffffffffffffffffULL;
    364    n[7U] = 0xffffffffffffffffULL;
    365    n[8U] = 0x1ffULL;
    366 }
    367 
    368 static inline void
    369 p521_make_a_coeff(uint64_t *a)
    370 {
    371    a[0U] = 0xfe7fffffffffffffULL;
    372    a[1U] = 0xffffffffffffffffULL;
    373    a[2U] = 0xffffffffffffffffULL;
    374    a[3U] = 0xffffffffffffffffULL;
    375    a[4U] = 0xffffffffffffffffULL;
    376    a[5U] = 0xffffffffffffffffULL;
    377    a[6U] = 0xffffffffffffffffULL;
    378    a[7U] = 0xffffffffffffffffULL;
    379    a[8U] = 0x01ffULL;
    380 }
    381 
    382 static inline void
    383 p521_make_b_coeff(uint64_t *b)
    384 {
    385    b[0U] = 0x8014654fae586387ULL;
    386    b[1U] = 0x78f7a28fea35a81fULL;
    387    b[2U] = 0x839ab9efc41e961aULL;
    388    b[3U] = 0xbd8b29605e9dd8dfULL;
    389    b[4U] = 0xf0ab0c9ca8f63f49ULL;
    390    b[5U] = 0xf9dc5a44c8c77884ULL;
    391    b[6U] = 0x77516d392dccd98aULL;
    392    b[7U] = 0x0fc94d10d05b42a0ULL;
    393    b[8U] = 0x4dULL;
    394 }
    395 
    396 static inline void
    397 p521_make_g_x(uint64_t *n)
    398 {
    399    n[0U] = 0xb331a16381adc101ULL;
    400    n[1U] = 0x4dfcbf3f18e172deULL;
    401    n[2U] = 0x6f19a459e0c2b521ULL;
    402    n[3U] = 0x947f0ee093d17fd4ULL;
    403    n[4U] = 0xdd50a5af3bf7f3acULL;
    404    n[5U] = 0x90fc1457b035a69eULL;
    405    n[6U] = 0x214e32409c829fdaULL;
    406    n[7U] = 0xe6cf1f65b311cadaULL;
    407    n[8U] = 0x74ULL;
    408 }
    409 
    410 static inline void
    411 p521_make_g_y(uint64_t *n)
    412 {
    413    n[0U] = 0x28460e4a5a9e268eULL;
    414    n[1U] = 0x20445f4a3b4fe8b3ULL;
    415    n[2U] = 0xb09a9e3843513961ULL;
    416    n[3U] = 0x2062a85c809fd683ULL;
    417    n[4U] = 0x164bf7394caf7a13ULL;
    418    n[5U] = 0x340bd7de8b939f33ULL;
    419    n[6U] = 0xeccc7aa224abcda2ULL;
    420    n[7U] = 0x022e452fda163e8dULL;
    421    n[8U] = 0x1e0ULL;
    422 }
    423 
    424 static inline void
    425 p521_make_fmont_R2(uint64_t *n)
    426 {
    427    n[0U] = 0x0ULL;
    428    n[1U] = 0x400000000000ULL;
    429    n[2U] = 0x0ULL;
    430    n[3U] = 0x0ULL;
    431    n[4U] = 0x0ULL;
    432    n[5U] = 0x0ULL;
    433    n[6U] = 0x0ULL;
    434    n[7U] = 0x0ULL;
    435    n[8U] = 0x0ULL;
    436 }
    437 
    438 static inline void
    439 p521_make_fzero(uint64_t *n)
    440 {
    441    memset(n, 0U, 9U * sizeof(uint64_t));
    442    n[0U] = 0ULL;
    443 }
    444 
    445 static inline void
    446 p521_make_fone(uint64_t *n)
    447 {
    448    n[0U] = 0x80000000000000ULL;
    449    n[1U] = 0x0ULL;
    450    n[2U] = 0x0ULL;
    451    n[3U] = 0x0ULL;
    452    n[4U] = 0x0ULL;
    453    n[5U] = 0x0ULL;
    454    n[6U] = 0x0ULL;
    455    n[7U] = 0x0ULL;
    456    n[8U] = 0x0ULL;
    457 }
    458 
    459 static inline void
    460 p521_make_qone(uint64_t *f)
    461 {
    462    f[0U] = 0xfb80000000000000ULL;
    463    f[1U] = 0x28a2482470b763cdULL;
    464    f[2U] = 0x17e2251b23bb31dcULL;
    465    f[3U] = 0xca4019ff5b847b2dULL;
    466    f[4U] = 0x2d73cbc3e206834ULL;
    467    f[5U] = 0x0ULL;
    468    f[6U] = 0x0ULL;
    469    f[7U] = 0x0ULL;
    470    f[8U] = 0x0ULL;
    471 }
    472 
    473 static inline void
    474 fmont_reduction(uint64_t *res, uint64_t *x)
    475 {
    476    uint64_t n[9U] = { 0U };
    477    p521_make_prime(n);
    478    uint64_t c0 = 0ULL;
    479    KRML_MAYBE_FOR9(
    480        i0,
    481        0U,
    482        9U,
    483        1U,
    484        uint64_t qj = 1ULL * x[i0];
    485        uint64_t *res_j0 = x + i0;
    486        uint64_t c = 0ULL;
    487        KRML_MAYBE_FOR2(i,
    488                        0U,
    489                        2U,
    490                        1U,
    491                        uint64_t a_i = n[4U * i];
    492                        uint64_t *res_i0 = res_j0 + 4U * i;
    493                        c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0);
    494                        uint64_t a_i0 = n[4U * i + 1U];
    495                        uint64_t *res_i1 = res_j0 + 4U * i + 1U;
    496                        c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1);
    497                        uint64_t a_i1 = n[4U * i + 2U];
    498                        uint64_t *res_i2 = res_j0 + 4U * i + 2U;
    499                        c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2);
    500                        uint64_t a_i2 = n[4U * i + 3U];
    501                        uint64_t *res_i = res_j0 + 4U * i + 3U;
    502                        c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i););
    503        {
    504            uint64_t a_i = n[8U];
    505            uint64_t *res_i = res_j0 + 8U;
    506            c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i);
    507        } uint64_t r = c;
    508        uint64_t c1 = r;
    509        uint64_t *resb = x + 9U + i0;
    510        uint64_t res_j = x[9U + i0];
    511        c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb););
    512    memcpy(res, x + 9U, 9U * sizeof(uint64_t));
    513    uint64_t c00 = c0;
    514    uint64_t tmp[9U] = { 0U };
    515    uint64_t c = 0ULL;
    516    KRML_MAYBE_FOR2(i,
    517                    0U,
    518                    2U,
    519                    1U,
    520                    uint64_t t1 = res[4U * i];
    521                    uint64_t t20 = n[4U * i];
    522                    uint64_t *res_i0 = tmp + 4U * i;
    523                    c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0);
    524                    uint64_t t10 = res[4U * i + 1U];
    525                    uint64_t t21 = n[4U * i + 1U];
    526                    uint64_t *res_i1 = tmp + 4U * i + 1U;
    527                    c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1);
    528                    uint64_t t11 = res[4U * i + 2U];
    529                    uint64_t t22 = n[4U * i + 2U];
    530                    uint64_t *res_i2 = tmp + 4U * i + 2U;
    531                    c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2);
    532                    uint64_t t12 = res[4U * i + 3U];
    533                    uint64_t t2 = n[4U * i + 3U];
    534                    uint64_t *res_i = tmp + 4U * i + 3U;
    535                    c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i););
    536    {
    537        uint64_t t1 = res[8U];
    538        uint64_t t2 = n[8U];
    539        uint64_t *res_i = tmp + 8U;
    540        c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i);
    541    }
    542    uint64_t c1 = c;
    543    uint64_t c2 = c00 - c1;
    544    KRML_MAYBE_FOR9(i,
    545                    0U,
    546                    9U,
    547                    1U,
    548                    uint64_t *os = res;
    549                    uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]);
    550                    os[i] = x1;);
    551 }
    552 
    553 static inline void
    554 qmont_reduction(uint64_t *res, uint64_t *x)
    555 {
    556    uint64_t n[9U] = { 0U };
    557    p521_make_order(n);
    558    uint64_t c0 = 0ULL;
    559    KRML_MAYBE_FOR9(
    560        i0,
    561        0U,
    562        9U,
    563        1U,
    564        uint64_t qj = 2103001588584519111ULL * x[i0];
    565        uint64_t *res_j0 = x + i0;
    566        uint64_t c = 0ULL;
    567        KRML_MAYBE_FOR2(i,
    568                        0U,
    569                        2U,
    570                        1U,
    571                        uint64_t a_i = n[4U * i];
    572                        uint64_t *res_i0 = res_j0 + 4U * i;
    573                        c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0);
    574                        uint64_t a_i0 = n[4U * i + 1U];
    575                        uint64_t *res_i1 = res_j0 + 4U * i + 1U;
    576                        c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1);
    577                        uint64_t a_i1 = n[4U * i + 2U];
    578                        uint64_t *res_i2 = res_j0 + 4U * i + 2U;
    579                        c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2);
    580                        uint64_t a_i2 = n[4U * i + 3U];
    581                        uint64_t *res_i = res_j0 + 4U * i + 3U;
    582                        c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i););
    583        {
    584            uint64_t a_i = n[8U];
    585            uint64_t *res_i = res_j0 + 8U;
    586            c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i);
    587        } uint64_t r = c;
    588        uint64_t c1 = r;
    589        uint64_t *resb = x + 9U + i0;
    590        uint64_t res_j = x[9U + i0];
    591        c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb););
    592    memcpy(res, x + 9U, 9U * sizeof(uint64_t));
    593    uint64_t c00 = c0;
    594    uint64_t tmp[9U] = { 0U };
    595    uint64_t c = 0ULL;
    596    KRML_MAYBE_FOR2(i,
    597                    0U,
    598                    2U,
    599                    1U,
    600                    uint64_t t1 = res[4U * i];
    601                    uint64_t t20 = n[4U * i];
    602                    uint64_t *res_i0 = tmp + 4U * i;
    603                    c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0);
    604                    uint64_t t10 = res[4U * i + 1U];
    605                    uint64_t t21 = n[4U * i + 1U];
    606                    uint64_t *res_i1 = tmp + 4U * i + 1U;
    607                    c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1);
    608                    uint64_t t11 = res[4U * i + 2U];
    609                    uint64_t t22 = n[4U * i + 2U];
    610                    uint64_t *res_i2 = tmp + 4U * i + 2U;
    611                    c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2);
    612                    uint64_t t12 = res[4U * i + 3U];
    613                    uint64_t t2 = n[4U * i + 3U];
    614                    uint64_t *res_i = tmp + 4U * i + 3U;
    615                    c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i););
    616    {
    617        uint64_t t1 = res[8U];
    618        uint64_t t2 = n[8U];
    619        uint64_t *res_i = tmp + 8U;
    620        c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i);
    621    }
    622    uint64_t c1 = c;
    623    uint64_t c2 = c00 - c1;
    624    KRML_MAYBE_FOR9(i,
    625                    0U,
    626                    9U,
    627                    1U,
    628                    uint64_t *os = res;
    629                    uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]);
    630                    os[i] = x1;);
    631 }
    632 
    633 static inline uint64_t
    634 bn_is_lt_prime_mask(uint64_t *f)
    635 {
    636    uint64_t tmp[9U] = { 0U };
    637    p521_make_prime(tmp);
    638    uint64_t c = bn_sub(tmp, f, tmp);
    639    uint64_t m = FStar_UInt64_gte_mask(c, 0ULL) & ~FStar_UInt64_eq_mask(c, 0ULL);
    640    return m;
    641 }
    642 
    643 static inline void
    644 fadd0(uint64_t *a, uint64_t *b, uint64_t *c)
    645 {
    646    uint64_t n[9U] = { 0U };
    647    p521_make_prime(n);
    648    bn_add_mod(a, n, b, c);
    649 }
    650 
    651 static inline void
    652 fsub0(uint64_t *a, uint64_t *b, uint64_t *c)
    653 {
    654    uint64_t n[9U] = { 0U };
    655    p521_make_prime(n);
    656    bn_sub_mod(a, n, b, c);
    657 }
    658 
    659 static inline void
    660 fmul0(uint64_t *a, uint64_t *b, uint64_t *c)
    661 {
    662    uint64_t tmp[18U] = { 0U };
    663    bn_mul(tmp, b, c);
    664    fmont_reduction(a, tmp);
    665 }
    666 
    667 static inline void
    668 fsqr0(uint64_t *a, uint64_t *b)
    669 {
    670    uint64_t tmp[18U] = { 0U };
    671    bn_sqr(tmp, b);
    672    fmont_reduction(a, tmp);
    673 }
    674 
    675 static inline void
    676 from_mont(uint64_t *a, uint64_t *b)
    677 {
    678    uint64_t tmp[18U] = { 0U };
    679    memcpy(tmp, b, 9U * sizeof(uint64_t));
    680    fmont_reduction(a, tmp);
    681 }
    682 
    683 static inline void
    684 to_mont(uint64_t *a, uint64_t *b)
    685 {
    686    uint64_t r2modn[9U] = { 0U };
    687    p521_make_fmont_R2(r2modn);
    688    uint64_t tmp[18U] = { 0U };
    689    bn_mul(tmp, b, r2modn);
    690    fmont_reduction(a, tmp);
    691 }
    692 
    693 static inline void
    694 p521_finv(uint64_t *res, uint64_t *a)
    695 {
    696    uint64_t b[9U] = { 0U };
    697    b[0U] = 0xfffffffffffffffdULL;
    698    b[1U] = 0xffffffffffffffffULL;
    699    b[2U] = 0xffffffffffffffffULL;
    700    b[3U] = 0xffffffffffffffffULL;
    701    b[4U] = 0xffffffffffffffffULL;
    702    b[5U] = 0xffffffffffffffffULL;
    703    b[6U] = 0xffffffffffffffffULL;
    704    b[7U] = 0xffffffffffffffffULL;
    705    b[8U] = 0x1ffULL;
    706    uint64_t tmp[9U] = { 0U };
    707    memcpy(tmp, a, 9U * sizeof(uint64_t));
    708    uint64_t table[288U] = { 0U };
    709    uint64_t tmp1[9U] = { 0U };
    710    uint64_t *t0 = table;
    711    uint64_t *t1 = table + 9U;
    712    p521_make_fone(t0);
    713    memcpy(t1, tmp, 9U * sizeof(uint64_t));
    714    KRML_MAYBE_FOR15(i,
    715                     0U,
    716                     15U,
    717                     1U,
    718                     uint64_t *t11 = table + (i + 1U) * 9U;
    719                     fsqr0(tmp1, t11);
    720                     memcpy(table + (2U * i + 2U) * 9U, tmp1, 9U * sizeof(uint64_t));
    721                     uint64_t *t2 = table + (2U * i + 2U) * 9U;
    722                     fmul0(tmp1, tmp, t2);
    723                     memcpy(table + (2U * i + 3U) * 9U, tmp1, 9U * sizeof(uint64_t)););
    724    uint32_t i0 = 520U;
    725    uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, i0, 5U);
    726    memcpy(res, (uint64_t *)table, 9U * sizeof(uint64_t));
    727    for (uint32_t i1 = 0U; i1 < 31U; i1++) {
    728        uint64_t c = FStar_UInt64_eq_mask(bits_c, (uint64_t)(i1 + 1U));
    729        const uint64_t *res_j = table + (i1 + 1U) * 9U;
    730        KRML_MAYBE_FOR9(i,
    731                        0U,
    732                        9U,
    733                        1U,
    734                        uint64_t *os = res;
    735                        uint64_t x = (c & res_j[i]) | (~c & res[i]);
    736                        os[i] = x;);
    737    }
    738    uint64_t tmp10[9U] = { 0U };
    739    for (uint32_t i1 = 0U; i1 < 104U; i1++) {
    740        KRML_MAYBE_FOR5(i, 0U, 5U, 1U, fsqr0(res, res););
    741        uint32_t k = 520U - 5U * i1 - 5U;
    742        uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, k, 5U);
    743        memcpy(tmp10, (uint64_t *)table, 9U * sizeof(uint64_t));
    744        for (uint32_t i2 = 0U; i2 < 31U; i2++) {
    745            uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + 1U));
    746            const uint64_t *res_j = table + (i2 + 1U) * 9U;
    747            KRML_MAYBE_FOR9(i,
    748                            0U,
    749                            9U,
    750                            1U,
    751                            uint64_t *os = tmp10;
    752                            uint64_t x = (c & res_j[i]) | (~c & tmp10[i]);
    753                            os[i] = x;);
    754        }
    755        fmul0(res, res, tmp10);
    756    }
    757 }
    758 
    759 static inline void
    760 p521_fsqrt(uint64_t *res, uint64_t *a)
    761 {
    762    uint64_t b[9U] = { 0U };
    763    b[0U] = 0x0ULL;
    764    b[1U] = 0x0ULL;
    765    b[2U] = 0x0ULL;
    766    b[3U] = 0x0ULL;
    767    b[4U] = 0x0ULL;
    768    b[5U] = 0x0ULL;
    769    b[6U] = 0x0ULL;
    770    b[7U] = 0x0ULL;
    771    b[8U] = 0x80ULL;
    772    uint64_t tmp[9U] = { 0U };
    773    memcpy(tmp, a, 9U * sizeof(uint64_t));
    774    uint64_t table[288U] = { 0U };
    775    uint64_t tmp1[9U] = { 0U };
    776    uint64_t *t0 = table;
    777    uint64_t *t1 = table + 9U;
    778    p521_make_fone(t0);
    779    memcpy(t1, tmp, 9U * sizeof(uint64_t));
    780    KRML_MAYBE_FOR15(i,
    781                     0U,
    782                     15U,
    783                     1U,
    784                     uint64_t *t11 = table + (i + 1U) * 9U;
    785                     fsqr0(tmp1, t11);
    786                     memcpy(table + (2U * i + 2U) * 9U, tmp1, 9U * sizeof(uint64_t));
    787                     uint64_t *t2 = table + (2U * i + 2U) * 9U;
    788                     fmul0(tmp1, tmp, t2);
    789                     memcpy(table + (2U * i + 3U) * 9U, tmp1, 9U * sizeof(uint64_t)););
    790    uint32_t i0 = 520U;
    791    uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, i0, 5U);
    792    memcpy(res, (uint64_t *)table, 9U * sizeof(uint64_t));
    793    for (uint32_t i1 = 0U; i1 < 31U; i1++) {
    794        uint64_t c = FStar_UInt64_eq_mask(bits_c, (uint64_t)(i1 + 1U));
    795        const uint64_t *res_j = table + (i1 + 1U) * 9U;
    796        KRML_MAYBE_FOR9(i,
    797                        0U,
    798                        9U,
    799                        1U,
    800                        uint64_t *os = res;
    801                        uint64_t x = (c & res_j[i]) | (~c & res[i]);
    802                        os[i] = x;);
    803    }
    804    uint64_t tmp10[9U] = { 0U };
    805    for (uint32_t i1 = 0U; i1 < 104U; i1++) {
    806        KRML_MAYBE_FOR5(i, 0U, 5U, 1U, fsqr0(res, res););
    807        uint32_t k = 520U - 5U * i1 - 5U;
    808        uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, k, 5U);
    809        memcpy(tmp10, (uint64_t *)table, 9U * sizeof(uint64_t));
    810        for (uint32_t i2 = 0U; i2 < 31U; i2++) {
    811            uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + 1U));
    812            const uint64_t *res_j = table + (i2 + 1U) * 9U;
    813            KRML_MAYBE_FOR9(i,
    814                            0U,
    815                            9U,
    816                            1U,
    817                            uint64_t *os = tmp10;
    818                            uint64_t x = (c & res_j[i]) | (~c & tmp10[i]);
    819                            os[i] = x;);
    820        }
    821        fmul0(res, res, tmp10);
    822    }
    823 }
    824 
    825 static inline uint64_t
    826 load_qelem_conditional(uint64_t *a, uint8_t *b)
    827 {
    828    bn_from_bytes_be(a, b);
    829    uint64_t tmp[9U] = { 0U };
    830    p521_make_order(tmp);
    831    uint64_t c = bn_sub(tmp, a, tmp);
    832    uint64_t is_lt_order = FStar_UInt64_gte_mask(c, 0ULL) & ~FStar_UInt64_eq_mask(c, 0ULL);
    833    uint64_t bn_zero[9U] = { 0U };
    834    uint64_t res = bn_is_eq_mask(a, bn_zero);
    835    uint64_t is_eq_zero = res;
    836    uint64_t is_b_valid = is_lt_order & ~is_eq_zero;
    837    uint64_t oneq[9U] = { 0U };
    838    memset(oneq, 0U, 9U * sizeof(uint64_t));
    839    oneq[0U] = 1ULL;
    840    KRML_MAYBE_FOR9(i,
    841                    0U,
    842                    9U,
    843                    1U,
    844                    uint64_t *os = a;
    845                    uint64_t uu____0 = oneq[i];
    846                    uint64_t x = uu____0 ^ (is_b_valid & (a[i] ^ uu____0));
    847                    os[i] = x;);
    848    return is_b_valid;
    849 }
    850 
    851 static inline void
    852 qmod_short(uint64_t *a, uint64_t *b)
    853 {
    854    uint64_t tmp[9U] = { 0U };
    855    p521_make_order(tmp);
    856    uint64_t c = bn_sub(tmp, b, tmp);
    857    bn_cmovznz(a, c, tmp, b);
    858 }
    859 
    860 static inline void
    861 qadd(uint64_t *a, uint64_t *b, uint64_t *c)
    862 {
    863    uint64_t n[9U] = { 0U };
    864    p521_make_order(n);
    865    bn_add_mod(a, n, b, c);
    866 }
    867 
    868 static inline void
    869 qmul(uint64_t *a, uint64_t *b, uint64_t *c)
    870 {
    871    uint64_t tmp[18U] = { 0U };
    872    bn_mul(tmp, b, c);
    873    qmont_reduction(a, tmp);
    874 }
    875 
    876 static inline void
    877 qsqr(uint64_t *a, uint64_t *b)
    878 {
    879    uint64_t tmp[18U] = { 0U };
    880    bn_sqr(tmp, b);
    881    qmont_reduction(a, tmp);
    882 }
    883 
    884 static inline void
    885 from_qmont(uint64_t *a, uint64_t *b)
    886 {
    887    uint64_t tmp[18U] = { 0U };
    888    memcpy(tmp, b, 9U * sizeof(uint64_t));
    889    qmont_reduction(a, tmp);
    890 }
    891 
    892 static inline void
    893 p521_qinv(uint64_t *res, uint64_t *a)
    894 {
    895    uint64_t b[9U] = { 0U };
    896    b[0U] = 0xbb6fb71e91386407ULL;
    897    b[1U] = 0x3bb5c9b8899c47aeULL;
    898    b[2U] = 0x7fcc0148f709a5d0ULL;
    899    b[3U] = 0x51868783bf2f966bULL;
    900    b[4U] = 0xfffffffffffffffaULL;
    901    b[5U] = 0xffffffffffffffffULL;
    902    b[6U] = 0xffffffffffffffffULL;
    903    b[7U] = 0xffffffffffffffffULL;
    904    b[8U] = 0x1ffULL;
    905    uint64_t tmp[9U] = { 0U };
    906    memcpy(tmp, a, 9U * sizeof(uint64_t));
    907    uint64_t table[288U] = { 0U };
    908    uint64_t tmp1[9U] = { 0U };
    909    uint64_t *t0 = table;
    910    uint64_t *t1 = table + 9U;
    911    p521_make_qone(t0);
    912    memcpy(t1, tmp, 9U * sizeof(uint64_t));
    913    KRML_MAYBE_FOR15(i,
    914                     0U,
    915                     15U,
    916                     1U,
    917                     uint64_t *t11 = table + (i + 1U) * 9U;
    918                     qsqr(tmp1, t11);
    919                     memcpy(table + (2U * i + 2U) * 9U, tmp1, 9U * sizeof(uint64_t));
    920                     uint64_t *t2 = table + (2U * i + 2U) * 9U;
    921                     qmul(tmp1, tmp, t2);
    922                     memcpy(table + (2U * i + 3U) * 9U, tmp1, 9U * sizeof(uint64_t)););
    923    uint32_t i0 = 520U;
    924    uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, i0, 5U);
    925    memcpy(res, (uint64_t *)table, 9U * sizeof(uint64_t));
    926    for (uint32_t i1 = 0U; i1 < 31U; i1++) {
    927        uint64_t c = FStar_UInt64_eq_mask(bits_c, (uint64_t)(i1 + 1U));
    928        const uint64_t *res_j = table + (i1 + 1U) * 9U;
    929        KRML_MAYBE_FOR9(i,
    930                        0U,
    931                        9U,
    932                        1U,
    933                        uint64_t *os = res;
    934                        uint64_t x = (c & res_j[i]) | (~c & res[i]);
    935                        os[i] = x;);
    936    }
    937    uint64_t tmp10[9U] = { 0U };
    938    for (uint32_t i1 = 0U; i1 < 104U; i1++) {
    939        KRML_MAYBE_FOR5(i, 0U, 5U, 1U, qsqr(res, res););
    940        uint32_t k = 520U - 5U * i1 - 5U;
    941        uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(9U, b, k, 5U);
    942        memcpy(tmp10, (uint64_t *)table, 9U * sizeof(uint64_t));
    943        for (uint32_t i2 = 0U; i2 < 31U; i2++) {
    944            uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + 1U));
    945            const uint64_t *res_j = table + (i2 + 1U) * 9U;
    946            KRML_MAYBE_FOR9(i,
    947                            0U,
    948                            9U,
    949                            1U,
    950                            uint64_t *os = tmp10;
    951                            uint64_t x = (c & res_j[i]) | (~c & tmp10[i]);
    952                            os[i] = x;);
    953        }
    954        qmul(res, res, tmp10);
    955    }
    956 }
    957 
    958 static inline void
    959 point_add(uint64_t *x, uint64_t *y, uint64_t *xy)
    960 {
    961    uint64_t tmp[81U] = { 0U };
    962    uint64_t *t0 = tmp;
    963    uint64_t *t1 = tmp + 54U;
    964    uint64_t *x3 = t1;
    965    uint64_t *y3 = t1 + 9U;
    966    uint64_t *z3 = t1 + 18U;
    967    uint64_t *t01 = t0;
    968    uint64_t *t11 = t0 + 9U;
    969    uint64_t *t2 = t0 + 18U;
    970    uint64_t *t3 = t0 + 27U;
    971    uint64_t *t4 = t0 + 36U;
    972    uint64_t *t5 = t0 + 45U;
    973    uint64_t *x1 = x;
    974    uint64_t *y1 = x + 9U;
    975    uint64_t *z10 = x + 18U;
    976    uint64_t *x20 = y;
    977    uint64_t *y20 = y + 9U;
    978    uint64_t *z20 = y + 18U;
    979    fmul0(t01, x1, x20);
    980    fmul0(t11, y1, y20);
    981    fmul0(t2, z10, z20);
    982    fadd0(t3, x1, y1);
    983    fadd0(t4, x20, y20);
    984    fmul0(t3, t3, t4);
    985    fadd0(t4, t01, t11);
    986    uint64_t *y10 = x + 9U;
    987    uint64_t *z11 = x + 18U;
    988    uint64_t *y2 = y + 9U;
    989    uint64_t *z21 = y + 18U;
    990    fsub0(t3, t3, t4);
    991    fadd0(t4, y10, z11);
    992    fadd0(t5, y2, z21);
    993    fmul0(t4, t4, t5);
    994    fadd0(t5, t11, t2);
    995    fsub0(t4, t4, t5);
    996    uint64_t *x10 = x;
    997    uint64_t *z1 = x + 18U;
    998    uint64_t *x2 = y;
    999    uint64_t *z2 = y + 18U;
   1000    fadd0(x3, x10, z1);
   1001    fadd0(y3, x2, z2);
   1002    fmul0(x3, x3, y3);
   1003    fadd0(y3, t01, t2);
   1004    fsub0(y3, x3, y3);
   1005    uint64_t b_coeff[9U] = { 0U };
   1006    p521_make_b_coeff(b_coeff);
   1007    fmul0(z3, b_coeff, t2);
   1008    fsub0(x3, y3, z3);
   1009    fadd0(z3, x3, x3);
   1010    fadd0(x3, x3, z3);
   1011    fsub0(z3, t11, x3);
   1012    fadd0(x3, t11, x3);
   1013    uint64_t b_coeff0[9U] = { 0U };
   1014    p521_make_b_coeff(b_coeff0);
   1015    fmul0(y3, b_coeff0, y3);
   1016    fadd0(t11, t2, t2);
   1017    fadd0(t2, t11, t2);
   1018    fsub0(y3, y3, t2);
   1019    fsub0(y3, y3, t01);
   1020    fadd0(t11, y3, y3);
   1021    fadd0(y3, t11, y3);
   1022    fadd0(t11, t01, t01);
   1023    fadd0(t01, t11, t01);
   1024    fsub0(t01, t01, t2);
   1025    fmul0(t11, t4, y3);
   1026    fmul0(t2, t01, y3);
   1027    fmul0(y3, x3, z3);
   1028    fadd0(y3, y3, t2);
   1029    fmul0(x3, t3, x3);
   1030    fsub0(x3, x3, t11);
   1031    fmul0(z3, t4, z3);
   1032    fmul0(t11, t3, t01);
   1033    fadd0(z3, z3, t11);
   1034    memcpy(xy, t1, 27U * sizeof(uint64_t));
   1035 }
   1036 
   1037 static inline void
   1038 point_double(uint64_t *x, uint64_t *xx)
   1039 {
   1040    uint64_t tmp[45U] = { 0U };
   1041    uint64_t *x1 = x;
   1042    uint64_t *z = x + 18U;
   1043    uint64_t *x3 = xx;
   1044    uint64_t *y3 = xx + 9U;
   1045    uint64_t *z3 = xx + 18U;
   1046    uint64_t *t0 = tmp;
   1047    uint64_t *t1 = tmp + 9U;
   1048    uint64_t *t2 = tmp + 18U;
   1049    uint64_t *t3 = tmp + 27U;
   1050    uint64_t *t4 = tmp + 36U;
   1051    uint64_t *x2 = x;
   1052    uint64_t *y = x + 9U;
   1053    uint64_t *z1 = x + 18U;
   1054    fsqr0(t0, x2);
   1055    fsqr0(t1, y);
   1056    fsqr0(t2, z1);
   1057    fmul0(t3, x2, y);
   1058    fadd0(t3, t3, t3);
   1059    fmul0(t4, y, z1);
   1060    fmul0(z3, x1, z);
   1061    fadd0(z3, z3, z3);
   1062    uint64_t b_coeff[9U] = { 0U };
   1063    p521_make_b_coeff(b_coeff);
   1064    fmul0(y3, b_coeff, t2);
   1065    fsub0(y3, y3, z3);
   1066    fadd0(x3, y3, y3);
   1067    fadd0(y3, x3, y3);
   1068    fsub0(x3, t1, y3);
   1069    fadd0(y3, t1, y3);
   1070    fmul0(y3, x3, y3);
   1071    fmul0(x3, x3, t3);
   1072    fadd0(t3, t2, t2);
   1073    fadd0(t2, t2, t3);
   1074    uint64_t b_coeff0[9U] = { 0U };
   1075    p521_make_b_coeff(b_coeff0);
   1076    fmul0(z3, b_coeff0, z3);
   1077    fsub0(z3, z3, t2);
   1078    fsub0(z3, z3, t0);
   1079    fadd0(t3, z3, z3);
   1080    fadd0(z3, z3, t3);
   1081    fadd0(t3, t0, t0);
   1082    fadd0(t0, t3, t0);
   1083    fsub0(t0, t0, t2);
   1084    fmul0(t0, t0, z3);
   1085    fadd0(y3, y3, t0);
   1086    fadd0(t0, t4, t4);
   1087    fmul0(z3, t0, z3);
   1088    fsub0(x3, x3, z3);
   1089    fmul0(z3, t0, t1);
   1090    fadd0(z3, z3, z3);
   1091    fadd0(z3, z3, z3);
   1092 }
   1093 
   1094 static inline void
   1095 point_zero(uint64_t *one)
   1096 {
   1097    uint64_t *x = one;
   1098    uint64_t *y = one + 9U;
   1099    uint64_t *z = one + 18U;
   1100    p521_make_fzero(x);
   1101    p521_make_fone(y);
   1102    p521_make_fzero(z);
   1103 }
   1104 
   1105 static inline void
   1106 point_mul(uint64_t *res, uint64_t *scalar, uint64_t *p)
   1107 {
   1108    uint64_t table[432U] = { 0U };
   1109    uint64_t tmp[27U] = { 0U };
   1110    uint64_t *t0 = table;
   1111    uint64_t *t1 = table + 27U;
   1112    point_zero(t0);
   1113    memcpy(t1, p, 27U * sizeof(uint64_t));
   1114    KRML_MAYBE_FOR7(i,
   1115                    0U,
   1116                    7U,
   1117                    1U,
   1118                    uint64_t *t11 = table + (i + 1U) * 27U;
   1119                    point_double(t11, tmp);
   1120                    memcpy(table + (2U * i + 2U) * 27U, tmp, 27U * sizeof(uint64_t));
   1121                    uint64_t *t2 = table + (2U * i + 2U) * 27U;
   1122                    point_add(p, t2, tmp);
   1123                    memcpy(table + (2U * i + 3U) * 27U, tmp, 27U * sizeof(uint64_t)););
   1124    uint32_t i0 = 520U;
   1125    uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64(9U, scalar, i0, 4U);
   1126    memcpy(res, (uint64_t *)table, 27U * sizeof(uint64_t));
   1127    KRML_MAYBE_FOR15(
   1128        i1,
   1129        0U,
   1130        15U,
   1131        1U,
   1132        uint64_t c = FStar_UInt64_eq_mask(bits_c, (uint64_t)(i1 + 1U));
   1133        const uint64_t *res_j = table + (i1 + 1U) * 27U;
   1134        for (uint32_t i = 0U; i < 27U; i++) {
   1135            uint64_t *os = res;
   1136            uint64_t x = (c & res_j[i]) | (~c & res[i]);
   1137            os[i] = x;
   1138        });
   1139    uint64_t tmp0[27U] = { 0U };
   1140    for (uint32_t i1 = 0U; i1 < 130U; i1++) {
   1141        KRML_MAYBE_FOR4(i, 0U, 4U, 1U, point_double(res, res););
   1142        uint32_t k = 520U - 4U * i1 - 4U;
   1143        uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64(9U, scalar, k, 4U);
   1144        memcpy(tmp0, (uint64_t *)table, 27U * sizeof(uint64_t));
   1145        KRML_MAYBE_FOR15(
   1146            i2,
   1147            0U,
   1148            15U,
   1149            1U,
   1150            uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i2 + 1U));
   1151            const uint64_t *res_j = table + (i2 + 1U) * 27U;
   1152            for (uint32_t i = 0U; i < 27U; i++) {
   1153                uint64_t *os = tmp0;
   1154                uint64_t x = (c & res_j[i]) | (~c & tmp0[i]);
   1155                os[i] = x;
   1156            });
   1157        point_add(res, tmp0, res);
   1158    }
   1159 }
   1160 
   1161 static inline void
   1162 point_mul_g(uint64_t *res, uint64_t *scalar)
   1163 {
   1164    uint64_t g[27U] = { 0U };
   1165    uint64_t *x = g;
   1166    uint64_t *y = g + 9U;
   1167    uint64_t *z = g + 18U;
   1168    p521_make_g_x(x);
   1169    p521_make_g_y(y);
   1170    p521_make_fone(z);
   1171    point_mul(res, scalar, g);
   1172 }
   1173 
   1174 static inline void
   1175 point_mul_double_g(uint64_t *res, uint64_t *scalar1, uint64_t *scalar2, uint64_t *p)
   1176 {
   1177    uint64_t tmp[27U] = { 0U };
   1178    point_mul_g(tmp, scalar1);
   1179    point_mul(res, scalar2, p);
   1180    point_add(res, tmp, res);
   1181 }
   1182 
   1183 static inline bool
   1184 ecdsa_sign_msg_as_qelem(
   1185    uint8_t *signature,
   1186    uint64_t *m_q,
   1187    uint8_t *private_key,
   1188    uint8_t *nonce)
   1189 {
   1190    uint64_t rsdk_q[36U] = { 0U };
   1191    uint64_t *r_q = rsdk_q;
   1192    uint64_t *s_q = rsdk_q + 9U;
   1193    uint64_t *d_a = rsdk_q + 18U;
   1194    uint64_t *k_q = rsdk_q + 27U;
   1195    uint64_t is_sk_valid = load_qelem_conditional(d_a, private_key);
   1196    uint64_t is_nonce_valid = load_qelem_conditional(k_q, nonce);
   1197    uint64_t are_sk_nonce_valid = is_sk_valid & is_nonce_valid;
   1198    uint64_t p[27U] = { 0U };
   1199    point_mul_g(p, k_q);
   1200    uint64_t zinv[9U] = { 0U };
   1201    uint64_t *px = p;
   1202    uint64_t *pz = p + 18U;
   1203    p521_finv(zinv, pz);
   1204    fmul0(r_q, px, zinv);
   1205    from_mont(r_q, r_q);
   1206    qmod_short(r_q, r_q);
   1207    uint64_t kinv[9U] = { 0U };
   1208    p521_qinv(kinv, k_q);
   1209    qmul(s_q, r_q, d_a);
   1210    from_qmont(m_q, m_q);
   1211    qadd(s_q, m_q, s_q);
   1212    qmul(s_q, kinv, s_q);
   1213    bn_to_bytes_be(signature, r_q);
   1214    bn_to_bytes_be(signature + 66U, s_q);
   1215    uint64_t bn_zero0[9U] = { 0U };
   1216    uint64_t res = bn_is_eq_mask(r_q, bn_zero0);
   1217    uint64_t is_r_zero = res;
   1218    uint64_t bn_zero[9U] = { 0U };
   1219    uint64_t res0 = bn_is_eq_mask(s_q, bn_zero);
   1220    uint64_t is_s_zero = res0;
   1221    uint64_t m = are_sk_nonce_valid & (~is_r_zero & ~is_s_zero);
   1222    bool res1 = m == 0xFFFFFFFFFFFFFFFFULL;
   1223    return res1;
   1224 }
   1225 
   1226 static inline bool
   1227 ecdsa_verify_msg_as_qelem(
   1228    uint64_t *m_q,
   1229    uint8_t *public_key,
   1230    uint8_t *signature_r,
   1231    uint8_t *signature_s)
   1232 {
   1233    uint64_t tmp[63U] = { 0U };
   1234    uint64_t *pk = tmp;
   1235    uint64_t *r_q = tmp + 27U;
   1236    uint64_t *s_q = tmp + 36U;
   1237    uint64_t *u1 = tmp + 45U;
   1238    uint64_t *u2 = tmp + 54U;
   1239    uint64_t p_aff[18U] = { 0U };
   1240    uint8_t *p_x = public_key;
   1241    uint8_t *p_y = public_key + 66U;
   1242    uint64_t *bn_p_x = p_aff;
   1243    uint64_t *bn_p_y = p_aff + 9U;
   1244    bn_from_bytes_be(bn_p_x, p_x);
   1245    bn_from_bytes_be(bn_p_y, p_y);
   1246    uint64_t *px0 = p_aff;
   1247    uint64_t *py0 = p_aff + 9U;
   1248    uint64_t lessX = bn_is_lt_prime_mask(px0);
   1249    uint64_t lessY = bn_is_lt_prime_mask(py0);
   1250    uint64_t res0 = lessX & lessY;
   1251    bool is_xy_valid = res0 == 0xFFFFFFFFFFFFFFFFULL;
   1252    bool res;
   1253    if (!is_xy_valid) {
   1254        res = false;
   1255    } else {
   1256        uint64_t rp[9U] = { 0U };
   1257        uint64_t tx[9U] = { 0U };
   1258        uint64_t ty[9U] = { 0U };
   1259        uint64_t *px = p_aff;
   1260        uint64_t *py = p_aff + 9U;
   1261        to_mont(tx, px);
   1262        to_mont(ty, py);
   1263        uint64_t tmp1[9U] = { 0U };
   1264        fsqr0(rp, tx);
   1265        fmul0(rp, rp, tx);
   1266        p521_make_a_coeff(tmp1);
   1267        fmul0(tmp1, tmp1, tx);
   1268        fadd0(rp, tmp1, rp);
   1269        p521_make_b_coeff(tmp1);
   1270        fadd0(rp, tmp1, rp);
   1271        fsqr0(ty, ty);
   1272        uint64_t r = bn_is_eq_mask(ty, rp);
   1273        uint64_t r0 = r;
   1274        bool r1 = r0 == 0xFFFFFFFFFFFFFFFFULL;
   1275        res = r1;
   1276    }
   1277    if (res) {
   1278        uint64_t *px = p_aff;
   1279        uint64_t *py = p_aff + 9U;
   1280        uint64_t *rx = pk;
   1281        uint64_t *ry = pk + 9U;
   1282        uint64_t *rz = pk + 18U;
   1283        to_mont(rx, px);
   1284        to_mont(ry, py);
   1285        p521_make_fone(rz);
   1286    }
   1287    bool is_pk_valid = res;
   1288    bn_from_bytes_be(r_q, signature_r);
   1289    bn_from_bytes_be(s_q, signature_s);
   1290    uint64_t tmp10[9U] = { 0U };
   1291    p521_make_order(tmp10);
   1292    uint64_t c = bn_sub(tmp10, r_q, tmp10);
   1293    uint64_t is_lt_order = FStar_UInt64_gte_mask(c, 0ULL) & ~FStar_UInt64_eq_mask(c, 0ULL);
   1294    uint64_t bn_zero0[9U] = { 0U };
   1295    uint64_t res1 = bn_is_eq_mask(r_q, bn_zero0);
   1296    uint64_t is_eq_zero = res1;
   1297    uint64_t is_r_valid = is_lt_order & ~is_eq_zero;
   1298    uint64_t tmp11[9U] = { 0U };
   1299    p521_make_order(tmp11);
   1300    uint64_t c0 = bn_sub(tmp11, s_q, tmp11);
   1301    uint64_t is_lt_order0 = FStar_UInt64_gte_mask(c0, 0ULL) & ~FStar_UInt64_eq_mask(c0, 0ULL);
   1302    uint64_t bn_zero1[9U] = { 0U };
   1303    uint64_t res2 = bn_is_eq_mask(s_q, bn_zero1);
   1304    uint64_t is_eq_zero0 = res2;
   1305    uint64_t is_s_valid = is_lt_order0 & ~is_eq_zero0;
   1306    bool is_rs_valid = is_r_valid == 0xFFFFFFFFFFFFFFFFULL && is_s_valid == 0xFFFFFFFFFFFFFFFFULL;
   1307    if (!(is_pk_valid && is_rs_valid)) {
   1308        return false;
   1309    }
   1310    uint64_t sinv[9U] = { 0U };
   1311    p521_qinv(sinv, s_q);
   1312    uint64_t tmp1[9U] = { 0U };
   1313    from_qmont(tmp1, m_q);
   1314    qmul(u1, sinv, tmp1);
   1315    uint64_t tmp12[9U] = { 0U };
   1316    from_qmont(tmp12, r_q);
   1317    qmul(u2, sinv, tmp12);
   1318    uint64_t res3[27U] = { 0U };
   1319    point_mul_double_g(res3, u1, u2, pk);
   1320    uint64_t *pz0 = res3 + 18U;
   1321    uint64_t bn_zero[9U] = { 0U };
   1322    uint64_t res10 = bn_is_eq_mask(pz0, bn_zero);
   1323    uint64_t m = res10;
   1324    if (m == 0xFFFFFFFFFFFFFFFFULL) {
   1325        return false;
   1326    }
   1327    uint64_t x[9U] = { 0U };
   1328    uint64_t zinv[9U] = { 0U };
   1329    uint64_t *px = res3;
   1330    uint64_t *pz = res3 + 18U;
   1331    p521_finv(zinv, pz);
   1332    fmul0(x, px, zinv);
   1333    from_mont(x, x);
   1334    qmod_short(x, x);
   1335    uint64_t m0 = bn_is_eq_mask(x, r_q);
   1336    bool res11 = m0 == 0xFFFFFFFFFFFFFFFFULL;
   1337    return res11;
   1338 }
   1339 
   1340 /*******************************************************************************
   1341 
   1342 Verified C library for ECDSA and ECDH functions over the P-521 NIST curve.
   1343 
   1344 This module implements signing and verification, key validation, conversions
   1345 between various point representations, and ECDH key agreement.
   1346 
   1347 *******************************************************************************/
   1348 
   1349 /*****************/
   1350 /* ECDSA signing */
   1351 /*****************/
   1352 
   1353 /**
   1354 Create an ECDSA signature WITHOUT hashing first.
   1355 
   1356  This function is intended to receive a hash of the input.
   1357  For convenience, we recommend using one of the hash-and-sign combined functions above.
   1358 
   1359  The argument `msg` MUST be at least 66 bytes (i.e. `msg_len >= 66`).
   1360 
   1361  NOTE: The equivalent functions in OpenSSL and Fiat-Crypto both accept inputs
   1362  smaller than 66 bytes. These libraries left-pad the input with enough zeroes to
   1363  reach the minimum 66 byte size. Clients who need behavior identical to OpenSSL
   1364  need to perform the left-padding themselves.
   1365 
   1366  The function returns `true` for successful creation of an ECDSA signature and `false` otherwise.
   1367 
   1368  The outparam `signature` (R || S) points to 132 bytes of valid memory, i.e., uint8_t[132].
   1369  The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len].
   1370  The arguments `private_key` and `nonce` point to 66 bytes of valid memory, i.e., uint8_t[66].
   1371 
   1372  The function also checks whether `private_key` and `nonce` are valid values:
   1373    • 0 < `private_key` < the order of the curve
   1374    • 0 < `nonce` < the order of the curve
   1375 */
   1376 bool
   1377 Hacl_P521_ecdsa_sign_p521_without_hash(
   1378    uint8_t *signature,
   1379    uint32_t msg_len,
   1380    uint8_t *msg,
   1381    uint8_t *private_key,
   1382    uint8_t *nonce)
   1383 {
   1384    uint64_t m_q[9U] = { 0U };
   1385    uint8_t mHash[66U] = { 0U };
   1386    memcpy(mHash, msg, 66U * sizeof(uint8_t));
   1387    KRML_MAYBE_UNUSED_VAR(msg_len);
   1388    bn_from_bytes_be(m_q, mHash);
   1389    qmod_short(m_q, m_q);
   1390    bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce);
   1391    return res;
   1392 }
   1393 
   1394 /**********************/
   1395 /* ECDSA verification */
   1396 /**********************/
   1397 
   1398 /**
   1399 Verify an ECDSA signature WITHOUT hashing first.
   1400 
   1401  This function is intended to receive a hash of the input.
   1402  For convenience, we recommend using one of the hash-and-verify combined functions above.
   1403 
   1404  The argument `msg` MUST be at least 66 bytes (i.e. `msg_len >= 66`).
   1405 
   1406  The function returns `true` if the signature is valid and `false` otherwise.
   1407 
   1408  The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len].
   1409  The argument `public_key` (x || y) points to 132 bytes of valid memory, i.e., uint8_t[132].
   1410  The arguments `signature_r` and `signature_s` point to 66 bytes of valid memory, i.e., uint8_t[66].
   1411 
   1412  The function also checks whether `public_key` is valid
   1413 */
   1414 bool
   1415 Hacl_P521_ecdsa_verif_without_hash(
   1416    uint32_t msg_len,
   1417    uint8_t *msg,
   1418    uint8_t *public_key,
   1419    uint8_t *signature_r,
   1420    uint8_t *signature_s)
   1421 {
   1422    uint64_t m_q[9U] = { 0U };
   1423    uint8_t mHash[66U] = { 0U };
   1424    memcpy(mHash, msg, 66U * sizeof(uint8_t));
   1425    KRML_MAYBE_UNUSED_VAR(msg_len);
   1426    bn_from_bytes_be(m_q, mHash);
   1427    qmod_short(m_q, m_q);
   1428    bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s);
   1429    return res;
   1430 }
   1431 
   1432 /******************/
   1433 /* Key validation */
   1434 /******************/
   1435 
   1436 /**
   1437 Public key validation.
   1438 
   1439  The function returns `true` if a public key is valid and `false` otherwise.
   1440 
   1441  The argument `public_key` points to 132 bytes of valid memory, i.e., uint8_t[132].
   1442 
   1443  The public key (x || y) is valid (with respect to SP 800-56A):
   1444    • the public key is not the “point at infinity”, represented as O.
   1445    • the affine x and y coordinates of the point represented by the public key are
   1446      in the range [0, p – 1] where p is the prime defining the finite field.
   1447    • y^2 = x^3 + ax + b where a and b are the coefficients of the curve equation.
   1448  The last extract is taken from: https://neilmadden.blog/2017/05/17/so-how-do-you-validate-nist-ecdh-public-keys/
   1449 */
   1450 bool
   1451 Hacl_P521_validate_public_key(uint8_t *public_key)
   1452 {
   1453    uint64_t point_jac[27U] = { 0U };
   1454    uint64_t p_aff[18U] = { 0U };
   1455    uint8_t *p_x = public_key;
   1456    uint8_t *p_y = public_key + 66U;
   1457    uint64_t *bn_p_x = p_aff;
   1458    uint64_t *bn_p_y = p_aff + 9U;
   1459    bn_from_bytes_be(bn_p_x, p_x);
   1460    bn_from_bytes_be(bn_p_y, p_y);
   1461    uint64_t *px0 = p_aff;
   1462    uint64_t *py0 = p_aff + 9U;
   1463    uint64_t lessX = bn_is_lt_prime_mask(px0);
   1464    uint64_t lessY = bn_is_lt_prime_mask(py0);
   1465    uint64_t res0 = lessX & lessY;
   1466    bool is_xy_valid = res0 == 0xFFFFFFFFFFFFFFFFULL;
   1467    bool res;
   1468    if (!is_xy_valid) {
   1469        res = false;
   1470    } else {
   1471        uint64_t rp[9U] = { 0U };
   1472        uint64_t tx[9U] = { 0U };
   1473        uint64_t ty[9U] = { 0U };
   1474        uint64_t *px = p_aff;
   1475        uint64_t *py = p_aff + 9U;
   1476        to_mont(tx, px);
   1477        to_mont(ty, py);
   1478        uint64_t tmp[9U] = { 0U };
   1479        fsqr0(rp, tx);
   1480        fmul0(rp, rp, tx);
   1481        p521_make_a_coeff(tmp);
   1482        fmul0(tmp, tmp, tx);
   1483        fadd0(rp, tmp, rp);
   1484        p521_make_b_coeff(tmp);
   1485        fadd0(rp, tmp, rp);
   1486        fsqr0(ty, ty);
   1487        uint64_t r = bn_is_eq_mask(ty, rp);
   1488        uint64_t r0 = r;
   1489        bool r1 = r0 == 0xFFFFFFFFFFFFFFFFULL;
   1490        res = r1;
   1491    }
   1492    if (res) {
   1493        uint64_t *px = p_aff;
   1494        uint64_t *py = p_aff + 9U;
   1495        uint64_t *rx = point_jac;
   1496        uint64_t *ry = point_jac + 9U;
   1497        uint64_t *rz = point_jac + 18U;
   1498        to_mont(rx, px);
   1499        to_mont(ry, py);
   1500        p521_make_fone(rz);
   1501    }
   1502    bool res1 = res;
   1503    return res1;
   1504 }
   1505 
   1506 /**
   1507 Private key validation.
   1508 
   1509  The function returns `true` if a private key is valid and `false` otherwise.
   1510 
   1511  The argument `private_key` points to 66 bytes of valid memory, i.e., uint8_t[66].
   1512 
   1513  The private key is valid:
   1514    • 0 < `private_key` < the order of the curve
   1515 */
   1516 bool
   1517 Hacl_P521_validate_private_key(uint8_t *private_key)
   1518 {
   1519    uint64_t bn_sk[9U] = { 0U };
   1520    bn_from_bytes_be(bn_sk, private_key);
   1521    uint64_t tmp[9U] = { 0U };
   1522    p521_make_order(tmp);
   1523    uint64_t c = bn_sub(tmp, bn_sk, tmp);
   1524    uint64_t is_lt_order = FStar_UInt64_gte_mask(c, 0ULL) & ~FStar_UInt64_eq_mask(c, 0ULL);
   1525    uint64_t bn_zero[9U] = { 0U };
   1526    uint64_t res = bn_is_eq_mask(bn_sk, bn_zero);
   1527    uint64_t is_eq_zero = res;
   1528    uint64_t res0 = is_lt_order & ~is_eq_zero;
   1529    return res0 == 0xFFFFFFFFFFFFFFFFULL;
   1530 }
   1531 
   1532 /*******************************************************************************
   1533  Parsing and Serializing public keys.
   1534 
   1535  A public key is a point (x, y) on the P-521 NIST curve.
   1536 
   1537  The point can be represented in the following three ways.
   1538    • raw          = [ x || y ], 132 bytes
   1539    • uncompressed = [ 0x04 || x || y ], 133 bytes
   1540    • compressed   = [ (0x02 for even `y` and 0x03 for odd `y`) || x ], 33 bytes
   1541 
   1542 *******************************************************************************/
   1543 
   1544 /**
   1545 Convert a public key from uncompressed to its raw form.
   1546 
   1547  The function returns `true` for successful conversion of a public key and `false` otherwise.
   1548 
   1549  The outparam `pk_raw` points to 132 bytes of valid memory, i.e., uint8_t[132].
   1550  The argument `pk` points to 133 bytes of valid memory, i.e., uint8_t[133].
   1551 
   1552  The function DOESN'T check whether (x, y) is a valid point.
   1553 */
   1554 bool
   1555 Hacl_P521_uncompressed_to_raw(uint8_t *pk, uint8_t *pk_raw)
   1556 {
   1557    uint8_t pk0 = pk[0U];
   1558    if (pk0 != 0x04U) {
   1559        return false;
   1560    }
   1561    memcpy(pk_raw, pk + 1U, 132U * sizeof(uint8_t));
   1562    return true;
   1563 }
   1564 
   1565 /**
   1566 Convert a public key from compressed to its raw form.
   1567 
   1568  The function returns `true` for successful conversion of a public key and `false` otherwise.
   1569 
   1570  The outparam `pk_raw` points to 132 bytes of valid memory, i.e., uint8_t[132].
   1571  The argument `pk` points to 33 bytes of valid memory, i.e., uint8_t[33].
   1572 
   1573  The function also checks whether (x, y) is a valid point.
   1574 */
   1575 bool
   1576 Hacl_P521_compressed_to_raw(uint8_t *pk, uint8_t *pk_raw)
   1577 {
   1578    uint64_t xa[9U] = { 0U };
   1579    uint64_t ya[9U] = { 0U };
   1580    uint8_t *pk_xb = pk + 1U;
   1581    uint8_t s0 = pk[0U];
   1582    uint8_t s01 = s0;
   1583    bool b;
   1584    if (!(s01 == 0x02U || s01 == 0x03U)) {
   1585        b = false;
   1586    } else {
   1587        uint8_t *xb = pk + 1U;
   1588        bn_from_bytes_be(xa, xb);
   1589        uint64_t is_x_valid = bn_is_lt_prime_mask(xa);
   1590        bool is_x_valid1 = is_x_valid == 0xFFFFFFFFFFFFFFFFULL;
   1591        bool is_y_odd = s01 == 0x03U;
   1592        if (!is_x_valid1) {
   1593            b = false;
   1594        } else {
   1595            uint64_t y2M[9U] = { 0U };
   1596            uint64_t xM[9U] = { 0U };
   1597            uint64_t yM[9U] = { 0U };
   1598            to_mont(xM, xa);
   1599            uint64_t tmp[9U] = { 0U };
   1600            fsqr0(y2M, xM);
   1601            fmul0(y2M, y2M, xM);
   1602            p521_make_a_coeff(tmp);
   1603            fmul0(tmp, tmp, xM);
   1604            fadd0(y2M, tmp, y2M);
   1605            p521_make_b_coeff(tmp);
   1606            fadd0(y2M, tmp, y2M);
   1607            p521_fsqrt(yM, y2M);
   1608            from_mont(ya, yM);
   1609            fsqr0(yM, yM);
   1610            uint64_t r = bn_is_eq_mask(yM, y2M);
   1611            uint64_t r0 = r;
   1612            bool is_y_valid = r0 == 0xFFFFFFFFFFFFFFFFULL;
   1613            bool is_y_valid0 = is_y_valid;
   1614            if (!is_y_valid0) {
   1615                b = false;
   1616            } else {
   1617                uint64_t is_y_odd1 = ya[0U] & 1ULL;
   1618                bool is_y_odd2 = is_y_odd1 == 1ULL;
   1619                uint64_t zero[9U] = { 0U };
   1620                if (is_y_odd2 != is_y_odd) {
   1621                    fsub0(ya, zero, ya);
   1622                }
   1623                b = true;
   1624            }
   1625        }
   1626    }
   1627    if (b) {
   1628        memcpy(pk_raw, pk_xb, 66U * sizeof(uint8_t));
   1629        bn_to_bytes_be(pk_raw + 66U, ya);
   1630    }
   1631    return b;
   1632 }
   1633 
   1634 /**
   1635 Convert a public key from raw to its uncompressed form.
   1636 
   1637  The outparam `pk` points to 133 bytes of valid memory, i.e., uint8_t[133].
   1638  The argument `pk_raw` points to 132 bytes of valid memory, i.e., uint8_t[132].
   1639 
   1640  The function DOESN'T check whether (x, y) is a valid point.
   1641 */
   1642 void
   1643 Hacl_P521_raw_to_uncompressed(uint8_t *pk_raw, uint8_t *pk)
   1644 {
   1645    pk[0U] = 0x04U;
   1646    memcpy(pk + 1U, pk_raw, 132U * sizeof(uint8_t));
   1647 }
   1648 
   1649 /**
   1650 Convert a public key from raw to its compressed form.
   1651 
   1652  The outparam `pk` points to 33 bytes of valid memory, i.e., uint8_t[33].
   1653  The argument `pk_raw` points to 132 bytes of valid memory, i.e., uint8_t[132].
   1654 
   1655  The function DOESN'T check whether (x, y) is a valid point.
   1656 */
   1657 void
   1658 Hacl_P521_raw_to_compressed(uint8_t *pk_raw, uint8_t *pk)
   1659 {
   1660    uint8_t *pk_x = pk_raw;
   1661    uint8_t *pk_y = pk_raw + 66U;
   1662    uint64_t bn_f[9U] = { 0U };
   1663    bn_from_bytes_be(bn_f, pk_y);
   1664    uint64_t is_odd_f = bn_f[0U] & 1ULL;
   1665    pk[0U] = (uint32_t)(uint8_t)is_odd_f + 0x02U;
   1666    memcpy(pk + 1U, pk_x, 66U * sizeof(uint8_t));
   1667 }
   1668 
   1669 /******************/
   1670 /* ECDH agreement */
   1671 /******************/
   1672 
   1673 /**
   1674 Compute the public key from the private key.
   1675 
   1676  The function returns `true` if a private key is valid and `false` otherwise.
   1677 
   1678  The outparam `public_key`  points to 132 bytes of valid memory, i.e., uint8_t[132].
   1679  The argument `private_key` points to 66 bytes of valid memory, i.e., uint8_t[66].
   1680 
   1681  The private key is valid:
   1682    • 0 < `private_key` < the order of the curve.
   1683 */
   1684 bool
   1685 Hacl_P521_dh_initiator(uint8_t *public_key, uint8_t *private_key)
   1686 {
   1687    uint64_t tmp[36U] = { 0U };
   1688    uint64_t *sk = tmp;
   1689    uint64_t *pk = tmp + 9U;
   1690    uint64_t is_sk_valid = load_qelem_conditional(sk, private_key);
   1691    point_mul_g(pk, sk);
   1692    uint64_t aff_p[18U] = { 0U };
   1693    uint64_t zinv[9U] = { 0U };
   1694    uint64_t *px = pk;
   1695    uint64_t *py0 = pk + 9U;
   1696    uint64_t *pz = pk + 18U;
   1697    uint64_t *x = aff_p;
   1698    uint64_t *y = aff_p + 9U;
   1699    p521_finv(zinv, pz);
   1700    fmul0(x, px, zinv);
   1701    fmul0(y, py0, zinv);
   1702    from_mont(x, x);
   1703    from_mont(y, y);
   1704    uint64_t *px0 = aff_p;
   1705    uint64_t *py = aff_p + 9U;
   1706    bn_to_bytes_be(public_key, px0);
   1707    bn_to_bytes_be(public_key + 66U, py);
   1708    return is_sk_valid == 0xFFFFFFFFFFFFFFFFULL;
   1709 }
   1710 
   1711 /**
   1712 Execute the diffie-hellmann key exchange.
   1713 
   1714  The function returns `true` for successful creation of an ECDH shared secret and
   1715  `false` otherwise.
   1716 
   1717  The outparam `shared_secret` points to 132 bytes of valid memory, i.e., uint8_t[132].
   1718  The argument `their_pubkey` points to 132 bytes of valid memory, i.e., uint8_t[132].
   1719  The argument `private_key` points to 66 bytes of valid memory, i.e., uint8_t[66].
   1720 
   1721  The function also checks whether `private_key` and `their_pubkey` are valid.
   1722 */
   1723 bool
   1724 Hacl_P521_dh_responder(uint8_t *shared_secret, uint8_t *their_pubkey, uint8_t *private_key)
   1725 {
   1726    uint64_t tmp[264U] = { 0U };
   1727    uint64_t *sk = tmp;
   1728    uint64_t *pk = tmp + 9U;
   1729    uint64_t p_aff[18U] = { 0U };
   1730    uint8_t *p_x = their_pubkey;
   1731    uint8_t *p_y = their_pubkey + 66U;
   1732    uint64_t *bn_p_x = p_aff;
   1733    uint64_t *bn_p_y = p_aff + 9U;
   1734    bn_from_bytes_be(bn_p_x, p_x);
   1735    bn_from_bytes_be(bn_p_y, p_y);
   1736    uint64_t *px0 = p_aff;
   1737    uint64_t *py0 = p_aff + 9U;
   1738    uint64_t lessX = bn_is_lt_prime_mask(px0);
   1739    uint64_t lessY = bn_is_lt_prime_mask(py0);
   1740    uint64_t res0 = lessX & lessY;
   1741    bool is_xy_valid = res0 == 0xFFFFFFFFFFFFFFFFULL;
   1742    bool res;
   1743    if (!is_xy_valid) {
   1744        res = false;
   1745    } else {
   1746        uint64_t rp[9U] = { 0U };
   1747        uint64_t tx[9U] = { 0U };
   1748        uint64_t ty[9U] = { 0U };
   1749        uint64_t *px = p_aff;
   1750        uint64_t *py = p_aff + 9U;
   1751        to_mont(tx, px);
   1752        to_mont(ty, py);
   1753        uint64_t tmp1[9U] = { 0U };
   1754        fsqr0(rp, tx);
   1755        fmul0(rp, rp, tx);
   1756        p521_make_a_coeff(tmp1);
   1757        fmul0(tmp1, tmp1, tx);
   1758        fadd0(rp, tmp1, rp);
   1759        p521_make_b_coeff(tmp1);
   1760        fadd0(rp, tmp1, rp);
   1761        fsqr0(ty, ty);
   1762        uint64_t r = bn_is_eq_mask(ty, rp);
   1763        uint64_t r0 = r;
   1764        bool r1 = r0 == 0xFFFFFFFFFFFFFFFFULL;
   1765        res = r1;
   1766    }
   1767    if (res) {
   1768        uint64_t *px = p_aff;
   1769        uint64_t *py = p_aff + 9U;
   1770        uint64_t *rx = pk;
   1771        uint64_t *ry = pk + 9U;
   1772        uint64_t *rz = pk + 18U;
   1773        to_mont(rx, px);
   1774        to_mont(ry, py);
   1775        p521_make_fone(rz);
   1776    }
   1777    bool is_pk_valid = res;
   1778    uint64_t is_sk_valid = load_qelem_conditional(sk, private_key);
   1779    uint64_t ss_proj[27U] = { 0U };
   1780    if (is_pk_valid) {
   1781        point_mul(ss_proj, sk, pk);
   1782        uint64_t aff_p[18U] = { 0U };
   1783        uint64_t zinv[9U] = { 0U };
   1784        uint64_t *px = ss_proj;
   1785        uint64_t *py1 = ss_proj + 9U;
   1786        uint64_t *pz = ss_proj + 18U;
   1787        uint64_t *x = aff_p;
   1788        uint64_t *y = aff_p + 9U;
   1789        p521_finv(zinv, pz);
   1790        fmul0(x, px, zinv);
   1791        fmul0(y, py1, zinv);
   1792        from_mont(x, x);
   1793        from_mont(y, y);
   1794        uint64_t *px1 = aff_p;
   1795        uint64_t *py = aff_p + 9U;
   1796        bn_to_bytes_be(shared_secret, px1);
   1797        bn_to_bytes_be(shared_secret + 66U, py);
   1798    }
   1799    return is_sk_valid == 0xFFFFFFFFFFFFFFFFULL && is_pk_valid;
   1800 }