tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Hacl_Curve25519_64.c (12922B)


      1 /* MIT License
      2 *
      3 * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
      4 * Copyright (c) 2022-2023 HACL* Contributors
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a copy
      7 * of this software and associated documentation files (the "Software"), to deal
      8 * in the Software without restriction, including without limitation the rights
      9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10 * copies of the Software, and to permit persons to whom the Software is
     11 * furnished to do so, subject to the following conditions:
     12 *
     13 * The above copyright notice and this permission notice shall be included in all
     14 * copies or substantial portions of the Software.
     15 *
     16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     22 * SOFTWARE.
     23 */
     24 
     25 #include "Hacl_Curve25519_64.h"
     26 
     27 #include "internal/Vale.h"
     28 #include "internal/Hacl_Krmllib.h"
     29 #include "config.h"
     30 #include "curve25519-inline.h"
     31 
     32 static inline void
     33 add_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2)
     34 {
     35 #if HACL_CAN_COMPILE_INLINE_ASM
     36    add_scalar(out, f1, f2);
     37 #else
     38    KRML_HOST_IGNORE(add_scalar_e(out, f1, f2));
     39 #endif
     40 }
     41 
     42 static inline void
     43 fadd0(uint64_t *out, uint64_t *f1, uint64_t *f2)
     44 {
     45 #if HACL_CAN_COMPILE_INLINE_ASM
     46    fadd(out, f1, f2);
     47 #else
     48    KRML_HOST_IGNORE(fadd_e(out, f1, f2));
     49 #endif
     50 }
     51 
     52 static inline void
     53 fsub0(uint64_t *out, uint64_t *f1, uint64_t *f2)
     54 {
     55 #if HACL_CAN_COMPILE_INLINE_ASM
     56    fsub(out, f1, f2);
     57 #else
     58    KRML_HOST_IGNORE(fsub_e(out, f1, f2));
     59 #endif
     60 }
     61 
     62 static inline void
     63 fmul0(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp)
     64 {
     65 #if HACL_CAN_COMPILE_INLINE_ASM
     66    fmul(out, f1, f2, tmp);
     67 #else
     68    KRML_HOST_IGNORE(fmul_e(tmp, f1, out, f2));
     69 #endif
     70 }
     71 
     72 static inline void
     73 fmul20(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp)
     74 {
     75 #if HACL_CAN_COMPILE_INLINE_ASM
     76    fmul2(out, f1, f2, tmp);
     77 #else
     78    KRML_HOST_IGNORE(fmul2_e(tmp, f1, out, f2));
     79 #endif
     80 }
     81 
     82 static inline void
     83 fmul_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2)
     84 {
     85 #if HACL_CAN_COMPILE_INLINE_ASM
     86    fmul_scalar(out, f1, f2);
     87 #else
     88    KRML_HOST_IGNORE(fmul_scalar_e(out, f1, f2));
     89 #endif
     90 }
     91 
     92 static inline void
     93 fsqr0(uint64_t *out, uint64_t *f1, uint64_t *tmp)
     94 {
     95 #if HACL_CAN_COMPILE_INLINE_ASM
     96    fsqr(out, f1, tmp);
     97 #else
     98    KRML_HOST_IGNORE(fsqr_e(tmp, f1, out));
     99 #endif
    100 }
    101 
    102 static inline void
    103 fsqr20(uint64_t *out, uint64_t *f, uint64_t *tmp)
    104 {
    105 #if HACL_CAN_COMPILE_INLINE_ASM
    106    fsqr2(out, f, tmp);
    107 #else
    108    KRML_HOST_IGNORE(fsqr2_e(tmp, f, out));
    109 #endif
    110 }
    111 
    112 static inline void
    113 cswap20(uint64_t bit, uint64_t *p1, uint64_t *p2)
    114 {
    115 #if HACL_CAN_COMPILE_INLINE_ASM
    116    cswap2(bit, p1, p2);
    117 #else
    118    KRML_HOST_IGNORE(cswap2_e(bit, p1, p2));
    119 #endif
    120 }
    121 
    122 static const uint8_t g25519[32U] = { (uint8_t)9U };
    123 
    124 static void
    125 point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, uint64_t *tmp2)
    126 {
    127    uint64_t *nq = p01_tmp1;
    128    uint64_t *nq_p1 = p01_tmp1 + (uint32_t)8U;
    129    uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U;
    130    uint64_t *x1 = q;
    131    uint64_t *x2 = nq;
    132    uint64_t *z2 = nq + (uint32_t)4U;
    133    uint64_t *z3 = nq_p1 + (uint32_t)4U;
    134    uint64_t *a = tmp1;
    135    uint64_t *b = tmp1 + (uint32_t)4U;
    136    uint64_t *ab = tmp1;
    137    uint64_t *dc = tmp1 + (uint32_t)8U;
    138    fadd0(a, x2, z2);
    139    fsub0(b, x2, z2);
    140    uint64_t *x3 = nq_p1;
    141    uint64_t *z31 = nq_p1 + (uint32_t)4U;
    142    uint64_t *d0 = dc;
    143    uint64_t *c0 = dc + (uint32_t)4U;
    144    fadd0(c0, x3, z31);
    145    fsub0(d0, x3, z31);
    146    fmul20(dc, dc, ab, tmp2);
    147    fadd0(x3, d0, c0);
    148    fsub0(z31, d0, c0);
    149    uint64_t *a1 = tmp1;
    150    uint64_t *b1 = tmp1 + (uint32_t)4U;
    151    uint64_t *d = tmp1 + (uint32_t)8U;
    152    uint64_t *c = tmp1 + (uint32_t)12U;
    153    uint64_t *ab1 = tmp1;
    154    uint64_t *dc1 = tmp1 + (uint32_t)8U;
    155    fsqr20(dc1, ab1, tmp2);
    156    fsqr20(nq_p1, nq_p1, tmp2);
    157    a1[0U] = c[0U];
    158    a1[1U] = c[1U];
    159    a1[2U] = c[2U];
    160    a1[3U] = c[3U];
    161    fsub0(c, d, c);
    162    fmul_scalar0(b1, c, (uint64_t)121665U);
    163    fadd0(b1, b1, d);
    164    fmul20(nq, dc1, ab1, tmp2);
    165    fmul0(z3, z3, x1, tmp2);
    166 }
    167 
    168 static void
    169 point_double(uint64_t *nq, uint64_t *tmp1, uint64_t *tmp2)
    170 {
    171    uint64_t *x2 = nq;
    172    uint64_t *z2 = nq + (uint32_t)4U;
    173    uint64_t *a = tmp1;
    174    uint64_t *b = tmp1 + (uint32_t)4U;
    175    uint64_t *d = tmp1 + (uint32_t)8U;
    176    uint64_t *c = tmp1 + (uint32_t)12U;
    177    uint64_t *ab = tmp1;
    178    uint64_t *dc = tmp1 + (uint32_t)8U;
    179    fadd0(a, x2, z2);
    180    fsub0(b, x2, z2);
    181    fsqr20(dc, ab, tmp2);
    182    a[0U] = c[0U];
    183    a[1U] = c[1U];
    184    a[2U] = c[2U];
    185    a[3U] = c[3U];
    186    fsub0(c, d, c);
    187    fmul_scalar0(b, c, (uint64_t)121665U);
    188    fadd0(b, b, d);
    189    fmul20(nq, dc, ab, tmp2);
    190 }
    191 
    192 static void
    193 montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init)
    194 {
    195    uint64_t tmp2[16U] = { 0U };
    196    uint64_t p01_tmp1_swap[33U] = { 0U };
    197    uint64_t *p0 = p01_tmp1_swap;
    198    uint64_t *p01 = p01_tmp1_swap;
    199    uint64_t *p03 = p01;
    200    uint64_t *p11 = p01 + (uint32_t)8U;
    201    memcpy(p11, init, (uint32_t)8U * sizeof(uint64_t));
    202    uint64_t *x0 = p03;
    203    uint64_t *z0 = p03 + (uint32_t)4U;
    204    x0[0U] = (uint64_t)1U;
    205    x0[1U] = (uint64_t)0U;
    206    x0[2U] = (uint64_t)0U;
    207    x0[3U] = (uint64_t)0U;
    208    z0[0U] = (uint64_t)0U;
    209    z0[1U] = (uint64_t)0U;
    210    z0[2U] = (uint64_t)0U;
    211    z0[3U] = (uint64_t)0U;
    212    uint64_t *p01_tmp1 = p01_tmp1_swap;
    213    uint64_t *p01_tmp11 = p01_tmp1_swap;
    214    uint64_t *nq1 = p01_tmp1_swap;
    215    uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)8U;
    216    uint64_t *swap = p01_tmp1_swap + (uint32_t)32U;
    217    cswap20((uint64_t)1U, nq1, nq_p11);
    218    point_add_and_double(init, p01_tmp11, tmp2);
    219    swap[0U] = (uint64_t)1U;
    220    for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) {
    221        uint64_t *p01_tmp12 = p01_tmp1_swap;
    222        uint64_t *swap1 = p01_tmp1_swap + (uint32_t)32U;
    223        uint64_t *nq2 = p01_tmp12;
    224        uint64_t *nq_p12 = p01_tmp12 + (uint32_t)8U;
    225        uint64_t
    226            bit =
    227                (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U);
    228        uint64_t sw = swap1[0U] ^ bit;
    229        cswap20(sw, nq2, nq_p12);
    230        point_add_and_double(init, p01_tmp12, tmp2);
    231        swap1[0U] = bit;
    232    }
    233    uint64_t sw = swap[0U];
    234    cswap20(sw, nq1, nq_p11);
    235    uint64_t *nq10 = p01_tmp1;
    236    uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U;
    237    point_double(nq10, tmp1, tmp2);
    238    point_double(nq10, tmp1, tmp2);
    239    point_double(nq10, tmp1, tmp2);
    240    memcpy(out, p0, (uint32_t)8U * sizeof(uint64_t));
    241 }
    242 
    243 static void
    244 fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n)
    245 {
    246    fsqr0(o, inp, tmp);
    247    for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) {
    248        fsqr0(o, o, tmp);
    249    }
    250 }
    251 
    252 static void
    253 finv(uint64_t *o, uint64_t *i, uint64_t *tmp)
    254 {
    255    uint64_t t1[16U] = { 0U };
    256    uint64_t *a1 = t1;
    257    uint64_t *b1 = t1 + (uint32_t)4U;
    258    uint64_t *t010 = t1 + (uint32_t)12U;
    259    uint64_t *tmp10 = tmp;
    260    fsquare_times(a1, i, tmp10, (uint32_t)1U);
    261    fsquare_times(t010, a1, tmp10, (uint32_t)2U);
    262    fmul0(b1, t010, i, tmp);
    263    fmul0(a1, b1, a1, tmp);
    264    fsquare_times(t010, a1, tmp10, (uint32_t)1U);
    265    fmul0(b1, t010, b1, tmp);
    266    fsquare_times(t010, b1, tmp10, (uint32_t)5U);
    267    fmul0(b1, t010, b1, tmp);
    268    uint64_t *b10 = t1 + (uint32_t)4U;
    269    uint64_t *c10 = t1 + (uint32_t)8U;
    270    uint64_t *t011 = t1 + (uint32_t)12U;
    271    uint64_t *tmp11 = tmp;
    272    fsquare_times(t011, b10, tmp11, (uint32_t)10U);
    273    fmul0(c10, t011, b10, tmp);
    274    fsquare_times(t011, c10, tmp11, (uint32_t)20U);
    275    fmul0(t011, t011, c10, tmp);
    276    fsquare_times(t011, t011, tmp11, (uint32_t)10U);
    277    fmul0(b10, t011, b10, tmp);
    278    fsquare_times(t011, b10, tmp11, (uint32_t)50U);
    279    fmul0(c10, t011, b10, tmp);
    280    uint64_t *b11 = t1 + (uint32_t)4U;
    281    uint64_t *c1 = t1 + (uint32_t)8U;
    282    uint64_t *t01 = t1 + (uint32_t)12U;
    283    uint64_t *tmp1 = tmp;
    284    fsquare_times(t01, c1, tmp1, (uint32_t)100U);
    285    fmul0(t01, t01, c1, tmp);
    286    fsquare_times(t01, t01, tmp1, (uint32_t)50U);
    287    fmul0(t01, t01, b11, tmp);
    288    fsquare_times(t01, t01, tmp1, (uint32_t)5U);
    289    uint64_t *a = t1;
    290    uint64_t *t0 = t1 + (uint32_t)12U;
    291    fmul0(o, t0, a, tmp);
    292 }
    293 
    294 static void
    295 store_felem(uint64_t *b, uint64_t *f)
    296 {
    297    uint64_t f30 = f[3U];
    298    uint64_t top_bit0 = f30 >> (uint32_t)63U;
    299    f[3U] = f30 & (uint64_t)0x7fffffffffffffffU;
    300    add_scalar0(f, f, (uint64_t)19U * top_bit0);
    301    uint64_t f31 = f[3U];
    302    uint64_t top_bit = f31 >> (uint32_t)63U;
    303    f[3U] = f31 & (uint64_t)0x7fffffffffffffffU;
    304    add_scalar0(f, f, (uint64_t)19U * top_bit);
    305    uint64_t f0 = f[0U];
    306    uint64_t f1 = f[1U];
    307    uint64_t f2 = f[2U];
    308    uint64_t f3 = f[3U];
    309    uint64_t m0 = FStar_UInt64_gte_mask(f0, (uint64_t)0xffffffffffffffedU);
    310    uint64_t m1 = FStar_UInt64_eq_mask(f1, (uint64_t)0xffffffffffffffffU);
    311    uint64_t m2 = FStar_UInt64_eq_mask(f2, (uint64_t)0xffffffffffffffffU);
    312    uint64_t m3 = FStar_UInt64_eq_mask(f3, (uint64_t)0x7fffffffffffffffU);
    313    uint64_t mask = ((m0 & m1) & m2) & m3;
    314    uint64_t f0_ = f0 - (mask & (uint64_t)0xffffffffffffffedU);
    315    uint64_t f1_ = f1 - (mask & (uint64_t)0xffffffffffffffffU);
    316    uint64_t f2_ = f2 - (mask & (uint64_t)0xffffffffffffffffU);
    317    uint64_t f3_ = f3 - (mask & (uint64_t)0x7fffffffffffffffU);
    318    uint64_t o0 = f0_;
    319    uint64_t o1 = f1_;
    320    uint64_t o2 = f2_;
    321    uint64_t o3 = f3_;
    322    b[0U] = o0;
    323    b[1U] = o1;
    324    b[2U] = o2;
    325    b[3U] = o3;
    326 }
    327 
    328 static void
    329 encode_point(uint8_t *o, uint64_t *i)
    330 {
    331    uint64_t *x = i;
    332    uint64_t *z = i + (uint32_t)4U;
    333    uint64_t tmp[4U] = { 0U };
    334    uint64_t u64s[4U] = { 0U };
    335    uint64_t tmp_w[16U] = { 0U };
    336    finv(tmp, z, tmp_w);
    337    fmul0(tmp, tmp, x, tmp_w);
    338    store_felem(u64s, tmp);
    339    KRML_MAYBE_FOR4(i0,
    340                    (uint32_t)0U,
    341                    (uint32_t)4U,
    342                    (uint32_t)1U,
    343                    store64_le(o + i0 * (uint32_t)8U, u64s[i0]););
    344 }
    345 
    346 /**
    347 Compute the scalar multiple of a point.
    348 
    349 @param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
    350 @param priv Pointer to 32 bytes of memory where the secret/private key is read from.
    351 @param pub Pointer to 32 bytes of memory where the public point is read from.
    352 */
    353 void
    354 Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub)
    355 {
    356    uint64_t init[8U] = { 0U };
    357    uint64_t tmp[4U] = { 0U };
    358    KRML_MAYBE_FOR4(i,
    359                    (uint32_t)0U,
    360                    (uint32_t)4U,
    361                    (uint32_t)1U,
    362                    uint64_t *os = tmp;
    363                    uint8_t *bj = pub + i * (uint32_t)8U;
    364                    uint64_t u = load64_le(bj);
    365                    uint64_t r = u;
    366                    uint64_t x = r;
    367                    os[i] = x;);
    368    uint64_t tmp3 = tmp[3U];
    369    tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU;
    370    uint64_t *x = init;
    371    uint64_t *z = init + (uint32_t)4U;
    372    z[0U] = (uint64_t)1U;
    373    z[1U] = (uint64_t)0U;
    374    z[2U] = (uint64_t)0U;
    375    z[3U] = (uint64_t)0U;
    376    x[0U] = tmp[0U];
    377    x[1U] = tmp[1U];
    378    x[2U] = tmp[2U];
    379    x[3U] = tmp[3U];
    380    montgomery_ladder(init, priv, init);
    381    encode_point(out, init);
    382 }
    383 
    384 /**
    385 Calculate a public point from a secret/private key.
    386 
    387 This computes a scalar multiplication of the secret/private key with the curve's basepoint.
    388 
    389 @param pub Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
    390 @param priv Pointer to 32 bytes of memory where the secret/private key is read from.
    391 */
    392 void
    393 Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv)
    394 {
    395    uint8_t basepoint[32U] = { 0U };
    396    for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) {
    397        uint8_t *os = basepoint;
    398        uint8_t x = g25519[i];
    399        os[i] = x;
    400    }
    401    Hacl_Curve25519_64_scalarmult(pub, priv, basepoint);
    402 }
    403 
    404 /**
    405 Execute the diffie-hellmann key exchange.
    406 
    407 @param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
    408 @param priv Pointer to 32 bytes of memory where **our** secret/private key is read from.
    409 @param pub Pointer to 32 bytes of memory where **their** public point is read from.
    410 */
    411 bool
    412 Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub)
    413 {
    414    uint8_t zeros[32U] = { 0U };
    415    Hacl_Curve25519_64_scalarmult(out, priv, pub);
    416    uint8_t res = (uint8_t)255U;
    417    for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) {
    418        uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros[i]);
    419        res = uu____0 & res;
    420    }
    421    uint8_t z = res;
    422    bool r = z == (uint8_t)255U;
    423    return !r;
    424 }