Hacl_Curve25519_64.c (12922B)
1 /* MIT License 2 * 3 * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation 4 * Copyright (c) 2022-2023 HACL* Contributors 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "Hacl_Curve25519_64.h" 26 27 #include "internal/Vale.h" 28 #include "internal/Hacl_Krmllib.h" 29 #include "config.h" 30 #include "curve25519-inline.h" 31 32 static inline void 33 add_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2) 34 { 35 #if HACL_CAN_COMPILE_INLINE_ASM 36 add_scalar(out, f1, f2); 37 #else 38 KRML_HOST_IGNORE(add_scalar_e(out, f1, f2)); 39 #endif 40 } 41 42 static inline void 43 fadd0(uint64_t *out, uint64_t *f1, uint64_t *f2) 44 { 45 #if HACL_CAN_COMPILE_INLINE_ASM 46 fadd(out, f1, f2); 47 #else 48 KRML_HOST_IGNORE(fadd_e(out, f1, f2)); 49 #endif 50 } 51 52 static inline void 53 fsub0(uint64_t *out, uint64_t *f1, uint64_t *f2) 54 { 55 #if HACL_CAN_COMPILE_INLINE_ASM 56 fsub(out, f1, f2); 57 #else 58 KRML_HOST_IGNORE(fsub_e(out, f1, f2)); 59 #endif 60 } 61 62 static inline void 63 fmul0(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) 64 { 65 #if HACL_CAN_COMPILE_INLINE_ASM 66 fmul(out, f1, f2, tmp); 67 #else 68 KRML_HOST_IGNORE(fmul_e(tmp, f1, out, f2)); 69 #endif 70 } 71 72 static inline void 73 fmul20(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) 74 { 75 #if HACL_CAN_COMPILE_INLINE_ASM 76 fmul2(out, f1, f2, tmp); 77 #else 78 KRML_HOST_IGNORE(fmul2_e(tmp, f1, out, f2)); 79 #endif 80 } 81 82 static inline void 83 fmul_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2) 84 { 85 #if HACL_CAN_COMPILE_INLINE_ASM 86 fmul_scalar(out, f1, f2); 87 #else 88 KRML_HOST_IGNORE(fmul_scalar_e(out, f1, f2)); 89 #endif 90 } 91 92 static inline void 93 fsqr0(uint64_t *out, uint64_t *f1, uint64_t *tmp) 94 { 95 #if HACL_CAN_COMPILE_INLINE_ASM 96 fsqr(out, f1, tmp); 97 #else 98 KRML_HOST_IGNORE(fsqr_e(tmp, f1, out)); 99 #endif 100 } 101 102 static inline void 103 fsqr20(uint64_t *out, uint64_t *f, uint64_t *tmp) 104 { 105 #if HACL_CAN_COMPILE_INLINE_ASM 106 fsqr2(out, f, tmp); 107 #else 108 KRML_HOST_IGNORE(fsqr2_e(tmp, f, out)); 109 #endif 110 } 111 112 static inline void 113 cswap20(uint64_t bit, uint64_t *p1, uint64_t *p2) 114 { 115 #if HACL_CAN_COMPILE_INLINE_ASM 116 cswap2(bit, p1, p2); 117 #else 118 KRML_HOST_IGNORE(cswap2_e(bit, p1, p2)); 119 #endif 120 } 121 122 static const uint8_t g25519[32U] = { (uint8_t)9U }; 123 124 static void 125 point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, uint64_t *tmp2) 126 { 127 uint64_t *nq = p01_tmp1; 128 uint64_t *nq_p1 = p01_tmp1 + (uint32_t)8U; 129 uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U; 130 uint64_t *x1 = q; 131 uint64_t *x2 = nq; 132 uint64_t *z2 = nq + (uint32_t)4U; 133 uint64_t *z3 = nq_p1 + (uint32_t)4U; 134 uint64_t *a = tmp1; 135 uint64_t *b = tmp1 + (uint32_t)4U; 136 uint64_t *ab = tmp1; 137 uint64_t *dc = tmp1 + (uint32_t)8U; 138 fadd0(a, x2, z2); 139 fsub0(b, x2, z2); 140 uint64_t *x3 = nq_p1; 141 uint64_t *z31 = nq_p1 + (uint32_t)4U; 142 uint64_t *d0 = dc; 143 uint64_t *c0 = dc + (uint32_t)4U; 144 fadd0(c0, x3, z31); 145 fsub0(d0, x3, z31); 146 fmul20(dc, dc, ab, tmp2); 147 fadd0(x3, d0, c0); 148 fsub0(z31, d0, c0); 149 uint64_t *a1 = tmp1; 150 uint64_t *b1 = tmp1 + (uint32_t)4U; 151 uint64_t *d = tmp1 + (uint32_t)8U; 152 uint64_t *c = tmp1 + (uint32_t)12U; 153 uint64_t *ab1 = tmp1; 154 uint64_t *dc1 = tmp1 + (uint32_t)8U; 155 fsqr20(dc1, ab1, tmp2); 156 fsqr20(nq_p1, nq_p1, tmp2); 157 a1[0U] = c[0U]; 158 a1[1U] = c[1U]; 159 a1[2U] = c[2U]; 160 a1[3U] = c[3U]; 161 fsub0(c, d, c); 162 fmul_scalar0(b1, c, (uint64_t)121665U); 163 fadd0(b1, b1, d); 164 fmul20(nq, dc1, ab1, tmp2); 165 fmul0(z3, z3, x1, tmp2); 166 } 167 168 static void 169 point_double(uint64_t *nq, uint64_t *tmp1, uint64_t *tmp2) 170 { 171 uint64_t *x2 = nq; 172 uint64_t *z2 = nq + (uint32_t)4U; 173 uint64_t *a = tmp1; 174 uint64_t *b = tmp1 + (uint32_t)4U; 175 uint64_t *d = tmp1 + (uint32_t)8U; 176 uint64_t *c = tmp1 + (uint32_t)12U; 177 uint64_t *ab = tmp1; 178 uint64_t *dc = tmp1 + (uint32_t)8U; 179 fadd0(a, x2, z2); 180 fsub0(b, x2, z2); 181 fsqr20(dc, ab, tmp2); 182 a[0U] = c[0U]; 183 a[1U] = c[1U]; 184 a[2U] = c[2U]; 185 a[3U] = c[3U]; 186 fsub0(c, d, c); 187 fmul_scalar0(b, c, (uint64_t)121665U); 188 fadd0(b, b, d); 189 fmul20(nq, dc, ab, tmp2); 190 } 191 192 static void 193 montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) 194 { 195 uint64_t tmp2[16U] = { 0U }; 196 uint64_t p01_tmp1_swap[33U] = { 0U }; 197 uint64_t *p0 = p01_tmp1_swap; 198 uint64_t *p01 = p01_tmp1_swap; 199 uint64_t *p03 = p01; 200 uint64_t *p11 = p01 + (uint32_t)8U; 201 memcpy(p11, init, (uint32_t)8U * sizeof(uint64_t)); 202 uint64_t *x0 = p03; 203 uint64_t *z0 = p03 + (uint32_t)4U; 204 x0[0U] = (uint64_t)1U; 205 x0[1U] = (uint64_t)0U; 206 x0[2U] = (uint64_t)0U; 207 x0[3U] = (uint64_t)0U; 208 z0[0U] = (uint64_t)0U; 209 z0[1U] = (uint64_t)0U; 210 z0[2U] = (uint64_t)0U; 211 z0[3U] = (uint64_t)0U; 212 uint64_t *p01_tmp1 = p01_tmp1_swap; 213 uint64_t *p01_tmp11 = p01_tmp1_swap; 214 uint64_t *nq1 = p01_tmp1_swap; 215 uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)8U; 216 uint64_t *swap = p01_tmp1_swap + (uint32_t)32U; 217 cswap20((uint64_t)1U, nq1, nq_p11); 218 point_add_and_double(init, p01_tmp11, tmp2); 219 swap[0U] = (uint64_t)1U; 220 for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) { 221 uint64_t *p01_tmp12 = p01_tmp1_swap; 222 uint64_t *swap1 = p01_tmp1_swap + (uint32_t)32U; 223 uint64_t *nq2 = p01_tmp12; 224 uint64_t *nq_p12 = p01_tmp12 + (uint32_t)8U; 225 uint64_t 226 bit = 227 (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U); 228 uint64_t sw = swap1[0U] ^ bit; 229 cswap20(sw, nq2, nq_p12); 230 point_add_and_double(init, p01_tmp12, tmp2); 231 swap1[0U] = bit; 232 } 233 uint64_t sw = swap[0U]; 234 cswap20(sw, nq1, nq_p11); 235 uint64_t *nq10 = p01_tmp1; 236 uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U; 237 point_double(nq10, tmp1, tmp2); 238 point_double(nq10, tmp1, tmp2); 239 point_double(nq10, tmp1, tmp2); 240 memcpy(out, p0, (uint32_t)8U * sizeof(uint64_t)); 241 } 242 243 static void 244 fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n) 245 { 246 fsqr0(o, inp, tmp); 247 for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) { 248 fsqr0(o, o, tmp); 249 } 250 } 251 252 static void 253 finv(uint64_t *o, uint64_t *i, uint64_t *tmp) 254 { 255 uint64_t t1[16U] = { 0U }; 256 uint64_t *a1 = t1; 257 uint64_t *b1 = t1 + (uint32_t)4U; 258 uint64_t *t010 = t1 + (uint32_t)12U; 259 uint64_t *tmp10 = tmp; 260 fsquare_times(a1, i, tmp10, (uint32_t)1U); 261 fsquare_times(t010, a1, tmp10, (uint32_t)2U); 262 fmul0(b1, t010, i, tmp); 263 fmul0(a1, b1, a1, tmp); 264 fsquare_times(t010, a1, tmp10, (uint32_t)1U); 265 fmul0(b1, t010, b1, tmp); 266 fsquare_times(t010, b1, tmp10, (uint32_t)5U); 267 fmul0(b1, t010, b1, tmp); 268 uint64_t *b10 = t1 + (uint32_t)4U; 269 uint64_t *c10 = t1 + (uint32_t)8U; 270 uint64_t *t011 = t1 + (uint32_t)12U; 271 uint64_t *tmp11 = tmp; 272 fsquare_times(t011, b10, tmp11, (uint32_t)10U); 273 fmul0(c10, t011, b10, tmp); 274 fsquare_times(t011, c10, tmp11, (uint32_t)20U); 275 fmul0(t011, t011, c10, tmp); 276 fsquare_times(t011, t011, tmp11, (uint32_t)10U); 277 fmul0(b10, t011, b10, tmp); 278 fsquare_times(t011, b10, tmp11, (uint32_t)50U); 279 fmul0(c10, t011, b10, tmp); 280 uint64_t *b11 = t1 + (uint32_t)4U; 281 uint64_t *c1 = t1 + (uint32_t)8U; 282 uint64_t *t01 = t1 + (uint32_t)12U; 283 uint64_t *tmp1 = tmp; 284 fsquare_times(t01, c1, tmp1, (uint32_t)100U); 285 fmul0(t01, t01, c1, tmp); 286 fsquare_times(t01, t01, tmp1, (uint32_t)50U); 287 fmul0(t01, t01, b11, tmp); 288 fsquare_times(t01, t01, tmp1, (uint32_t)5U); 289 uint64_t *a = t1; 290 uint64_t *t0 = t1 + (uint32_t)12U; 291 fmul0(o, t0, a, tmp); 292 } 293 294 static void 295 store_felem(uint64_t *b, uint64_t *f) 296 { 297 uint64_t f30 = f[3U]; 298 uint64_t top_bit0 = f30 >> (uint32_t)63U; 299 f[3U] = f30 & (uint64_t)0x7fffffffffffffffU; 300 add_scalar0(f, f, (uint64_t)19U * top_bit0); 301 uint64_t f31 = f[3U]; 302 uint64_t top_bit = f31 >> (uint32_t)63U; 303 f[3U] = f31 & (uint64_t)0x7fffffffffffffffU; 304 add_scalar0(f, f, (uint64_t)19U * top_bit); 305 uint64_t f0 = f[0U]; 306 uint64_t f1 = f[1U]; 307 uint64_t f2 = f[2U]; 308 uint64_t f3 = f[3U]; 309 uint64_t m0 = FStar_UInt64_gte_mask(f0, (uint64_t)0xffffffffffffffedU); 310 uint64_t m1 = FStar_UInt64_eq_mask(f1, (uint64_t)0xffffffffffffffffU); 311 uint64_t m2 = FStar_UInt64_eq_mask(f2, (uint64_t)0xffffffffffffffffU); 312 uint64_t m3 = FStar_UInt64_eq_mask(f3, (uint64_t)0x7fffffffffffffffU); 313 uint64_t mask = ((m0 & m1) & m2) & m3; 314 uint64_t f0_ = f0 - (mask & (uint64_t)0xffffffffffffffedU); 315 uint64_t f1_ = f1 - (mask & (uint64_t)0xffffffffffffffffU); 316 uint64_t f2_ = f2 - (mask & (uint64_t)0xffffffffffffffffU); 317 uint64_t f3_ = f3 - (mask & (uint64_t)0x7fffffffffffffffU); 318 uint64_t o0 = f0_; 319 uint64_t o1 = f1_; 320 uint64_t o2 = f2_; 321 uint64_t o3 = f3_; 322 b[0U] = o0; 323 b[1U] = o1; 324 b[2U] = o2; 325 b[3U] = o3; 326 } 327 328 static void 329 encode_point(uint8_t *o, uint64_t *i) 330 { 331 uint64_t *x = i; 332 uint64_t *z = i + (uint32_t)4U; 333 uint64_t tmp[4U] = { 0U }; 334 uint64_t u64s[4U] = { 0U }; 335 uint64_t tmp_w[16U] = { 0U }; 336 finv(tmp, z, tmp_w); 337 fmul0(tmp, tmp, x, tmp_w); 338 store_felem(u64s, tmp); 339 KRML_MAYBE_FOR4(i0, 340 (uint32_t)0U, 341 (uint32_t)4U, 342 (uint32_t)1U, 343 store64_le(o + i0 * (uint32_t)8U, u64s[i0]);); 344 } 345 346 /** 347 Compute the scalar multiple of a point. 348 349 @param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to. 350 @param priv Pointer to 32 bytes of memory where the secret/private key is read from. 351 @param pub Pointer to 32 bytes of memory where the public point is read from. 352 */ 353 void 354 Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) 355 { 356 uint64_t init[8U] = { 0U }; 357 uint64_t tmp[4U] = { 0U }; 358 KRML_MAYBE_FOR4(i, 359 (uint32_t)0U, 360 (uint32_t)4U, 361 (uint32_t)1U, 362 uint64_t *os = tmp; 363 uint8_t *bj = pub + i * (uint32_t)8U; 364 uint64_t u = load64_le(bj); 365 uint64_t r = u; 366 uint64_t x = r; 367 os[i] = x;); 368 uint64_t tmp3 = tmp[3U]; 369 tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU; 370 uint64_t *x = init; 371 uint64_t *z = init + (uint32_t)4U; 372 z[0U] = (uint64_t)1U; 373 z[1U] = (uint64_t)0U; 374 z[2U] = (uint64_t)0U; 375 z[3U] = (uint64_t)0U; 376 x[0U] = tmp[0U]; 377 x[1U] = tmp[1U]; 378 x[2U] = tmp[2U]; 379 x[3U] = tmp[3U]; 380 montgomery_ladder(init, priv, init); 381 encode_point(out, init); 382 } 383 384 /** 385 Calculate a public point from a secret/private key. 386 387 This computes a scalar multiplication of the secret/private key with the curve's basepoint. 388 389 @param pub Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to. 390 @param priv Pointer to 32 bytes of memory where the secret/private key is read from. 391 */ 392 void 393 Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv) 394 { 395 uint8_t basepoint[32U] = { 0U }; 396 for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { 397 uint8_t *os = basepoint; 398 uint8_t x = g25519[i]; 399 os[i] = x; 400 } 401 Hacl_Curve25519_64_scalarmult(pub, priv, basepoint); 402 } 403 404 /** 405 Execute the diffie-hellmann key exchange. 406 407 @param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to. 408 @param priv Pointer to 32 bytes of memory where **our** secret/private key is read from. 409 @param pub Pointer to 32 bytes of memory where **their** public point is read from. 410 */ 411 bool 412 Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub) 413 { 414 uint8_t zeros[32U] = { 0U }; 415 Hacl_Curve25519_64_scalarmult(out, priv, pub); 416 uint8_t res = (uint8_t)255U; 417 for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { 418 uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros[i]); 419 res = uu____0 & res; 420 } 421 uint8_t z = res; 422 bool r = z == (uint8_t)255U; 423 return !r; 424 }