tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

commit af67487d7bfae08784886362e9b208acd998c389
parent da60d5c1e64a18dc12cdbd4de4480731c164d0e6
Author: Nick Mathewson <nickm@torproject.org>
Date:   Thu, 22 May 2025 10:20:08 -0400

Merge branch 'cgo-faster' into 'main'

Portability and speed improvements to cgo crypto

See merge request tpo/core/tor!900
Diffstat:
M src/core/crypto/relay_crypto_cgo.c | 45 +++++++++++++++++++++++++++++++--------------
M src/core/crypto/relay_crypto_cgo.h | 13 ++++---------
M src/ext/polyval/ctmul.c | 2 +-
M src/ext/polyval/ctmul64.c | 14 +++++++-------
M src/ext/polyval/pclmul.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M src/ext/polyval/polyval.c | 344 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M src/ext/polyval/polyval.h | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M src/lib/crypt_ops/.may_include | 1 +
M src/lib/crypt_ops/aes.h | 27 +++------------------------
M src/lib/crypt_ops/aes_nss.c | 107 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M src/lib/crypt_ops/aes_openssl.c | 399 ++++++++++++++++---------------------------------------------------------------
M src/lib/crypt_ops/crypto_init.c | 3 +++
M src/lib/crypt_ops/crypto_openssl_mgt.c | 3 ---
M src/test/bench.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/test/test_crypto.c | 223 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
15 files changed, 930 insertions(+), 589 deletions(-)

diff --git a/src/core/crypto/relay_crypto_cgo.c b/src/core/crypto/relay_crypto_cgo.c @@ -54,7 +54,7 @@ cgo_et_init(cgo_et_t *et, int aesbits, bool encrypt, et->kb = aes_raw_new(key, aesbits, encrypt); if (et->kb == NULL) return -1; - polyval_key_init(&et->ku, key + aes_key_bytes); + polyvalx_init(&et->ku, key + aes_key_bytes); return 0; } /** Replace the key on an existing, already initialized cgo_et_t. @@ -66,25 +66,24 @@ cgo_et_set_key(cgo_et_t *et, int aesbits, bool encrypt, { size_t aes_key_bytes = aesbits / 8; aes_raw_set_key(&et->kb, key, aesbits, encrypt); - polyval_key_init(&et->ku, key + aes_key_bytes); + polyvalx_init(&et->ku, key + aes_key_bytes); } /** Helper: Compute polyval(KU, H | CMD | X_R). */ static inline void -compute_et_mask(polyval_key_t *pvk, const et_tweak_t tweak, uint8_t *t_out) +compute_et_mask(polyvalx_t *pvk, const et_tweak_t tweak, uint8_t *t_out) { // block 0: tweak.h // block 1: one byte of command, first 15 bytes of x_r // block 2...: remainder of x_r, zero-padded. - polyval_t pv; + polyvalx_reset(pvk); uint8_t block1[16]; block1[0] = tweak.uiv.cmd; memcpy(block1+1, tweak.x_r, 15); - polyval_init_from_key(&pv, pvk); - polyval_add_block(&pv, tweak.uiv.h); - polyval_add_block(&pv, block1); - polyval_add_zpad(&pv, tweak.x_r + 15, ET_TWEAK_LEN_X_R - 15); - polyval_get_tag(&pv, t_out); + polyvalx_add_block(pvk, tweak.uiv.h); + polyvalx_add_block(pvk, block1); + polyvalx_add_zpad(pvk, tweak.x_r + 15, ET_TWEAK_LEN_X_R - 15); + polyvalx_get_tag(pvk, t_out); } /** XOR the 16 byte block from inp into out. 
*/ static void @@ -148,9 +147,10 @@ STATIC int cgo_prf_init(cgo_prf_t *prf, int aesbits, const uint8_t *key) { + const uint8_t iv[16] = {0}; size_t aes_key_bytes = aesbits / 8; memset(prf,0, sizeof(*prf)); - prf->k = aes_raw_new(key, aesbits, true); + prf->k = aes_new_cipher(key, iv, aesbits); polyval_key_init(&prf->b, key + aes_key_bytes); return 0; } @@ -162,7 +162,7 @@ cgo_prf_set_key(cgo_prf_t *prf, int aesbits, const uint8_t *key) { size_t aes_key_bytes = aesbits / 8; - aes_raw_set_key(&prf->k, key, aesbits, true); + aes_cipher_set_key(prf->k, key, aesbits); polyval_key_init(&prf->b, key + aes_key_bytes); } /** @@ -184,7 +184,15 @@ cgo_prf_xor_t0(cgo_prf_t *prf, const uint8_t *input, polyval_get_tag(&pv, hash); hash[15] &= 0xC0; // Clear the low six bits. - aes_raw_counter_xor(prf->k, hash, 0, data, PRF_T0_DATA_LEN); + aes_cipher_set_iv_aligned(prf->k, hash); + aes_crypt_inplace(prf->k, (char*) data, PRF_T0_DATA_LEN); + + // Re-align the cipher. + // + // This approach is faster than EVP_CIPHER_set_num! + const int ns = 16 - (PRF_T0_DATA_LEN % 0xf); + // We're not using the hash for anything, so it's okay to overwrite + aes_crypt_inplace(prf->k, (char*)hash, ns); } /** * Generate 'n' bytes of the PRF's results on 'input', for position t=1, @@ -203,9 +211,18 @@ cgo_prf_gen_t1(cgo_prf_t *prf, const uint8_t *input, polyval_add_block(&pv, input); polyval_get_tag(&pv, hash); hash[15] &= 0xC0; // Clear the low six bits. + hash[15] += T1_OFFSET; // Can't overflow! memset(buf, 0, n); - aes_raw_counter_xor(prf->k, hash, T1_OFFSET, buf, n); + aes_cipher_set_iv_aligned(prf->k, hash); + aes_crypt_inplace(prf->k, (char*)buf, n); + + // Re-align the cipher. + size_t ns = 16-(n&0x0f); + if (ns) { + // We're not using the hash for anything, so it's okay to overwrite + aes_crypt_inplace(prf->k, (char*) hash, ns); + } } /** * Release any storage held in 'prf'. 
@@ -215,7 +232,7 @@ cgo_prf_gen_t1(cgo_prf_t *prf, const uint8_t *input, STATIC void cgo_prf_clear(cgo_prf_t *prf) { - aes_raw_free(prf->k); + aes_cipher_free(prf->k); } static int diff --git a/src/core/crypto/relay_crypto_cgo.h b/src/core/crypto/relay_crypto_cgo.h @@ -77,23 +77,18 @@ typedef struct cgo_et_t { */ aes_raw_t *kb; /** - * Polyval key. + * Polyval instance, with expanded key. */ - polyval_key_t ku; + polyvalx_t ku; } cgo_et_t; /** * Keyed pseudorandom function, based on polyval and AES-CTR. */ typedef struct cgo_prf_t { /** - * AES key: may be 128, 192, or 256 bits. - * - * Even though we're going to be using this in counter mode, - * we don't make an aes_cnt_cipher_t here, since that type - * does not support efficient re-use of the key with multiple - * IVs. + * AES stream cipher: may be 128, 192, or 256 bits. */ - aes_raw_t *k; + aes_cnt_cipher_t *k; /** * Polyval instance. */ diff --git a/src/ext/polyval/ctmul.c b/src/ext/polyval/ctmul.c @@ -194,7 +194,7 @@ bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y) #endif static void -pv_mul_y_h(polyval_t *pv) +pv_mul_y_h_ctmul(polyval_t *pv) { uint32_t *yw = pv->y.v; const uint32_t *hw = pv->key.h.v; diff --git a/src/ext/polyval/ctmul64.c b/src/ext/polyval/ctmul64.c @@ -73,15 +73,15 @@ rev64(uint64_t x) static void -pv_mul_y_h(polyval_t *pv) +pv_mul_y_h_ctmul64(polyval_t *pv) { uint64_t y0, y1; uint64_t h0, h1, h2, h0r, h1r, h2r; - y0 = pv->y.lo; - y1 = pv->y.hi; - h0 = pv->key.h.lo; - h1 = pv->key.h.hi; + y0 = CTMUL64_MEMBER(pv->y).lo; + y1 = CTMUL64_MEMBER(pv->y).hi; + h0 = CTMUL64_MEMBER(pv->key.h).lo; + h1 = CTMUL64_MEMBER(pv->key.h).hi; h0r = rev64(h0); h1r = rev64(h1); @@ -127,7 +127,7 @@ pv_mul_y_h(polyval_t *pv) v3 ^= v1 ^ (v1 >> 1) ^ (v1 >> 2) ^ (v1 >> 7); v2 ^= (v1 << 63) ^ (v1 << 62) ^ (v1 << 57); - pv->y.lo = v2; - pv->y.hi = v3; + CTMUL64_MEMBER(pv->y).lo = v2; + CTMUL64_MEMBER(pv->y).hi = v3; } } diff --git a/src/ext/polyval/pclmul.c b/src/ext/polyval/pclmul.c @@ -72,7 +72,7 @@ 
BR_TARGETS_X86_UP * We use a target of "sse2" only, so that Clang may still handle the * '__m128i' type and allocate SSE2 registers. */ -#ifdef __clang__ +#ifdef __clang__AND_NOT_WORKING BR_TARGET("sse2") static inline __m128i pclmulqdq00(__m128i x, __m128i y) @@ -149,21 +149,90 @@ pclmulqdq11(__m128i x, __m128i y) } while (0) +BR_TARGET("ssse3,pclmul") +static inline void +expand_key_pclmul(const polyval_t *pv, pv_expanded_key_t *out) +{ + __m128i h1w, h1x; + __m128i lastw, lastx; + __m128i t0, t1, t2, t3; + + h1w = PCLMUL_MEMBER(pv->key.h); + BK(h1w, h1x); + lastw = h1w; + + for (int i = PV_BLOCK_STRIDE - 2; i >= 0; --i) { + BK(lastw, lastx); + + t1 = pclmulqdq11(lastw, h1w); + t3 = pclmulqdq00(lastw, h1w); + t2 = pclmulqdq00(lastx, h1x); + t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3)); + t0 = _mm_shuffle_epi32(t1, 0x0E); + t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E)); + t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E)); + REDUCE_F128(t0, t1, t2, t3); + out->k[i] = lastw = _mm_unpacklo_epi64(t1, t0); + } +} + +// Add PCLMUL_BLOCK_STRIDE * 16 bytes from input. 
+BR_TARGET("ssse3,pclmul") +static inline void +pv_add_multiple_pclmul(polyval_t *pv, + const uint8_t *input, + const pv_expanded_key_t *expanded) +{ + __m128i t0, t1, t2, t3; + + t1 = _mm_setzero_si128(); + t2 = _mm_setzero_si128(); + t3 = _mm_setzero_si128(); + + for (int i = 0; i < PV_BLOCK_STRIDE; ++i, input += 16) { + __m128i aw = _mm_loadu_si128((void *)(input)); + __m128i ax; + __m128i hx, hw; + if (i == 0) { + aw = _mm_xor_si128(aw, PCLMUL_MEMBER(pv->y)); + } + if (i == PV_BLOCK_STRIDE - 1) { + hw = PCLMUL_MEMBER(pv->key.h); + } else { + hw = expanded->k[i]; + } + BK(aw, ax); + BK(hw, hx); + t1 = _mm_xor_si128(t1, pclmulqdq11(aw, hw)); + t3 = _mm_xor_si128(t3, pclmulqdq00(aw, hw)); + t2 = _mm_xor_si128(t2, pclmulqdq00(ax, hx)); + } + + t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3)); + t0 = _mm_shuffle_epi32(t1, 0x0E); + t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E)); + t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E)); + + REDUCE_F128(t0, t1, t2, t3); + PCLMUL_MEMBER(pv->y) = _mm_unpacklo_epi64(t1, t0); +} + + /* see bearssl_hash.h */ BR_TARGET("ssse3,pclmul") -static -void pv_mul_y_h(polyval_t *pv) +static inline void +pv_mul_y_h_pclmul(polyval_t *pv) { __m128i yw, h1w, h1x; - h1w = pv->key.h; + h1w = PCLMUL_MEMBER(pv->key.h); BK(h1w, h1x); { __m128i aw, ax; __m128i t0, t1, t2, t3; - aw = pv->y; + aw = PCLMUL_MEMBER(pv->y); BK(aw, ax); t1 = pclmulqdq11(aw, h1w); @@ -180,5 +249,5 @@ void pv_mul_y_h(polyval_t *pv) yw = _mm_unpacklo_epi64(t1, t0); } - pv->y = yw; + PCLMUL_MEMBER(pv->y) = yw; } diff --git a/src/ext/polyval/polyval.c b/src/ext/polyval/polyval.c @@ -39,6 +39,10 @@ #include <string.h> +#ifdef PV_USE_PCLMUL_DETECT +#include <cpuid.h> +#endif + typedef pv_u128_ u128; /* ======== @@ -47,6 +51,7 @@ typedef pv_u128_ u128; * They have different definitions depending on our representation * of 128-bit integers. */ +#if 0 /** * Read a u128-bit little-endian integer from 'bytes', * which may not be aligned. 
@@ -72,7 +77,8 @@ static inline void pv_xor_y(polyval_t *, u128 v); * * (This is a carryless multiply in the Polyval galois field) */ -static void pv_mul_y_h(polyval_t *); +static void pv_mul_y_h(polyval_t *);h +#endif /* ===== * Endianness conversion for big-endian platforms @@ -116,58 +122,77 @@ bswap32(uint64_t v) #define convert_byte_order32(x) (x) #endif -#ifdef PV_USE_PCLMUL +#if defined PV_USE_PCLMUL_UNCONDITIONAL +#define PCLMUL_MEMBER(v) (v) +#define PV_USE_PCLMUL +#elif defined PV_USE_PCLMUL_DETECT +#define PCLMUL_MEMBER(v) (v).u128x1 +#define CTMUL64_MEMBER(v) (v).u64x2 +#define PV_USE_PCLMUL +#define PV_USE_CTMUL64 + +#elif defined PV_USE_CTMUL64 +#define CTMUL64_MEMBER(v) (v) +#endif + +#ifdef PV_USE_PCLMUL #include "ext/polyval/pclmul.c" static inline u128 -u128_from_bytes(const uint8_t *bytes) +u128_from_bytes_pclmul(const uint8_t *bytes) { - return _mm_loadu_si128((const u128*)bytes); + u128 r; + PCLMUL_MEMBER(r) = _mm_loadu_si128((const __m128i*)bytes); + return r; } static inline void -u128_to_bytes(u128 val, uint8_t *bytes_out) +u128_to_bytes_pclmul(u128 val, uint8_t *bytes_out) { - _mm_storeu_si128((u128*)bytes_out, val); + _mm_storeu_si128((__m128i*)bytes_out, PCLMUL_MEMBER(val)); } static inline void -pv_xor_y(polyval_t *pv, u128 v) +pv_xor_y_pclmul(polyval_t *pv, u128 v) { - pv->y = _mm_xor_si128(pv->y, v); + PCLMUL_MEMBER(pv->y) = _mm_xor_si128(PCLMUL_MEMBER(pv->y), + PCLMUL_MEMBER(v)); } -#elif defined(PV_USE_CTMUL64) +#endif +#if defined(PV_USE_CTMUL64) #include "ext/polyval/ctmul64.c" static inline u128 -u128_from_bytes(const uint8_t *bytes) +u128_from_bytes_ctmul64(const uint8_t *bytes) { u128 r; - memcpy(&r.lo, bytes, 8); - memcpy(&r.hi, bytes + 8, 8); - r.lo = convert_byte_order64(r.lo); - r.hi = convert_byte_order64(r.hi); + memcpy(&CTMUL64_MEMBER(r).lo, bytes, 8); + memcpy(&CTMUL64_MEMBER(r).hi, bytes + 8, 8); + CTMUL64_MEMBER(r).lo = convert_byte_order64(CTMUL64_MEMBER(r).lo); + CTMUL64_MEMBER(r).hi = 
convert_byte_order64(CTMUL64_MEMBER(r).hi); return r; } static inline void -u128_to_bytes(u128 val, uint8_t *bytes_out) +u128_to_bytes_ctmul64(u128 val, uint8_t *bytes_out) { - uint64_t lo = convert_byte_order64(val.lo); - uint64_t hi = convert_byte_order64(val.hi); + uint64_t lo = convert_byte_order64(CTMUL64_MEMBER(val).lo); + uint64_t hi = convert_byte_order64(CTMUL64_MEMBER(val).hi); memcpy(bytes_out, &lo, 8); memcpy(bytes_out + 8, &hi, 8); } static inline void -pv_xor_y(polyval_t *pv, u128 val) +pv_xor_y_ctmul64(polyval_t *pv, u128 val) { - pv->y.lo ^= val.lo; - pv->y.hi ^= val.hi; + CTMUL64_MEMBER(pv->y).lo ^= CTMUL64_MEMBER(val).lo; + CTMUL64_MEMBER(pv->y).hi ^= CTMUL64_MEMBER(val).hi; } -#elif defined(PV_USE_CTMUL) +#endif + +#if defined(PV_USE_CTMUL) #include "ext/polyval/ctmul.c" static inline u128 -u128_from_bytes(const uint8_t *bytes) +u128_from_bytes_ctmul(const uint8_t *bytes) { u128 r; memcpy(&r.v, bytes, 16); @@ -177,7 +202,7 @@ u128_from_bytes(const uint8_t *bytes) return r; } static inline void -u128_to_bytes(u128 val, uint8_t *bytes_out) +u128_to_bytes_ctmul(u128 val, uint8_t *bytes_out) { uint32_t v[4]; for (int i = 0; i < 4; ++i) { @@ -186,7 +211,7 @@ u128_to_bytes(u128 val, uint8_t *bytes_out) memcpy(bytes_out, v, 16); } static inline void -pv_xor_y(polyval_t *pv, u128 val) +pv_xor_y_ctmul(polyval_t *pv, u128 val) { for (int i = 0; i < 4; ++i) { pv->y.v[i] ^= val.v[i]; @@ -194,35 +219,274 @@ pv_xor_y(polyval_t *pv, u128 val) } #endif +struct expanded_key_none {}; +static inline void add_multiple_none(polyval_t *pv, + const uint8_t *input, + const struct expanded_key_none *expanded) +{ + (void) pv; + (void) input; + (void) expanded; +} +static inline void expand_key_none(const polyval_t *inp, + struct expanded_key_none *out) +{ + (void) inp; + (void) out; +} + +/* Kludge: a special value to use for block_stride when we don't support + * processing multiple blocks at once. Previously we used 0, but that + * caused warnings with some comparisons. 
*/ +#define BLOCK_STRIDE_NONE 0xffff + +#define PV_DECLARE(prefix, \ + st, \ + u128_from_bytes, \ + u128_to_bytes, \ + pv_xor_y, \ + pv_mul_y_h, \ + block_stride, \ + expanded_key_tp, expand_fn, add_multiple_fn) \ + st void \ + prefix ## polyval_key_init(polyval_key_t *pvk, const uint8_t *key) \ + { \ + pvk->h = u128_from_bytes(key); \ + } \ + st void \ + prefix ## polyval_init(polyval_t *pv, const uint8_t *key) \ + { \ + polyval_key_init(&pv->key, key); \ + memset(&pv->y, 0, sizeof(u128)); \ + } \ + st void \ + prefix ## polyval_init_from_key(polyval_t *pv, const polyval_key_t *key) \ + { \ + memcpy(&pv->key, key, sizeof(polyval_key_t)); \ + memset(&pv->y, 0, sizeof(u128)); \ + } \ + st void \ + prefix ## polyval_add_block(polyval_t *pv, const uint8_t *block) \ + { \ + u128 b = u128_from_bytes(block); \ + pv_xor_y(pv, b); \ + pv_mul_y_h(pv); \ + } \ + st void \ + prefix ## polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n) \ + { \ + /* since block_stride is a constant, this should get optimized */ \ + if ((block_stride != BLOCK_STRIDE_NONE) \ + && n >= (block_stride) * 16) { \ + expanded_key_tp expanded_key; \ + expand_fn(pv, &expanded_key); \ + while (n >= (block_stride) * 16) { \ + add_multiple_fn(pv, data, &expanded_key); \ + n -= block_stride*16; \ + data += block_stride * 16; \ + } \ + } \ + while (n > 16) { \ + polyval_add_block(pv, data); \ + data += 16; \ + n -= 16; \ + } \ + if (n) { \ + uint8_t block[16]; \ + memset(&block, 0, sizeof(block)); \ + memcpy(block, data, n); \ + polyval_add_block(pv, block); \ + } \ + } \ + st void \ + prefix ## polyval_get_tag(const polyval_t *pv, uint8_t *tag_out) \ + { \ + u128_to_bytes(pv->y, tag_out); \ + } \ + st void \ + prefix ## polyval_reset(polyval_t *pv) \ + { \ + memset(&pv->y, 0, sizeof(u128)); \ + } + +#ifdef PV_USE_PCLMUL_DETECT +/* We use a boolean to distinguish whether to use the PCLMUL instructions, + * but instead we could use function pointers. 
It's probably worth + * benchmarking, though it's unlikely to make a measurable difference. + */ +static bool use_pclmul = false; + +/* Declare _both_ variations of our code, statically, + * with different prefixes. */ +PV_DECLARE(pclmul_, static, + u128_from_bytes_pclmul, + u128_to_bytes_pclmul, + pv_xor_y_pclmul, + pv_mul_y_h_pclmul, + PV_BLOCK_STRIDE, + pv_expanded_key_t, + expand_key_pclmul, + pv_add_multiple_pclmul) + +PV_DECLARE(ctmul64_, static, + u128_from_bytes_ctmul64, + u128_to_bytes_ctmul64, + pv_xor_y_ctmul64, + pv_mul_y_h_ctmul64, + BLOCK_STRIDE_NONE, + struct expanded_key_none, + expand_key_none, + add_multiple_none) + void -polyval_key_init(polyval_key_t *pvk, const uint8_t *key) +polyval_key_init(polyval_key_t *pv, const uint8_t *key) { - pvk->h = u128_from_bytes(key); + if (use_pclmul) + pclmul_polyval_key_init(pv, key); + else + ctmul64_polyval_key_init(pv, key); } void polyval_init(polyval_t *pv, const uint8_t *key) { - polyval_key_init(&pv->key, key); - memset(&pv->y, 0, sizeof(u128)); + if (use_pclmul) + pclmul_polyval_init(pv, key); + else + ctmul64_polyval_init(pv, key); } void polyval_init_from_key(polyval_t *pv, const polyval_key_t *key) { - memcpy(&pv->key, key, sizeof(polyval_key_t)); - memset(&pv->y, 0, sizeof(u128)); + if (use_pclmul) + pclmul_polyval_init_from_key(pv, key); + else + ctmul64_polyval_init_from_key(pv, key); } void polyval_add_block(polyval_t *pv, const uint8_t *block) { - u128 b = u128_from_bytes(block); - pv_xor_y(pv, b); - pv_mul_y_h(pv); + if (use_pclmul) + pclmul_polyval_add_block(pv, block); + else + ctmul64_polyval_add_block(pv, block); } void polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n) { + if (use_pclmul) + pclmul_polyval_add_zpad(pv, data, n); + else + ctmul64_polyval_add_zpad(pv, data, n); +} +void +polyval_get_tag(const polyval_t *pv, uint8_t *tag_out) +{ + if (use_pclmul) + pclmul_polyval_get_tag(pv, tag_out); + else + ctmul64_polyval_get_tag(pv, tag_out); +} +void +polyval_reset(polyval_t 
*pv) +{ + if (use_pclmul) + pclmul_polyval_reset(pv); + else + ctmul64_polyval_reset(pv); +} + +#elif defined(PV_USE_PCLMUL) +PV_DECLARE(, , + u128_from_bytes_pclmul, + u128_to_bytes_pclmul, + pv_xor_y_pclmul, + pv_mul_y_h_pclmul, + PV_BLOCK_STRIDE, + pv_expanded_key_t, + expand_key_pclmul, + pv_add_multiple_pclmul) +#elif defined(PV_USE_CTMUL64) +PV_DECLARE(, , + u128_from_bytes_ctmul64, + u128_to_bytes_ctmul64, + pv_xor_y_ctmul64, + pv_mul_y_h_ctmul64, + BLOCK_STRIDE_NONE, + struct expanded_key_none, + expand_key_none, + add_multiple_none) + +#elif defined(PV_USE_CTMUL) +PV_DECLARE(, , u128_from_bytes_ctmul, + u128_to_bytes_ctmul, + pv_xor_y_ctmul, + pv_mul_y_h_ctmul, + BLOCK_STRIDE_NONE, + struct expanded_key_none, + expand_key_none, + add_multiple_none) +#endif + +#ifdef PV_USE_PCLMUL_DETECT +void +polyval_detect_implementation(void) +{ + unsigned int eax, ebc, ecx, edx; + use_pclmul = false; + if (__get_cpuid(1, &eax, &ebc, &ecx, &edx)) { + if (0 != (ecx & (1<<1))) { + use_pclmul = true; + } + } +} +#else +void +polyval_detect_implementation(void) +{ +} +#endif + +#ifdef POLYVAL_USE_EXPANDED_KEYS + +#ifdef PV_USE_PCLMUL_DETECT +#define SHOULD_EXPAND() (use_pclmul) +#else +#define SHOULD_EXPAND() (1) +#endif + +void +polyvalx_init(polyvalx_t *pvx, const uint8_t *key) +{ + polyval_init(&pvx->pv, key); + if (SHOULD_EXPAND()) { + expand_key_pclmul(&pvx->pv, &pvx->expanded); + } +} +void +polyvalx_init_from_key(polyvalx_t *pvx, const polyval_key_t *key) +{ + polyval_init_from_key(&pvx->pv, key); + if (SHOULD_EXPAND()) { + expand_key_pclmul(&pvx->pv, &pvx->expanded); + } +} +void +polyvalx_add_block(polyvalx_t *pvx, const uint8_t *block) +{ + polyval_add_block(&pvx->pv, block); +} +void +polyvalx_add_zpad(polyvalx_t *pvx, const uint8_t *data, size_t n) +{ + if (SHOULD_EXPAND() && n >= PV_BLOCK_STRIDE * 16) { + while (n > PV_BLOCK_STRIDE * 16) { + pv_add_multiple_pclmul(&pvx->pv, data, &pvx->expanded); + data += PV_BLOCK_STRIDE * 16; + n -= PV_BLOCK_STRIDE * 16; + } 
+ } while (n > 16) { - polyval_add_block(pv, data); + polyval_add_block(&pvx->pv, data); data += 16; n -= 16; } @@ -230,19 +494,19 @@ polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n) uint8_t block[16]; memset(&block, 0, sizeof(block)); memcpy(block, data, n); - polyval_add_block(pv, block); + polyval_add_block(&pvx->pv, block); } } void -polyval_get_tag(const polyval_t *pv, uint8_t *tag_out) +polyvalx_get_tag(const polyvalx_t *pvx, uint8_t *tag_out) { - u128_to_bytes(pv->y, tag_out); + polyval_get_tag(&pvx->pv, tag_out); } -void -polyval_reset(polyval_t *pv) +void polyvalx_reset(polyvalx_t *pvx) { - memset(&pv->y, 0, sizeof(u128)); + polyval_reset(&pvx->pv); } +#endif #if 0 #include <stdio.h> diff --git a/src/ext/polyval/polyval.h b/src/ext/polyval/polyval.h @@ -16,12 +16,24 @@ #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) \ || defined(_M_X64) || defined(_M_IX86) || defined(__i486) \ || defined(__i386__) -/* Use intel intrinsics for carryless multiply. - * - * TODO: In theory we should detect whether we have the relevant instructions, - * but they are all at least 15 years old. +#define PV_INTEL_ARCH +#endif + +#if defined(PV_INTEL_ARCH) && defined(__PCLMUL__) +/* We're building for an architecture that always has the intel + * intrinsics for carryless multiply. + * No need for runtime detection. + */ +#define PV_USE_PCLMUL_UNCONDITIONAL +#define PCLMUL_ANY + +#elif defined(PV_INTEL_ARCH) && SIZEOF_VOID_P >= 8 +/* We _might_ have PCLMUL, or we might not. + * We need to detect it at runtime. */ -#define PV_USE_PCLMUL +#define PV_USE_PCLMUL_DETECT +#define PCLMUL_ANY + #elif SIZEOF_VOID_P >= 8 /* It's a 64-bit architecture; use the generic 64-bit constant-time * implementation. @@ -36,13 +48,26 @@ #error "sizeof(void*) is implausibly weird." #endif +#ifdef PCLMUL_ANY +#include <emmintrin.h> + +#define POLYVAL_USE_EXPANDED_KEYS +#endif + /** * Declare a 128 bit integer type. 
# The exact representation will depend on which implementation we've chosen. */ -#ifdef PV_USE_PCLMUL -#include <emmintrin.h> +#if defined(PV_USE_PCLMUL_UNCONDITIONAL) typedef __m128i pv_u128_; +#elif defined(PV_USE_PCLMUL_DETECT) +typedef union pv_u128_ { + __m128i u128x1; + struct { + uint64_t lo; + uint64_t hi; + } u64x2; +} pv_u128_; #elif defined(PV_USE_CTMUL64) typedef struct pv_u128_ { uint64_t lo; @@ -117,4 +142,44 @@ void polyval_get_tag(const polyval_t *, uint8_t *tag_out); */ void polyval_reset(polyval_t *); +/** If a faster-than-default polyval implementation is available, use it. */ +void polyval_detect_implementation(void); + +#ifdef POLYVAL_USE_EXPANDED_KEYS +/* These variations are as for polyval_\*, but they use pre-expanded keys. + * They're appropriate when you know a key is likely to get used more than once + * on a large input. + */ + +/** How many blocks to handle at once with an expanded key */ +#define PV_BLOCK_STRIDE 8 +typedef struct pv_expanded_key_t { + // powers of h in reverse order, down to 2. + // (in other words, contains + // h^PCLMUL_BLOCK_STRIDE .. 
H^2) + __m128i k[PV_BLOCK_STRIDE-1]; +} pv_expanded_key_t; +typedef struct polyvalx_t { + polyval_t pv; + pv_expanded_key_t expanded; +} polyvalx_t; + +void polyvalx_init(polyvalx_t *, const uint8_t *key); +void polyvalx_init_from_key(polyvalx_t *, const polyval_key_t *key); +void polyvalx_add_block(polyvalx_t *, const uint8_t *block); +void polyvalx_add_zpad(polyvalx_t *, const uint8_t *data, size_t n); +void polyvalx_get_tag(const polyvalx_t *, uint8_t *tag_out); +void polyvalx_reset(polyvalx_t *); + +#else +#define polyvalx_t polyval_t +#define polyvalx_key_init polyval_key_init +#define polyvalx_init polyval_init +#define polyvalx_init_from_key polyval_init_from_key +#define polyvalx_add_block polyval_add_block +#define polyvalx_add_zpad polyval_add_zpad +#define polyvalx_get_tag polyval_get_tag +#define polyvalx_reset polyval_reset +#endif + #endif diff --git a/src/lib/crypt_ops/.may_include b/src/lib/crypt_ops/.may_include @@ -26,3 +26,4 @@ keccak-tiny/*.h ed25519/*.h ext/siphash.h +ext/polyval/*.h diff --git a/src/lib/crypt_ops/aes.h b/src/lib/crypt_ops/aes.h @@ -21,14 +21,14 @@ typedef struct aes_cnt_cipher_t aes_cnt_cipher_t; aes_cnt_cipher_t* aes_new_cipher(const uint8_t *key, const uint8_t *iv, int key_bits); +void aes_cipher_set_iv_aligned(aes_cnt_cipher_t *cipher_, const uint8_t *iv); +void aes_cipher_set_key(aes_cnt_cipher_t *cipher_, + const uint8_t *key, int key_bits); void aes_cipher_free_(aes_cnt_cipher_t *cipher); #define aes_cipher_free(cipher) \ FREE_AND_NULL(aes_cnt_cipher_t, aes_cipher_free_, (cipher)) void aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len); -int evaluate_evp_for_aes(int force_value); -int evaluate_ctr_for_aes(void); - #ifdef USE_AES_RAW typedef struct aes_raw_t aes_raw_t; @@ -40,27 +40,6 @@ void aes_raw_free_(aes_raw_t *cipher); FREE_AND_NULL(aes_raw_t, aes_raw_free_, (cipher)) void aes_raw_encrypt(const aes_raw_t *cipher, uint8_t *block); void aes_raw_decrypt(const aes_raw_t *cipher, uint8_t *block); - 
-void aes_raw_counter_xor(const aes_raw_t *aes, - const uint8_t *iv, uint32_t iv_offset, - uint8_t *data, size_t n); -#endif - -#ifdef TOR_AES_PRIVATE -#include "lib/arch/bytes.h" - -/** Increment the big-endian 128-bit counter in 'iv' by 'offset'. */ -static inline void -aes_ctr_add_iv_offset(uint8_t *iv, uint32_t offset) -{ - - uint64_t h_hi = tor_ntohll(get_uint64(iv + 0)); - uint64_t h_lo = tor_ntohll(get_uint64(iv + 8)); - h_lo += offset; - h_hi += (h_lo < offset); - set_uint64(iv + 0, tor_htonll(h_hi)); - set_uint64(iv + 8, tor_htonll(h_lo)); -} #endif #endif /* !defined(TOR_AES_H) */ diff --git a/src/lib/crypt_ops/aes_nss.c b/src/lib/crypt_ops/aes_nss.c @@ -23,9 +23,18 @@ DISABLE_GCC_WARNING("-Wstrict-prototypes") #include <secerr.h> ENABLE_GCC_WARNING("-Wstrict-prototypes") -aes_cnt_cipher_t * -aes_new_cipher(const uint8_t *key, const uint8_t *iv, - int key_bits) +struct aes_cnt_cipher_t { + PK11Context *context; + // We need to keep a copy of the key here since we can't set the IV only. + // It would be nice to fix that, but NSS doesn't see a huge number of + // users. 
+ uint8_t kbytes; + uint8_t key[32]; +}; + +static PK11Context * +aes_new_cipher_internal(const uint8_t *key, const uint8_t *iv, + int key_bits) { const CK_MECHANISM_TYPE ckm = CKM_AES_CTR; SECItem keyItem = { .type = siBuffer, @@ -68,7 +77,18 @@ aes_new_cipher(const uint8_t *key, const uint8_t *iv, PK11_FreeSlot(slot); tor_assert(result); - return (aes_cnt_cipher_t *)result; + return result; +} + +aes_cnt_cipher_t * +aes_new_cipher(const uint8_t *key, const uint8_t *iv, + int key_bits) +{ + aes_cnt_cipher_t *cipher = tor_malloc_zero(sizeof(*cipher)); + cipher->context = aes_new_cipher_internal(key, iv, key_bits); + cipher->kbytes = key_bits / 8; + memcpy(cipher->key, key, cipher->kbytes); + return cipher; } void @@ -76,7 +96,34 @@ aes_cipher_free_(aes_cnt_cipher_t *cipher) { if (!cipher) return; - PK11_DestroyContext((PK11Context*) cipher, PR_TRUE); + PK11_DestroyContext(cipher->context, PR_TRUE); + memwipe(cipher, 0, sizeof(*cipher)); + tor_free(cipher); +} + +void +aes_cipher_set_iv_aligned(aes_cnt_cipher_t *cipher, const uint8_t *iv) +{ + // For NSS, I could not find a method to change the IV + // of an existing context. Maybe I missed one? + PK11_DestroyContext(cipher->context, PR_TRUE); + cipher->context = aes_new_cipher_internal(cipher->key, iv, + 8*(int)cipher->kbytes); +} + +void +aes_cipher_set_key(aes_cnt_cipher_t *cipher, + const uint8_t *key, int key_bits) +{ + const uint8_t iv[16] = {0}; + // For NSS, I could not find a method to change the key + // of an existing context. Maybe I missed one? 
+ PK11_DestroyContext(cipher->context, PR_TRUE); + memwipe(cipher->key, 0, sizeof(cipher->key)); + + cipher->context = aes_new_cipher_internal(key, iv, key_bits); + cipher->kbytes = key_bits / 8; + memcpy(cipher->key, key, cipher->kbytes); } void @@ -85,29 +132,15 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data_, size_t len_) tor_assert(len_ <= INT_MAX); SECStatus s; - PK11Context *ctx = (PK11Context*)cipher; unsigned char *data = (unsigned char *)data_; int len = (int) len_; int result_len = 0; - s = PK11_CipherOp(ctx, data, &result_len, len, data, len); + s = PK11_CipherOp(cipher->context, data, &result_len, len, data, len); tor_assert(s == SECSuccess); tor_assert(result_len == len); } -int -evaluate_evp_for_aes(int force_value) -{ - (void)force_value; - return 0; -} - -int -evaluate_ctr_for_aes(void) -{ - return 0; -} - aes_raw_t * aes_raw_new(const uint8_t *key, int key_bits, bool encrypt) { @@ -186,37 +219,3 @@ aes_raw_decrypt(const aes_raw_t *cipher, uint8_t *block) /* This is the same function call for NSS. */ aes_raw_encrypt(cipher, block); } - -static inline void -xor_bytes(uint8_t *outp, const uint8_t *inp, size_t n) -{ - for (size_t i = 0; i < n; ++i) { - outp[i] ^= inp[i]; - } -} - -void -aes_raw_counter_xor(const aes_raw_t *cipher, - const uint8_t *iv, uint32_t iv_offset, - uint8_t *data, size_t n) -{ - uint8_t counter[16]; - uint8_t buf[16]; - - memcpy(counter, iv, 16); - aes_ctr_add_iv_offset(counter, iv_offset); - - while (n) { - memcpy(buf, counter, 16); - aes_raw_encrypt(cipher, buf); - if (n >= 16) { - xor_bytes(data, buf, 16); - n -= 16; - data += 16; - } else { - xor_bytes(data, buf, n); - break; - } - aes_ctr_add_iv_offset(counter, 1); - } -} diff --git a/src/lib/crypt_ops/aes_openssl.c b/src/lib/crypt_ops/aes_openssl.c @@ -41,36 +41,43 @@ ENABLE_GCC_WARNING("-Wredundant-decls") #include "lib/log/log.h" #include "lib/ctime/di_ops.h" -#ifdef OPENSSL_NO_ENGINE -/* Android's OpenSSL seems to have removed all of its Engine support. 
*/ -#define DISABLE_ENGINES +/* Cached values of our EVP_CIPHER items. If we don't pre-fetch them, + * then EVP_CipherInit calls EVP_CIPHER_fetch itself, + * which is surprisingly expensive. + */ +static const EVP_CIPHER *aes128ctr = NULL; +static const EVP_CIPHER *aes192ctr = NULL; +static const EVP_CIPHER *aes256ctr = NULL; +static const EVP_CIPHER *aes128ecb = NULL; +static const EVP_CIPHER *aes192ecb = NULL; +static const EVP_CIPHER *aes256ecb = NULL; + +#if OPENSSL_VERSION_NUMBER >= OPENSSL_V_NOPATCH(3,0,0) \ + && !defined(LIBRESSL_VERSION_NUMBER) +#define RESOLVE_CIPHER(c) \ + EVP_CIPHER_fetch(NULL, OBJ_nid2sn(EVP_CIPHER_get_nid(c)), "") +#else +#define RESOLVE_CIPHER(c) (c) #endif -/* We have five strategies for implementing AES counter mode. - * - * Best with x86 and x86_64: Use EVP_aes_*_ctr() and EVP_EncryptUpdate(). - * This is possible with OpenSSL 1.0.1, where the counter-mode implementation - * can use bit-sliced or vectorized AES or AESNI as appropriate. - * - * Otherwise: Pick the best possible AES block implementation that OpenSSL - * gives us, and the best possible counter-mode implementation, and combine - * them. +/** + * Pre-fetch the versions of every AES cipher with its associated provider. */ -#if OPENSSL_VERSION_NUMBER >= OPENSSL_V_NOPATCH(1,1,0) - -/* With newer OpenSSL versions, the older fallback modes don't compile. So - * don't use them, even if we lack specific acceleration. */ - -#define USE_EVP_AES_CTR - -#elif OPENSSL_VERSION_NUMBER >= OPENSSL_V_NOPATCH(1,0,1) && \ - (defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_AMD64) || defined(_M_X64) || defined(__INTEL__)) - -#define USE_EVP_AES_CTR - -#endif /* OPENSSL_VERSION_NUMBER >= OPENSSL_V_NOPATCH(1,1,0) || ... 
*/ +static void +init_ciphers(void) +{ + aes128ctr = RESOLVE_CIPHER(EVP_aes_128_ctr()); + aes192ctr = RESOLVE_CIPHER(EVP_aes_192_ctr()); + aes256ctr = RESOLVE_CIPHER(EVP_aes_256_ctr()); + aes128ecb = RESOLVE_CIPHER(EVP_aes_128_ecb()); + aes192ecb = RESOLVE_CIPHER(EVP_aes_192_ecb()); + aes256ecb = RESOLVE_CIPHER(EVP_aes_256_ecb()); +} +#define INIT_CIPHERS() STMT_BEGIN { \ + if (PREDICT_UNLIKELY(NULL == aes128ctr)) { \ + init_ciphers(); \ + } \ + } STMT_END /* We have 2 strategies for getting the AES block cipher: Via OpenSSL's * AES_encrypt function, or via OpenSSL's EVP_EncryptUpdate function. @@ -91,30 +98,18 @@ ENABLE_GCC_WARNING("-Wredundant-decls") * make sure that we have a fixed version.) */ -/* Helper function to use EVP with openssl's counter-mode wrapper. */ -static void -evp_block128_fn(const uint8_t in[16], - uint8_t out[16], - const void *key) -{ - EVP_CIPHER_CTX *ctx = (void*)key; - int inl=16, outl=16; - EVP_EncryptUpdate(ctx, out, &outl, in, inl); -} - -#ifdef USE_EVP_AES_CTR - /* We don't actually define the struct here. */ aes_cnt_cipher_t * aes_new_cipher(const uint8_t *key, const uint8_t *iv, int key_bits) { + INIT_CIPHERS(); EVP_CIPHER_CTX *cipher = EVP_CIPHER_CTX_new(); const EVP_CIPHER *c = NULL; switch (key_bits) { - case 128: c = EVP_aes_128_ctr(); break; - case 192: c = EVP_aes_192_ctr(); break; - case 256: c = EVP_aes_256_ctr(); break; + case 128: c = aes128ctr; break; + case 192: c = aes192ctr; break; + case 256: c = aes256ctr; break; default: tor_assert_unreached(); // LCOV_EXCL_LINE } EVP_EncryptInit(cipher, c, key, iv); @@ -129,265 +124,56 @@ aes_cipher_free_(aes_cnt_cipher_t *cipher_) EVP_CIPHER_CTX_reset(cipher); EVP_CIPHER_CTX_free(cipher); } + +/** Changes the key of the cipher; + * sets the IV to 0. 
+ */ void -aes_crypt_inplace(aes_cnt_cipher_t *cipher_, char *data, size_t len) +aes_cipher_set_key(aes_cnt_cipher_t *cipher_, const uint8_t *key, int key_bits) { - int outl; EVP_CIPHER_CTX *cipher = (EVP_CIPHER_CTX *) cipher_; - - tor_assert(len < INT_MAX); - - EVP_EncryptUpdate(cipher, (unsigned char*)data, - &outl, (unsigned char*)data, (int)len); -} -int -evaluate_evp_for_aes(int force_val) -{ - (void) force_val; - log_info(LD_CRYPTO, "This version of OpenSSL has a known-good EVP " - "counter-mode implementation. Using it."); - return 0; -} -int -evaluate_ctr_for_aes(void) -{ - return 0; -} -#else /* !defined(USE_EVP_AES_CTR) */ - -/*======================================================================*/ -/* Interface to AES code, and counter implementation */ - -/** Implements an AES counter-mode cipher. */ -struct aes_cnt_cipher_t { -/** This next element (however it's defined) is the AES key. */ - union { - EVP_CIPHER_CTX evp; - AES_KEY aes; - } key; - -#if !defined(WORDS_BIGENDIAN) -#define USING_COUNTER_VARS - /** These four values, together, implement a 128-bit counter, with - * counter0 as the low-order word and counter3 as the high-order word. */ - uint32_t counter3; - uint32_t counter2; - uint32_t counter1; - uint32_t counter0; -#endif /* !defined(WORDS_BIGENDIAN) */ - - union { - /** The counter, in big-endian order, as bytes. */ - uint8_t buf[16]; - /** The counter, in big-endian order, as big-endian words. Note that - * on big-endian platforms, this is redundant with counter3...0, - * so we just use these values instead. */ - uint32_t buf32[4]; - } ctr_buf; - - /** The encrypted value of ctr_buf. */ - uint8_t buf[16]; - /** Our current stream position within buf. */ - unsigned int pos; - - /** True iff we're using the evp implementation of this cipher. 
*/ - uint8_t using_evp; -}; - -/** True iff we should prefer the EVP implementation for AES, either because - * we're testing it or because we have hardware acceleration configured */ -static int should_use_EVP = 0; - -/** Check whether we should use the EVP interface for AES. If <b>force_val</b> - * is nonnegative, we use use EVP iff it is true. Otherwise, we use EVP - * if there is an engine enabled for aes-ecb. */ -int -evaluate_evp_for_aes(int force_val) -{ - ENGINE *e; - - if (force_val >= 0) { - should_use_EVP = force_val; - return 0; - } -#ifdef DISABLE_ENGINES - should_use_EVP = 0; -#else - e = ENGINE_get_cipher_engine(NID_aes_128_ecb); - - if (e) { - log_info(LD_CRYPTO, "AES engine \"%s\" found; using EVP_* functions.", - ENGINE_get_name(e)); - should_use_EVP = 1; - } else { - log_info(LD_CRYPTO, "No AES engine found; using AES_* functions."); - should_use_EVP = 0; + uint8_t iv[16] = {0}; + const EVP_CIPHER *c = NULL; + switch (key_bits) { + case 128: c = aes128ctr; break; + case 192: c = aes192ctr; break; + case 256: c = aes256ctr; break; + default: tor_assert_unreached(); // LCOV_EXCL_LINE } -#endif /* defined(DISABLE_ENGINES) */ - return 0; + // No need to call EVP_CIPHER_CTX_Reset here; EncryptInit already + // does it for us. + EVP_EncryptInit(cipher, c, key, iv); } - -/** Test the OpenSSL counter mode implementation to see whether it has the - * counter-mode bug from OpenSSL 1.0.0. If the implementation works, then - * we will use it for future encryption/decryption operations. +/** Change the IV of this stream cipher without changing the key. * - * We can't just look at the OpenSSL version, since some distributions update - * their OpenSSL packages without changing the version number. - **/ -int -evaluate_ctr_for_aes(void) -{ - /* Result of encrypting an all-zero block with an all-zero 128-bit AES key. 
- * This should be the same as encrypting an all-zero block with an all-zero - * 128-bit AES key in counter mode, starting at position 0 of the stream. - */ - static const unsigned char encrypt_zero[] = - "\x66\xe9\x4b\xd4\xef\x8a\x2c\x3b\x88\x4c\xfa\x59\xca\x34\x2b\x2e"; - unsigned char zero[16]; - unsigned char output[16]; - unsigned char ivec[16]; - unsigned char ivec_tmp[16]; - unsigned int pos, i; - AES_KEY key; - memset(zero, 0, sizeof(zero)); - memset(ivec, 0, sizeof(ivec)); - AES_set_encrypt_key(zero, 128, &key); - - pos = 0; - /* Encrypting a block one byte at a time should make the error manifest - * itself for known bogus openssl versions. */ - for (i=0; i<16; ++i) - AES_ctr128_encrypt(&zero[i], &output[i], 1, &key, ivec, ivec_tmp, &pos); - - if (fast_memneq(output, encrypt_zero, 16)) { - /* Counter mode is buggy */ - /* LCOV_EXCL_START */ - log_err(LD_CRYPTO, "This OpenSSL has a buggy version of counter mode; " - "quitting tor."); - exit(1); // exit ok: openssl is broken. - /* LCOV_EXCL_STOP */ - } - return 0; -} - -#if !defined(USING_COUNTER_VARS) -#define COUNTER(c, n) ((c)->ctr_buf.buf32[3-(n)]) -#else -#define COUNTER(c, n) ((c)->counter ## n) -#endif - -static void aes_set_key(aes_cnt_cipher_t *cipher, const uint8_t *key, - int key_bits); -static void aes_set_iv(aes_cnt_cipher_t *cipher, const uint8_t *iv); - -/** - * Return a newly allocated counter-mode AES128 cipher implementation, - * using the 128-bit key <b>key</b> and the 128-bit IV <b>iv</b>. - */ -aes_cnt_cipher_t* -aes_new_cipher(const uint8_t *key, const uint8_t *iv, int bits) -{ - aes_cnt_cipher_t* result = tor_malloc_zero(sizeof(aes_cnt_cipher_t)); - - aes_set_key(result, key, bits); - aes_set_iv(result, iv); - - return result; -} - -/** Set the key of <b>cipher</b> to <b>key</b>, which is - * <b>key_bits</b> bits long (must be 128, 192, or 256). Also resets - * the counter to 0. + * Requires that the cipher stream position is at an even multiple of 16 bytes. 
*/ -static void -aes_set_key(aes_cnt_cipher_t *cipher, const uint8_t *key, int key_bits) +void +aes_cipher_set_iv_aligned(aes_cnt_cipher_t *cipher_, const uint8_t *iv) { - if (should_use_EVP) { - const EVP_CIPHER *c = 0; - switch (key_bits) { - case 128: c = EVP_aes_128_ecb(); break; - case 192: c = EVP_aes_192_ecb(); break; - case 256: c = EVP_aes_256_ecb(); break; - default: tor_assert(0); // LCOV_EXCL_LINE - } - EVP_EncryptInit(&cipher->key.evp, c, key, NULL); - cipher->using_evp = 1; - } else { - AES_set_encrypt_key(key, key_bits,&cipher->key.aes); - cipher->using_evp = 0; - } - -#ifdef USING_COUNTER_VARS - cipher->counter0 = 0; - cipher->counter1 = 0; - cipher->counter2 = 0; - cipher->counter3 = 0; -#endif /* defined(USING_COUNTER_VARS) */ - - memset(cipher->ctr_buf.buf, 0, sizeof(cipher->ctr_buf.buf)); - - cipher->pos = 0; + EVP_CIPHER_CTX *cipher = (EVP_CIPHER_CTX *) cipher_; +#ifdef LIBRESSL_VERSION_NUMBER + EVP_CIPHER_CTX_set_iv(cipher, iv, 16); +#else + // We would have to do this if the cipher's position were not aligned: + // EVP_CIPHER_CTX_set_num(cipher, 0); - memset(cipher->buf, 0, sizeof(cipher->buf)); + memcpy(EVP_CIPHER_CTX_iv_noconst(cipher), iv, 16); +#endif } - -/** Release storage held by <b>cipher</b> - */ void -aes_cipher_free_(aes_cnt_cipher_t *cipher) +aes_crypt_inplace(aes_cnt_cipher_t *cipher_, char *data, size_t len) { - if (!cipher) - return; - if (cipher->using_evp) { - EVP_CIPHER_CTX_cleanup(&cipher->key.evp); - } - memwipe(cipher, 0, sizeof(aes_cnt_cipher_t)); - tor_free(cipher); -} + int outl; + EVP_CIPHER_CTX *cipher = (EVP_CIPHER_CTX *) cipher_; -#if defined(USING_COUNTER_VARS) -#define UPDATE_CTR_BUF(c, n) STMT_BEGIN \ - (c)->ctr_buf.buf32[3-(n)] = htonl((c)->counter ## n); \ - STMT_END -#else -#define UPDATE_CTR_BUF(c, n) -#endif /* defined(USING_COUNTER_VARS) */ + tor_assert(len < INT_MAX); -/** Encrypt <b>len</b> bytes from <b>input</b>, storing the results in place. 
- * Uses the key in <b>cipher</b>, and advances the counter by <b>len</b> bytes - * as it encrypts. - */ -void -aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len) -{ - /* Note that the "128" below refers to the length of the counter, - * not the length of the AES key. */ - if (cipher->using_evp) { - /* In openssl 1.0.0, there's an if'd out EVP_aes_128_ctr in evp.h. If - * it weren't disabled, it might be better just to use that. - */ - CRYPTO_ctr128_encrypt((const unsigned char *)data, - (unsigned char *)data, - len, - &cipher->key.evp, - cipher->ctr_buf.buf, - cipher->buf, - &cipher->pos, - evp_block128_fn); - } else { - AES_ctr128_encrypt((const unsigned char *)data, - (unsigned char *)data, - len, - &cipher->key.aes, - cipher->ctr_buf.buf, - cipher->buf, - &cipher->pos); - } + EVP_EncryptUpdate(cipher, (unsigned char*)data, + &outl, (unsigned char*)data, (int)len); } -#endif /* defined(USE_EVP_AES_CTR) */ - /* ======== * Functions for "raw" (ECB) AES. * @@ -406,16 +192,19 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len) aes_raw_t * aes_raw_new(const uint8_t *key, int key_bits, bool encrypt) { + INIT_CIPHERS(); EVP_CIPHER_CTX *cipher = EVP_CIPHER_CTX_new(); tor_assert(cipher); const EVP_CIPHER *c = NULL; switch (key_bits) { - case 128: c = EVP_aes_128_ecb(); break; - case 192: c = EVP_aes_192_ecb(); break; - case 256: c = EVP_aes_256_ecb(); break; + case 128: c = aes128ecb; break; + case 192: c = aes192ecb; break; + case 256: c = aes256ecb; break; default: tor_assert_unreached(); } + // No need to call EVP_CIPHER_CTX_Reset here; EncryptInit already + // does it for us. 
int r = EVP_CipherInit(cipher, c, key, NULL, encrypt); tor_assert(r == 1); EVP_CIPHER_CTX_set_padding(cipher, 0); @@ -432,14 +221,13 @@ aes_raw_set_key(aes_raw_t **cipher_, const uint8_t *key, { const EVP_CIPHER *c = *(EVP_CIPHER**) cipher_; switch (key_bits) { - case 128: c = EVP_aes_128_ecb(); break; - case 192: c = EVP_aes_192_ecb(); break; - case 256: c = EVP_aes_256_ecb(); break; + case 128: c = aes128ecb; break; + case 192: c = aes192ecb; break; + case 256: c = aes256ecb; break; default: tor_assert_unreached(); } aes_raw_t *cipherp = *cipher_; EVP_CIPHER_CTX *cipher = (EVP_CIPHER_CTX *)cipherp; - EVP_CIPHER_CTX_reset(cipher); int r = EVP_CipherInit(cipher, c, key, NULL, encrypt); tor_assert(r == 1); EVP_CIPHER_CTX_set_padding(cipher, 0); @@ -487,30 +275,3 @@ aes_raw_decrypt(const aes_raw_t *cipher, uint8_t *block) tor_assert(r == 1); tor_assert(outl == 16); } - -/** - * Use the AES encryption key AES in counter mode, - * starting at the position (iv + iv_offset)*16, - * to encrypt the 'n' bytes of data in 'data'. - * - * Unlike aes_crypt_inplace, this function can re-use the same key repeatedly - * with diferent IVs. 
- */ -void -aes_raw_counter_xor(const aes_raw_t *cipher, - const uint8_t *iv, uint32_t iv_offset, - uint8_t *data, size_t n) -{ - uint8_t counter[16]; - uint8_t buf[16]; - unsigned int pos = 0; - - memcpy(counter, iv, 16); - if (iv_offset) { - aes_ctr_add_iv_offset(counter, iv_offset); - } - - CRYPTO_ctr128_encrypt(data, data, n, - (EVP_CIPHER_CTX *)cipher, - counter, buf, &pos, evp_block128_fn); -} diff --git a/src/lib/crypt_ops/crypto_init.c b/src/lib/crypt_ops/crypto_init.c @@ -26,6 +26,7 @@ #include "lib/crypt_ops/crypto_options_st.h" #include "lib/conf/conftypes.h" #include "lib/log/util_bug.h" +#include "ext/polyval/polyval.h" #include "lib/subsys/subsys.h" @@ -69,6 +70,8 @@ crypto_early_init(void) crypto_nss_early_init(0); #endif + polyval_detect_implementation(); + if (crypto_seed_rng() < 0) return -1; if (crypto_init_siphash_key() < 0) diff --git a/src/lib/crypt_ops/crypto_openssl_mgt.c b/src/lib/crypt_ops/crypto_openssl_mgt.c @@ -323,9 +323,6 @@ crypto_openssl_late_init(int useAccel, const char *accelName, return -1; } - evaluate_evp_for_aes(-1); - evaluate_ctr_for_aes(); - return 0; } diff --git a/src/test/bench.c b/src/test/bench.c @@ -23,6 +23,9 @@ #include <openssl/obj_mac.h> #endif /* defined(ENABLE_OPENSSL) */ +#include <math.h> + +#include "ext/polyval/polyval.h" #include "core/or/circuitlist.h" #include "app/config/config.h" #include "app/main/subsysmgr.h" @@ -33,6 +36,7 @@ #include "lib/crypt_ops/crypto_rand.h" #include "feature/dircommon/consdiff.h" #include "lib/compress/compress.h" +#include "core/crypto/relay_crypto_cgo.h" #include "core/or/cell_st.h" #include "core/or/or_circuit_st.h" @@ -43,6 +47,27 @@ #include "feature/dirparse/microdesc_parse.h" #include "feature/nodelist/microdesc.h" +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) \ + || defined(_M_X64) || defined(_M_IX86) || defined(__i486) \ + || defined(__i386__) +#define INTEL +#endif + +#ifdef INTEL +#include "x86intrin.h" + +static inline uint64_t +cycles(void) 
+{ + return __rdtsc(); +} +#define cpb(start, end, bytes) \ + (((double)(end - start)) / (bytes)) +#else +#define cycles() 0 +#define cpb(start,end,bytes) ((void)(start+end+bytes), (double)NAN) +#endif + #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_PROCESS_CPUTIME_ID) static uint64_t nanostart; static inline uint64_t @@ -498,9 +523,9 @@ bench_digest(void) } static void -bench_cell_ops(void) +bench_cell_ops_tor1(void) { - const int iters = 1<<16; + const int iters = 1<<20; int i; /* benchmarks for cell ops at relay. */ @@ -508,6 +533,10 @@ bench_cell_ops(void) cell_t *cell = tor_malloc(sizeof(cell_t)); int outbound; uint64_t start, end; + uint64_t cstart, cend; + + // TODO CGO: use constant after this is merged or rebased. + const unsigned payload_len = 498; crypto_rand((char*)cell->payload, sizeof(cell->payload)); @@ -529,18 +558,35 @@ bench_cell_ops(void) for (outbound = 0; outbound <= 1; ++outbound) { cell_direction_t d = outbound ? CELL_DIRECTION_OUT : CELL_DIRECTION_IN; start = perftime(); + cstart = cycles(); for (i = 0; i < iters; ++i) { char recognized = 0; crypt_path_t *layer_hint = NULL; relay_decrypt_cell(TO_CIRCUIT(or_circ), cell, d, &layer_hint, &recognized); } + cend = cycles(); end = perftime(); - printf("%sbound cells: %.2f ns per cell. (%.2f ns per byte of payload)\n", + printf("%sbound cells: %.2f ns per cell. " + "(%.2f ns per byte of payload, %.2f cpb)\n", outbound?"Out":" In", NANOCOUNT(start,end,iters), - NANOCOUNT(start,end,iters*CELL_PAYLOAD_SIZE)); + NANOCOUNT(start,end,iters * payload_len), + cpb(cstart, cend, iters * payload_len)); + } + + start = perftime(); + cstart = cycles(); + for (i = 0; i < iters; ++i) { + relay_encrypt_cell_inbound(cell, or_circ); } + cend = cycles(); + end = perftime(); + printf("originate inbound : %.2f ns per cell. 
" + "(%.2f ns per payload byte, %.2f cpb)\n", + NANOCOUNT(start, end, iters), + NANOCOUNT(start, end, iters * payload_len), + cpb(cstart, cend, iters*payload_len)); relay_crypto_clear(&or_circ->crypto); tor_free(or_circ); @@ -548,6 +594,126 @@ bench_cell_ops(void) } static void +bench_polyval(void) +{ + polyval_t pv; + polyvalx_t pvx; + uint8_t key[16]; + uint8_t input[512]; + uint64_t start, end, cstart, cend; + crypto_rand((char*) key, sizeof(key)); + crypto_rand((char*) input, sizeof(input)); + + const int iters = 1<<20; + + polyval_init(&pv, key); + start = perftime(); + cstart = cycles(); + for (int i = 0; i < iters; ++i) { + polyval_add_block(&pv, input); + } + cend = cycles(); + end = perftime(); + printf("polyval (add 16): %.2f ns; %.2f cpb\n", + NANOCOUNT(start, end, iters), + cpb(cstart, cend, iters * 16)); + + start = perftime(); + cstart = cycles(); + for (int i = 0; i < iters; ++i) { + polyval_add_zpad(&pv, input, 512); + } + cend = cycles(); + end = perftime(); + printf("polyval (add 512): %.2f ns; %.2f cpb\n", + NANOCOUNT(start, end, iters), + cpb(cstart, cend, iters * 512)); + + polyvalx_init(&pvx, key); + start = perftime(); + cstart = cycles(); + for (int i = 0; i < iters; ++i) { + polyvalx_add_zpad(&pvx, input, 512); + } + cend = cycles(); + end = perftime(); + printf("polyval (add 512, pre-expanded key): %.2f ns; %.2f cpb\n", + NANOCOUNT(start, end, iters), + cpb(cstart, cend, iters * 512)); +} + +static void +bench_cell_ops_cgo(void) +{ + const int iters = 1<<20; + + /* benchmarks for cell ops at relay. */ + cell_t *cell = tor_malloc(sizeof(cell_t)); + + uint64_t start, end; + uint64_t cstart, cend; + + const uint8_t *tag = NULL; + size_t keylen = cgo_key_material_len(128); + uint8_t *keys = tor_malloc(keylen); + crypto_rand((char*) keys, keylen); + + // We're using the version of this constant that _does_ include + // stream IDs, for an apples-to-apples comparison with tor1. + // + // TODO CGO: use constant after this is merged or rebased. 
+ const unsigned payload_len = 488; + + memset(cell, 0, sizeof(*cell)); + +#define SHOW(operation) \ + printf("%s: %.2f per cell (%.2f cpb)\n", \ + (operation), \ + NANOCOUNT(start,end,iters), \ + cpb(cstart, cend, (double)iters * payload_len)) + + // Initialize crypto + cgo_crypt_t *r_f = cgo_crypt_new(CGO_MODE_RELAY_FORWARD, 128, keys, keylen); + cgo_crypt_t *r_b = cgo_crypt_new(CGO_MODE_RELAY_BACKWARD, 128, keys, keylen); + + reset_perftime(); + + start = perftime(); + cstart = cycles(); + for (int i=0; i < iters; ++i) { + cgo_crypt_relay_forward(r_f, cell, &tag); + } + cend = cycles(); + end = perftime(); + SHOW("CGO outbound at relay"); + + start = perftime(); + cstart = cycles(); + for (int i=0; i < iters; ++i) { + cgo_crypt_relay_backward(r_b, cell); + } + cend = cycles(); + end = perftime(); + SHOW("CGO inbound at relay"); + + start = perftime(); + cstart = cycles(); + for (int i=0; i < iters; ++i) { + cgo_crypt_relay_originate(r_b, cell, &tag); + } + cend = cycles(); + end = perftime(); + SHOW("CGO originate at relay"); + + tor_free(cell); + tor_free(keys); + cgo_crypt_free(r_f); + cgo_crypt_free(r_b); + +#undef SHOW +} + +static void bench_dh(void) { const int iters = 1<<10; @@ -683,13 +849,15 @@ static struct benchmark_t benchmarks[] = { ENT(dmap), ENT(siphash), ENT(digest), + ENT(polyval), ENT(aes), ENT(onion_ntor), ENT(ed25519), ENT(rand), ENT(cell_aes), - ENT(cell_ops), + ENT(cell_ops_tor1), + ENT(cell_ops_cgo), ENT(dh), #ifdef ENABLE_OPENSSL diff --git a/src/test/test_crypto.c b/src/test/test_crypto.c @@ -259,14 +259,12 @@ test_crypto_openssl_version(void *arg) static void test_crypto_aes128(void *arg) { + (void)arg; char *data1 = NULL, *data2 = NULL, *data3 = NULL; crypto_cipher_t *env1 = NULL, *env2 = NULL; int i, j; char *mem_op_hex_tmp=NULL; char key[CIPHER_KEY_LEN]; - int use_evp = !strcmp(arg,"evp"); - evaluate_evp_for_aes(use_evp); - evaluate_ctr_for_aes(); data1 = tor_malloc(1024); data2 = tor_malloc(1024); @@ -1634,14 +1632,12 @@ 
test_crypto_formats(void *arg) static void test_crypto_aes_iv(void *arg) { + (void)arg; char *plain, *encrypted1, *encrypted2, *decrypted1, *decrypted2; char plain_1[1], plain_15[15], plain_16[16], plain_17[17]; char key1[16], key2[16]; ssize_t encrypted_size, decrypted_size; - int use_evp = !strcmp(arg,"evp"); - evaluate_evp_for_aes(use_evp); - plain = tor_malloc(4095); encrypted1 = tor_malloc(4095 + 1 + 16); encrypted2 = tor_malloc(4095 + 1 + 16); @@ -3201,6 +3197,7 @@ test_crypto_polyval(void *arg) uint8_t output[16]; uint8_t output2[16]; char *mem_op_hex_tmp=NULL; + uint8_t *longer = NULL; // From RFC 8452 const char *key_hex = "25629347589242761d31f826ba4b757b"; @@ -3236,8 +3233,38 @@ test_crypto_polyval(void *arg) polyval_get_tag(&pv, output2); tt_mem_op(output, OP_EQ, output2, 16); + // Try a long input both ways, and make sure the answer is the same. + longer = tor_malloc_zero(4096); + crypto_rand((char *)longer, 4090); // leave zeros at the end. + polyval_reset(&pv); + polyval_add_zpad(&pv, longer, 4090); + polyval_get_tag(&pv, output); + + polyval_reset(&pv); + const uint8_t *cp; + for (cp = longer; cp < longer + 4096; cp += 16) { + polyval_add_block(&pv, cp); + } + polyval_get_tag(&pv, output2); + tt_mem_op(output, OP_EQ, output2, 16); + + // Now the same with polyvalx. + polyvalx_t pvx; + polyvalx_init(&pvx, key); + polyvalx_add_zpad(&pvx, longer, 4090); + polyvalx_get_tag(&pvx, output2); + tt_mem_op(output, OP_EQ, output2, 16); + + polyvalx_reset(&pvx); + for (cp = longer; cp < longer + 4096; cp += 16) { + polyvalx_add_block(&pvx, cp); + } + polyvalx_get_tag(&pvx, output2); + tt_mem_op(output, OP_EQ, output2, 16); + done: tor_free(mem_op_hex_tmp); + tor_free(longer); } static void @@ -3309,103 +3336,102 @@ test_crypto_aes_raw(void *arg) #undef T } +/** Make sure that we can set keys on live AES instances correctly. 
*/ static void -test_crypto_aes_raw_ctr_equiv(void *arg) +test_crypto_aes_keymanip_cnt(void *arg) { (void) arg; - size_t buflen = 65536; - uint8_t *buf = tor_malloc_zero(buflen); - aes_cnt_cipher_t *c = NULL; - aes_raw_t *c_raw = NULL; - - const uint8_t iv[16]; - const uint8_t key[16]; - - // Simple case, IV with zero offset. - for (int i = 0; i < 32; ++i) { - crypto_rand((char*)iv, sizeof(iv)); - crypto_rand((char*)key, sizeof(key)); - c = aes_new_cipher(key, iv, 128); - c_raw = aes_raw_new(key, 128, true); - - aes_crypt_inplace(c, (char*)buf, buflen); - aes_raw_counter_xor(c_raw, iv, 0, buf, buflen); - tt_assert(fast_mem_is_zero((char*)buf, buflen)); - - aes_cipher_free(c); - aes_raw_free(c_raw); - } - // Trickier case, IV with offset == 31. - for (int i = 0; i < 32; ++i) { - crypto_rand((char*)iv, sizeof(iv)); - crypto_rand((char*)key, sizeof(key)); - c = aes_new_cipher(key, iv, 128); - c_raw = aes_raw_new(key, 128, true); - - aes_crypt_inplace(c, (char*)buf, buflen); - size_t off = 31*16; - aes_raw_counter_xor(c_raw, iv, 31, buf + off, buflen - off); - tt_assert(fast_mem_is_zero((char*)buf + off, buflen - off)); - - aes_cipher_free(c); - aes_raw_free(c_raw); - } + uint8_t k1[16] = "123456780123678"; + uint8_t k2[16] = "abcdefghijklmno"; + int kbits = 128; + uint8_t iv1[16]= "{return 4;}////"; + uint8_t iv2[16] = {0}; + uint8_t buf[128] = {0}; + uint8_t buf2[128] = {0}; + + aes_cnt_cipher_t *aes = aes_new_cipher(k1, iv1, kbits); + aes_crypt_inplace(aes, (char*)buf, sizeof(buf)); + + aes_cnt_cipher_t *aes2 = aes_new_cipher(k2, iv2, kbits); + // 128-5 to make sure internal buf is cleared when we set key. + aes_crypt_inplace(aes2, (char*)buf2, sizeof(buf2)-5); + aes_cipher_set_key(aes2, k1, kbits); + aes_cipher_set_iv_aligned(aes2, iv1); // should work in this case. 
+ memset(buf2, 0, sizeof(buf2)); + aes_crypt_inplace(aes2, (char*)buf2, sizeof(buf2)); + tt_mem_op(buf, OP_EQ, buf2, sizeof(buf)); done: - aes_cipher_free(c); - aes_raw_free(c_raw); - tor_free(buf); + aes_cipher_free(aes); + aes_cipher_free(aes2); } -/* Make sure that our IV addition code is correct. - * - * We test this function separately to make sure we handle corner cases well; - * the corner cases are rare enough that we shouldn't expect to see them in - * randomized testing. - */ static void -test_crypto_aes_cnt_iv_manip(void *arg) +test_crypto_aes_keymanip_ecb(void *arg) { - (void)arg; - uint8_t buf[16]; - uint8_t expect[16]; - int n; -#define T(pre, off, post) STMT_BEGIN { \ - n = base16_decode((char*)buf, sizeof(buf), \ - (pre), strlen(pre)); \ - tt_int_op(n, OP_EQ, sizeof(buf)); \ - n = base16_decode((char*)expect, sizeof(expect), \ - (post), strlen(post)); \ - tt_int_op(n, OP_EQ, sizeof(expect)); \ - aes_ctr_add_iv_offset(buf, (off)); \ - tt_mem_op(buf, OP_EQ, expect, 16); \ - } STMT_END - - T("00000000000000000000000000000000", 0x4032, - "00000000000000000000000000004032"); - T("0000000000000000000000000000ffff", 0x4032, - "00000000000000000000000000014031"); - // We focus on "31" here because that's what CGO uses. 
- T("000000000000000000000000ffffffe0", 31, - "000000000000000000000000ffffffff"); - T("000000000000000000000000ffffffe1", 31, - "00000000000000000000000100000000"); - T("0000000100000000ffffffffffffffe0", 31, - "0000000100000000ffffffffffffffff"); - T("0000000100000000ffffffffffffffe1", 31, - "00000001000000010000000000000000"); - T("0000000ffffffffffffffffffffffff0", 31, - "0000001000000000000000000000000f"); - T("ffffffffffffffffffffffffffffffe0", 31, - "ffffffffffffffffffffffffffffffff"); - T("ffffffffffffffffffffffffffffffe1", 31, - "00000000000000000000000000000000"); - T("ffffffffffffffffffffffffffffffe8", 31, - "00000000000000000000000000000007"); + (void) arg; + uint8_t k1[16] = "123456780123678"; + uint8_t k2[16] = "abcdefghijklmno"; + int kbits = 128; + uint8_t buf_orig[16] = {1,2,3,0}; + uint8_t buf1[16]; + uint8_t buf2[16]; + + aes_raw_t *aes1 = aes_raw_new(k1, kbits, true); + aes_raw_t *aes2 = aes_raw_new(k1, kbits, false); + aes_raw_set_key(&aes2, k2, kbits, false); + + memcpy(buf1, buf_orig, 16); + memcpy(buf2, buf_orig, 16); + + aes_raw_encrypt(aes1, buf1); + aes_raw_encrypt(aes1, buf2); + tt_mem_op(buf1, OP_EQ, buf2, 16); + + aes_raw_decrypt(aes2, buf1); + aes_raw_set_key(&aes2, k1, kbits, false); + aes_raw_decrypt(aes2, buf2); + + tt_mem_op(buf1, OP_NE, buf2, 16); + tt_mem_op(buf2, OP_EQ, buf_orig, 16); -#undef T done: - ; + aes_raw_free(aes1); + aes_raw_free(aes2); +} + +static void +test_crypto_aes_cnt_set_iv(void *arg) +{ + (void)arg; + uint8_t k1[16] = "123456780123678"; + uint8_t iv_zero[16] = {0}; + int kbits = 128; + const int iters = 100; + uint8_t buf1[128]; + uint8_t buf2[128]; + + aes_cnt_cipher_t *aes1, *aes2 = NULL; + aes1 = aes_new_cipher(k1, iv_zero, kbits); + + for (int i = 0; i < iters; ++i) { + uint8_t iv[16]; + crypto_rand((char*) iv, sizeof(iv)); + memset(buf1, 0, sizeof(buf1)); + memset(buf2, 0, sizeof(buf2)); + + aes_cipher_set_iv_aligned(aes1, iv); + aes2 = aes_new_cipher(k1, iv, kbits); + + aes_crypt_inplace(aes1, 
(char*)buf1, sizeof(buf1)); + aes_crypt_inplace(aes2, (char*)buf1, sizeof(buf2)); + tt_mem_op(buf1, OP_EQ, buf2, sizeof(buf1)); + + aes_cipher_free(aes2); + } + done: + aes_cipher_free(aes1); + aes_cipher_free(aes2); } #ifndef COCCI @@ -3424,8 +3450,7 @@ test_crypto_aes_cnt_iv_manip(void *arg) struct testcase_t crypto_tests[] = { CRYPTO_LEGACY(formats), { "openssl_version", test_crypto_openssl_version, TT_FORK, NULL, NULL }, - { "aes_AES", test_crypto_aes128, TT_FORK, &passthrough_setup, (void*)"aes" }, - { "aes_EVP", test_crypto_aes128, TT_FORK, &passthrough_setup, (void*)"evp" }, + { "aes_AES", test_crypto_aes128, TT_FORK, NULL, NULL }, { "aes128_ctr_testvec", test_crypto_aes_ctr_testvec, 0, &passthrough_setup, (void*)"128" }, { "aes192_ctr_testvec", test_crypto_aes_ctr_testvec, 0, @@ -3446,10 +3471,7 @@ struct testcase_t crypto_tests[] = { { "sha3_xof", test_crypto_sha3_xof, TT_FORK, NULL, NULL}, { "mac_sha3", test_crypto_mac_sha3, TT_FORK, NULL, NULL}, CRYPTO_LEGACY(dh), - { "aes_iv_AES", test_crypto_aes_iv, TT_FORK, &passthrough_setup, - (void*)"aes" }, - { "aes_iv_EVP", test_crypto_aes_iv, TT_FORK, &passthrough_setup, - (void*)"evp" }, + { "aes_iv_EVP", test_crypto_aes_iv, TT_FORK, NULL, NULL }, CRYPTO_LEGACY(base32_decode), { "kdf_TAP", test_crypto_kdf_TAP, 0, NULL, NULL }, { "hkdf_sha256", test_crypto_hkdf_sha256, 0, NULL, NULL }, @@ -3477,7 +3499,8 @@ struct testcase_t crypto_tests[] = { { "failure_modes", test_crypto_failure_modes, TT_FORK, NULL, NULL }, { "polyval", test_crypto_polyval, 0, NULL, NULL }, { "aes_raw", test_crypto_aes_raw, 0, NULL, NULL }, - { "aes_raw_ctr_equiv", test_crypto_aes_raw_ctr_equiv, 0, NULL, NULL }, - { "aes_cnt_iv_manip", test_crypto_aes_cnt_iv_manip, 0, NULL, NULL }, + { "aes_keymanip_cnt", test_crypto_aes_keymanip_cnt, 0, NULL, NULL }, + { "aes_keymanip_ecb", test_crypto_aes_keymanip_ecb, 0, NULL, NULL }, + { "aes_cnt_set_iv", test_crypto_aes_cnt_set_iv, 0, NULL, NULL }, END_OF_TESTCASES };