commit af67487d7bfae08784886362e9b208acd998c389
parent da60d5c1e64a18dc12cdbd4de4480731c164d0e6
Author: Nick Mathewson <nickm@torproject.org>
Date: Thu, 22 May 2025 10:20:08 -0400
Merge branch 'cgo-faster' into 'main'
Portability and speed improvements to cgo crypto
See merge request tpo/core/tor!900
Diffstat:
15 files changed, 930 insertions(+), 589 deletions(-)
diff --git a/src/core/crypto/relay_crypto_cgo.c b/src/core/crypto/relay_crypto_cgo.c
@@ -54,7 +54,7 @@ cgo_et_init(cgo_et_t *et, int aesbits, bool encrypt,
et->kb = aes_raw_new(key, aesbits, encrypt);
if (et->kb == NULL)
return -1;
- polyval_key_init(&et->ku, key + aes_key_bytes);
+ polyvalx_init(&et->ku, key + aes_key_bytes);
return 0;
}
/** Replace the key on an existing, already initialized cgo_et_t.
@@ -66,25 +66,24 @@ cgo_et_set_key(cgo_et_t *et, int aesbits, bool encrypt,
{
size_t aes_key_bytes = aesbits / 8;
aes_raw_set_key(&et->kb, key, aesbits, encrypt);
- polyval_key_init(&et->ku, key + aes_key_bytes);
+ polyvalx_init(&et->ku, key + aes_key_bytes);
}
/** Helper: Compute polyval(KU, H | CMD | X_R). */
static inline void
-compute_et_mask(polyval_key_t *pvk, const et_tweak_t tweak, uint8_t *t_out)
+compute_et_mask(polyvalx_t *pvk, const et_tweak_t tweak, uint8_t *t_out)
{
// block 0: tweak.h
// block 1: one byte of command, first 15 bytes of x_r
// block 2...: remainder of x_r, zero-padded.
- polyval_t pv;
+ polyvalx_reset(pvk);
uint8_t block1[16];
block1[0] = tweak.uiv.cmd;
memcpy(block1+1, tweak.x_r, 15);
- polyval_init_from_key(&pv, pvk);
- polyval_add_block(&pv, tweak.uiv.h);
- polyval_add_block(&pv, block1);
- polyval_add_zpad(&pv, tweak.x_r + 15, ET_TWEAK_LEN_X_R - 15);
- polyval_get_tag(&pv, t_out);
+ polyvalx_add_block(pvk, tweak.uiv.h);
+ polyvalx_add_block(pvk, block1);
+ polyvalx_add_zpad(pvk, tweak.x_r + 15, ET_TWEAK_LEN_X_R - 15);
+ polyvalx_get_tag(pvk, t_out);
}
/** XOR the 16 byte block from inp into out. */
static void
@@ -148,9 +147,10 @@ STATIC int
cgo_prf_init(cgo_prf_t *prf, int aesbits,
const uint8_t *key)
{
+ const uint8_t iv[16] = {0};
size_t aes_key_bytes = aesbits / 8;
memset(prf,0, sizeof(*prf));
- prf->k = aes_raw_new(key, aesbits, true);
+ prf->k = aes_new_cipher(key, iv, aesbits);
polyval_key_init(&prf->b, key + aes_key_bytes);
return 0;
}
@@ -162,7 +162,7 @@ cgo_prf_set_key(cgo_prf_t *prf, int aesbits,
const uint8_t *key)
{
size_t aes_key_bytes = aesbits / 8;
- aes_raw_set_key(&prf->k, key, aesbits, true);
+ aes_cipher_set_key(prf->k, key, aesbits);
polyval_key_init(&prf->b, key + aes_key_bytes);
}
/**
@@ -184,7 +184,15 @@ cgo_prf_xor_t0(cgo_prf_t *prf, const uint8_t *input,
polyval_get_tag(&pv, hash);
hash[15] &= 0xC0; // Clear the low six bits.
- aes_raw_counter_xor(prf->k, hash, 0, data, PRF_T0_DATA_LEN);
+ aes_cipher_set_iv_aligned(prf->k, hash);
+ aes_crypt_inplace(prf->k, (char*) data, PRF_T0_DATA_LEN);
+
+ // Re-align the cipher.
+ //
+ // This approach is faster than EVP_CIPHER_set_num!
+ const int ns = 16 - (PRF_T0_DATA_LEN % 16);
+ // We're not using the hash for anything, so it's okay to overwrite
+ aes_crypt_inplace(prf->k, (char*)hash, ns);
}
/**
* Generate 'n' bytes of the PRF's results on 'input', for position t=1,
@@ -203,9 +211,18 @@ cgo_prf_gen_t1(cgo_prf_t *prf, const uint8_t *input,
polyval_add_block(&pv, input);
polyval_get_tag(&pv, hash);
hash[15] &= 0xC0; // Clear the low six bits.
+ hash[15] += T1_OFFSET; // Can't overflow!
memset(buf, 0, n);
- aes_raw_counter_xor(prf->k, hash, T1_OFFSET, buf, n);
+ aes_cipher_set_iv_aligned(prf->k, hash);
+ aes_crypt_inplace(prf->k, (char*)buf, n);
+
+ // Re-align the cipher.
+ size_t ns = 16-(n&0x0f);
+ if (ns) {
+ // We're not using the hash for anything, so it's okay to overwrite
+ aes_crypt_inplace(prf->k, (char*) hash, ns);
+ }
}
/**
* Release any storage held in 'prf'.
@@ -215,7 +232,7 @@ cgo_prf_gen_t1(cgo_prf_t *prf, const uint8_t *input,
STATIC void
cgo_prf_clear(cgo_prf_t *prf)
{
- aes_raw_free(prf->k);
+ aes_cipher_free(prf->k);
}
static int
diff --git a/src/core/crypto/relay_crypto_cgo.h b/src/core/crypto/relay_crypto_cgo.h
@@ -77,23 +77,18 @@ typedef struct cgo_et_t {
*/
aes_raw_t *kb;
/**
- * Polyval key.
+ * Polyval instance, with expanded key.
*/
- polyval_key_t ku;
+ polyvalx_t ku;
} cgo_et_t;
/**
* Keyed pseudorandom function, based on polyval and AES-CTR.
*/
typedef struct cgo_prf_t {
/**
- * AES key: may be 128, 192, or 256 bits.
- *
- * Even though we're going to be using this in counter mode,
- * we don't make an aes_cnt_cipher_t here, since that type
- * does not support efficient re-use of the key with multiple
- * IVs.
+ * AES stream cipher: may be 128, 192, or 256 bits.
*/
- aes_raw_t *k;
+ aes_cnt_cipher_t *k;
/**
* Polyval instance.
*/
diff --git a/src/ext/polyval/ctmul.c b/src/ext/polyval/ctmul.c
@@ -194,7 +194,7 @@ bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y)
#endif
static void
-pv_mul_y_h(polyval_t *pv)
+pv_mul_y_h_ctmul(polyval_t *pv)
{
uint32_t *yw = pv->y.v;
const uint32_t *hw = pv->key.h.v;
diff --git a/src/ext/polyval/ctmul64.c b/src/ext/polyval/ctmul64.c
@@ -73,15 +73,15 @@ rev64(uint64_t x)
static void
-pv_mul_y_h(polyval_t *pv)
+pv_mul_y_h_ctmul64(polyval_t *pv)
{
uint64_t y0, y1;
uint64_t h0, h1, h2, h0r, h1r, h2r;
- y0 = pv->y.lo;
- y1 = pv->y.hi;
- h0 = pv->key.h.lo;
- h1 = pv->key.h.hi;
+ y0 = CTMUL64_MEMBER(pv->y).lo;
+ y1 = CTMUL64_MEMBER(pv->y).hi;
+ h0 = CTMUL64_MEMBER(pv->key.h).lo;
+ h1 = CTMUL64_MEMBER(pv->key.h).hi;
h0r = rev64(h0);
h1r = rev64(h1);
@@ -127,7 +127,7 @@ pv_mul_y_h(polyval_t *pv)
v3 ^= v1 ^ (v1 >> 1) ^ (v1 >> 2) ^ (v1 >> 7);
v2 ^= (v1 << 63) ^ (v1 << 62) ^ (v1 << 57);
- pv->y.lo = v2;
- pv->y.hi = v3;
+ CTMUL64_MEMBER(pv->y).lo = v2;
+ CTMUL64_MEMBER(pv->y).hi = v3;
}
}
diff --git a/src/ext/polyval/pclmul.c b/src/ext/polyval/pclmul.c
@@ -72,7 +72,7 @@ BR_TARGETS_X86_UP
* We use a target of "sse2" only, so that Clang may still handle the
* '__m128i' type and allocate SSE2 registers.
*/
-#ifdef __clang__
+#ifdef __clang__AND_NOT_WORKING
BR_TARGET("sse2")
static inline __m128i
pclmulqdq00(__m128i x, __m128i y)
@@ -149,21 +149,90 @@ pclmulqdq11(__m128i x, __m128i y)
} while (0)
+BR_TARGET("ssse3,pclmul")
+static inline void
+expand_key_pclmul(const polyval_t *pv, pv_expanded_key_t *out)
+{
+ __m128i h1w, h1x;
+ __m128i lastw, lastx;
+ __m128i t0, t1, t2, t3;
+
+ h1w = PCLMUL_MEMBER(pv->key.h);
+ BK(h1w, h1x);
+ lastw = h1w;
+
+ for (int i = PV_BLOCK_STRIDE - 2; i >= 0; --i) {
+ BK(lastw, lastx);
+
+ t1 = pclmulqdq11(lastw, h1w);
+ t3 = pclmulqdq00(lastw, h1w);
+ t2 = pclmulqdq00(lastx, h1x);
+ t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));
+ t0 = _mm_shuffle_epi32(t1, 0x0E);
+ t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+ t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+ REDUCE_F128(t0, t1, t2, t3);
+ out->k[i] = lastw = _mm_unpacklo_epi64(t1, t0);
+ }
+}
+
+// Add PCLMUL_BLOCK_STRIDE * 16 bytes from input.
+BR_TARGET("ssse3,pclmul")
+static inline void
+pv_add_multiple_pclmul(polyval_t *pv,
+ const uint8_t *input,
+ const pv_expanded_key_t *expanded)
+{
+ __m128i t0, t1, t2, t3;
+
+ t1 = _mm_setzero_si128();
+ t2 = _mm_setzero_si128();
+ t3 = _mm_setzero_si128();
+
+ for (int i = 0; i < PV_BLOCK_STRIDE; ++i, input += 16) {
+ __m128i aw = _mm_loadu_si128((void *)(input));
+ __m128i ax;
+ __m128i hx, hw;
+ if (i == 0) {
+ aw = _mm_xor_si128(aw, PCLMUL_MEMBER(pv->y));
+ }
+ if (i == PV_BLOCK_STRIDE - 1) {
+ hw = PCLMUL_MEMBER(pv->key.h);
+ } else {
+ hw = expanded->k[i];
+ }
+ BK(aw, ax);
+ BK(hw, hx);
+ t1 = _mm_xor_si128(t1, pclmulqdq11(aw, hw));
+ t3 = _mm_xor_si128(t3, pclmulqdq00(aw, hw));
+ t2 = _mm_xor_si128(t2, pclmulqdq00(ax, hx));
+ }
+
+ t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));
+ t0 = _mm_shuffle_epi32(t1, 0x0E);
+ t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+ t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+
+ REDUCE_F128(t0, t1, t2, t3);
+ PCLMUL_MEMBER(pv->y) = _mm_unpacklo_epi64(t1, t0);
+}
+
+
/* see bearssl_hash.h */
BR_TARGET("ssse3,pclmul")
-static
-void pv_mul_y_h(polyval_t *pv)
+static inline void
+pv_mul_y_h_pclmul(polyval_t *pv)
{
__m128i yw, h1w, h1x;
- h1w = pv->key.h;
+ h1w = PCLMUL_MEMBER(pv->key.h);
BK(h1w, h1x);
{
__m128i aw, ax;
__m128i t0, t1, t2, t3;
- aw = pv->y;
+ aw = PCLMUL_MEMBER(pv->y);
BK(aw, ax);
t1 = pclmulqdq11(aw, h1w);
@@ -180,5 +249,5 @@ void pv_mul_y_h(polyval_t *pv)
yw = _mm_unpacklo_epi64(t1, t0);
}
- pv->y = yw;
+ PCLMUL_MEMBER(pv->y) = yw;
}
diff --git a/src/ext/polyval/polyval.c b/src/ext/polyval/polyval.c
@@ -39,6 +39,10 @@
#include <string.h>
+#ifdef PV_USE_PCLMUL_DETECT
+#include <cpuid.h>
+#endif
+
typedef pv_u128_ u128;
/* ========
@@ -47,6 +51,7 @@ typedef pv_u128_ u128;
* They have different definitions depending on our representation
* of 128-bit integers.
*/
+#if 0
/**
* Read a u128-bit little-endian integer from 'bytes',
* which may not be aligned.
@@ -72,7 +77,8 @@ static inline void pv_xor_y(polyval_t *, u128 v);
*
* (This is a carryless multiply in the Polyval galois field)
*/
-static void pv_mul_y_h(polyval_t *);
-static void pv_mul_y_h(polyval_t *);
+#endif
/* =====
* Endianness conversion for big-endian platforms
@@ -116,58 +122,77 @@ bswap32(uint64_t v)
#define convert_byte_order32(x) (x)
#endif
-#ifdef PV_USE_PCLMUL
+#if defined PV_USE_PCLMUL_UNCONDITIONAL
+#define PCLMUL_MEMBER(v) (v)
+#define PV_USE_PCLMUL
+#elif defined PV_USE_PCLMUL_DETECT
+#define PCLMUL_MEMBER(v) (v).u128x1
+#define CTMUL64_MEMBER(v) (v).u64x2
+#define PV_USE_PCLMUL
+#define PV_USE_CTMUL64
+
+#elif defined PV_USE_CTMUL64
+#define CTMUL64_MEMBER(v) (v)
+#endif
+
+#ifdef PV_USE_PCLMUL
#include "ext/polyval/pclmul.c"
static inline u128
-u128_from_bytes(const uint8_t *bytes)
+u128_from_bytes_pclmul(const uint8_t *bytes)
{
- return _mm_loadu_si128((const u128*)bytes);
+ u128 r;
+ PCLMUL_MEMBER(r) = _mm_loadu_si128((const __m128i*)bytes);
+ return r;
}
static inline void
-u128_to_bytes(u128 val, uint8_t *bytes_out)
+u128_to_bytes_pclmul(u128 val, uint8_t *bytes_out)
{
- _mm_storeu_si128((u128*)bytes_out, val);
+ _mm_storeu_si128((__m128i*)bytes_out, PCLMUL_MEMBER(val));
}
static inline void
-pv_xor_y(polyval_t *pv, u128 v)
+pv_xor_y_pclmul(polyval_t *pv, u128 v)
{
- pv->y = _mm_xor_si128(pv->y, v);
+ PCLMUL_MEMBER(pv->y) = _mm_xor_si128(PCLMUL_MEMBER(pv->y),
+ PCLMUL_MEMBER(v));
}
-#elif defined(PV_USE_CTMUL64)
+#endif
+#if defined(PV_USE_CTMUL64)
#include "ext/polyval/ctmul64.c"
static inline u128
-u128_from_bytes(const uint8_t *bytes)
+u128_from_bytes_ctmul64(const uint8_t *bytes)
{
u128 r;
- memcpy(&r.lo, bytes, 8);
- memcpy(&r.hi, bytes + 8, 8);
- r.lo = convert_byte_order64(r.lo);
- r.hi = convert_byte_order64(r.hi);
+ memcpy(&CTMUL64_MEMBER(r).lo, bytes, 8);
+ memcpy(&CTMUL64_MEMBER(r).hi, bytes + 8, 8);
+ CTMUL64_MEMBER(r).lo = convert_byte_order64(CTMUL64_MEMBER(r).lo);
+ CTMUL64_MEMBER(r).hi = convert_byte_order64(CTMUL64_MEMBER(r).hi);
return r;
}
static inline void
-u128_to_bytes(u128 val, uint8_t *bytes_out)
+u128_to_bytes_ctmul64(u128 val, uint8_t *bytes_out)
{
- uint64_t lo = convert_byte_order64(val.lo);
- uint64_t hi = convert_byte_order64(val.hi);
+ uint64_t lo = convert_byte_order64(CTMUL64_MEMBER(val).lo);
+ uint64_t hi = convert_byte_order64(CTMUL64_MEMBER(val).hi);
memcpy(bytes_out, &lo, 8);
memcpy(bytes_out + 8, &hi, 8);
}
static inline void
-pv_xor_y(polyval_t *pv, u128 val)
+pv_xor_y_ctmul64(polyval_t *pv, u128 val)
{
- pv->y.lo ^= val.lo;
- pv->y.hi ^= val.hi;
+ CTMUL64_MEMBER(pv->y).lo ^= CTMUL64_MEMBER(val).lo;
+ CTMUL64_MEMBER(pv->y).hi ^= CTMUL64_MEMBER(val).hi;
}
-#elif defined(PV_USE_CTMUL)
+#endif
+
+#if defined(PV_USE_CTMUL)
#include "ext/polyval/ctmul.c"
static inline u128
-u128_from_bytes(const uint8_t *bytes)
+u128_from_bytes_ctmul(const uint8_t *bytes)
{
u128 r;
memcpy(&r.v, bytes, 16);
@@ -177,7 +202,7 @@ u128_from_bytes(const uint8_t *bytes)
return r;
}
static inline void
-u128_to_bytes(u128 val, uint8_t *bytes_out)
+u128_to_bytes_ctmul(u128 val, uint8_t *bytes_out)
{
uint32_t v[4];
for (int i = 0; i < 4; ++i) {
@@ -186,7 +211,7 @@ u128_to_bytes(u128 val, uint8_t *bytes_out)
memcpy(bytes_out, v, 16);
}
static inline void
-pv_xor_y(polyval_t *pv, u128 val)
+pv_xor_y_ctmul(polyval_t *pv, u128 val)
{
for (int i = 0; i < 4; ++i) {
pv->y.v[i] ^= val.v[i];
@@ -194,35 +219,274 @@ pv_xor_y(polyval_t *pv, u128 val)
}
#endif
+struct expanded_key_none {};
+static inline void add_multiple_none(polyval_t *pv,
+ const uint8_t *input,
+ const struct expanded_key_none *expanded)
+{
+ (void) pv;
+ (void) input;
+ (void) expanded;
+}
+static inline void expand_key_none(const polyval_t *inp,
+ struct expanded_key_none *out)
+{
+ (void) inp;
+ (void) out;
+}
+
+/* Kludge: a special value to use for block_stride when we don't support
+ * processing multiple blocks at once. Previously we used 0, but that
+ * caused warnings with some comparisons. */
+#define BLOCK_STRIDE_NONE 0xffff
+
+#define PV_DECLARE(prefix, \
+ st, \
+ u128_from_bytes, \
+ u128_to_bytes, \
+ pv_xor_y, \
+ pv_mul_y_h, \
+ block_stride, \
+ expanded_key_tp, expand_fn, add_multiple_fn) \
+ st void \
+ prefix ## polyval_key_init(polyval_key_t *pvk, const uint8_t *key) \
+ { \
+ pvk->h = u128_from_bytes(key); \
+ } \
+ st void \
+ prefix ## polyval_init(polyval_t *pv, const uint8_t *key) \
+ { \
+ polyval_key_init(&pv->key, key); \
+ memset(&pv->y, 0, sizeof(u128)); \
+ } \
+ st void \
+ prefix ## polyval_init_from_key(polyval_t *pv, const polyval_key_t *key) \
+ { \
+ memcpy(&pv->key, key, sizeof(polyval_key_t)); \
+ memset(&pv->y, 0, sizeof(u128)); \
+ } \
+ st void \
+ prefix ## polyval_add_block(polyval_t *pv, const uint8_t *block) \
+ { \
+ u128 b = u128_from_bytes(block); \
+ pv_xor_y(pv, b); \
+ pv_mul_y_h(pv); \
+ } \
+ st void \
+ prefix ## polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n) \
+ { \
+ /* since block_stride is a constant, this should get optimized */ \
+ if ((block_stride != BLOCK_STRIDE_NONE) \
+ && n >= (block_stride) * 16) { \
+ expanded_key_tp expanded_key; \
+ expand_fn(pv, &expanded_key); \
+ while (n >= (block_stride) * 16) { \
+ add_multiple_fn(pv, data, &expanded_key); \
+ n -= block_stride*16; \
+ data += block_stride * 16; \
+ } \
+ } \
+ while (n > 16) { \
+ polyval_add_block(pv, data); \
+ data += 16; \
+ n -= 16; \
+ } \
+ if (n) { \
+ uint8_t block[16]; \
+ memset(&block, 0, sizeof(block)); \
+ memcpy(block, data, n); \
+ polyval_add_block(pv, block); \
+ } \
+ } \
+ st void \
+ prefix ## polyval_get_tag(const polyval_t *pv, uint8_t *tag_out) \
+ { \
+ u128_to_bytes(pv->y, tag_out); \
+ } \
+ st void \
+ prefix ## polyval_reset(polyval_t *pv) \
+ { \
+ memset(&pv->y, 0, sizeof(u128)); \
+ }
+
+#ifdef PV_USE_PCLMUL_DETECT
+/* We use a boolean to distinguish whether to use the PCLMUL instructions,
+ * but instead we could use function pointers. It's probably worth
+ * benchmarking, though it's unlikely to make a measurable difference.
+ */
+static bool use_pclmul = false;
+
+/* Declare _both_ variations of our code, statically,
+ * with different prefixes. */
+PV_DECLARE(pclmul_, static,
+ u128_from_bytes_pclmul,
+ u128_to_bytes_pclmul,
+ pv_xor_y_pclmul,
+ pv_mul_y_h_pclmul,
+ PV_BLOCK_STRIDE,
+ pv_expanded_key_t,
+ expand_key_pclmul,
+ pv_add_multiple_pclmul)
+
+PV_DECLARE(ctmul64_, static,
+ u128_from_bytes_ctmul64,
+ u128_to_bytes_ctmul64,
+ pv_xor_y_ctmul64,
+ pv_mul_y_h_ctmul64,
+ BLOCK_STRIDE_NONE,
+ struct expanded_key_none,
+ expand_key_none,
+ add_multiple_none)
+
void
-polyval_key_init(polyval_key_t *pvk, const uint8_t *key)
+polyval_key_init(polyval_key_t *pv, const uint8_t *key)
{
- pvk->h = u128_from_bytes(key);
+ if (use_pclmul)
+ pclmul_polyval_key_init(pv, key);
+ else
+ ctmul64_polyval_key_init(pv, key);
}
void
polyval_init(polyval_t *pv, const uint8_t *key)
{
- polyval_key_init(&pv->key, key);
- memset(&pv->y, 0, sizeof(u128));
+ if (use_pclmul)
+ pclmul_polyval_init(pv, key);
+ else
+ ctmul64_polyval_init(pv, key);
}
void
polyval_init_from_key(polyval_t *pv, const polyval_key_t *key)
{
- memcpy(&pv->key, key, sizeof(polyval_key_t));
- memset(&pv->y, 0, sizeof(u128));
+ if (use_pclmul)
+ pclmul_polyval_init_from_key(pv, key);
+ else
+ ctmul64_polyval_init_from_key(pv, key);
}
void
polyval_add_block(polyval_t *pv, const uint8_t *block)
{
- u128 b = u128_from_bytes(block);
- pv_xor_y(pv, b);
- pv_mul_y_h(pv);
+ if (use_pclmul)
+ pclmul_polyval_add_block(pv, block);
+ else
+ ctmul64_polyval_add_block(pv, block);
}
void
polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n)
{
+ if (use_pclmul)
+ pclmul_polyval_add_zpad(pv, data, n);
+ else
+ ctmul64_polyval_add_zpad(pv, data, n);
+}
+void
+polyval_get_tag(const polyval_t *pv, uint8_t *tag_out)
+{
+ if (use_pclmul)
+ pclmul_polyval_get_tag(pv, tag_out);
+ else
+ ctmul64_polyval_get_tag(pv, tag_out);
+}
+void
+polyval_reset(polyval_t *pv)
+{
+ if (use_pclmul)
+ pclmul_polyval_reset(pv);
+ else
+ ctmul64_polyval_reset(pv);
+}
+
+#elif defined(PV_USE_PCLMUL)
+PV_DECLARE(, ,
+ u128_from_bytes_pclmul,
+ u128_to_bytes_pclmul,
+ pv_xor_y_pclmul,
+ pv_mul_y_h_pclmul,
+ PV_BLOCK_STRIDE,
+ pv_expanded_key_t,
+ expand_key_pclmul,
+ pv_add_multiple_pclmul)
+#elif defined(PV_USE_CTMUL64)
+PV_DECLARE(, ,
+ u128_from_bytes_ctmul64,
+ u128_to_bytes_ctmul64,
+ pv_xor_y_ctmul64,
+ pv_mul_y_h_ctmul64,
+ BLOCK_STRIDE_NONE,
+ struct expanded_key_none,
+ expand_key_none,
+ add_multiple_none)
+
+#elif defined(PV_USE_CTMUL)
+PV_DECLARE(, , u128_from_bytes_ctmul,
+ u128_to_bytes_ctmul,
+ pv_xor_y_ctmul,
+ pv_mul_y_h_ctmul,
+ BLOCK_STRIDE_NONE,
+ struct expanded_key_none,
+ expand_key_none,
+ add_multiple_none)
+#endif
+
+#ifdef PV_USE_PCLMUL_DETECT
+void
+polyval_detect_implementation(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ use_pclmul = false;
+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
+ if (0 != (ecx & (1<<1))) {
+ use_pclmul = true;
+ }
+ }
+}
+#else
+void
+polyval_detect_implementation(void)
+{
+}
+#endif
+
+#ifdef POLYVAL_USE_EXPANDED_KEYS
+
+#ifdef PV_USE_PCLMUL_DETECT
+#define SHOULD_EXPAND() (use_pclmul)
+#else
+#define SHOULD_EXPAND() (1)
+#endif
+
+void
+polyvalx_init(polyvalx_t *pvx, const uint8_t *key)
+{
+ polyval_init(&pvx->pv, key);
+ if (SHOULD_EXPAND()) {
+ expand_key_pclmul(&pvx->pv, &pvx->expanded);
+ }
+}
+void
+polyvalx_init_from_key(polyvalx_t *pvx, const polyval_key_t *key)
+{
+ polyval_init_from_key(&pvx->pv, key);
+ if (SHOULD_EXPAND()) {
+ expand_key_pclmul(&pvx->pv, &pvx->expanded);
+ }
+}
+void
+polyvalx_add_block(polyvalx_t *pvx, const uint8_t *block)
+{
+ polyval_add_block(&pvx->pv, block);
+}
+void
+polyvalx_add_zpad(polyvalx_t *pvx, const uint8_t *data, size_t n)
+{
+ if (SHOULD_EXPAND() && n >= PV_BLOCK_STRIDE * 16) {
+ while (n >= PV_BLOCK_STRIDE * 16) {
+ pv_add_multiple_pclmul(&pvx->pv, data, &pvx->expanded);
+ data += PV_BLOCK_STRIDE * 16;
+ n -= PV_BLOCK_STRIDE * 16;
+ }
+ }
while (n > 16) {
- polyval_add_block(pv, data);
+ polyval_add_block(&pvx->pv, data);
data += 16;
n -= 16;
}
@@ -230,19 +494,19 @@ polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n)
uint8_t block[16];
memset(&block, 0, sizeof(block));
memcpy(block, data, n);
- polyval_add_block(pv, block);
+ polyval_add_block(&pvx->pv, block);
}
}
void
-polyval_get_tag(const polyval_t *pv, uint8_t *tag_out)
+polyvalx_get_tag(const polyvalx_t *pvx, uint8_t *tag_out)
{
- u128_to_bytes(pv->y, tag_out);
+ polyval_get_tag(&pvx->pv, tag_out);
}
-void
-polyval_reset(polyval_t *pv)
+void polyvalx_reset(polyvalx_t *pvx)
{
- memset(&pv->y, 0, sizeof(u128));
+ polyval_reset(&pvx->pv);
}
+#endif
#if 0
#include <stdio.h>
diff --git a/src/ext/polyval/polyval.h b/src/ext/polyval/polyval.h
@@ -16,12 +16,24 @@
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) \
|| defined(_M_X64) || defined(_M_IX86) || defined(__i486) \
|| defined(__i386__)
-/* Use intel intrinsics for carryless multiply.
- *
- * TODO: In theory we should detect whether we have the relevant instructions,
- * but they are all at least 15 years old.
+#define PV_INTEL_ARCH
+#endif
+
+#if defined(PV_INTEL_ARCH) && defined(__PCLMUL__)
+/* We're building for an architecture that always has the intel
+ * intrinsics for carryless multiply.
+ * No need for runtime detection.
+ */
+#define PV_USE_PCLMUL_UNCONDITIONAL
+#define PCLMUL_ANY
+
+#elif defined(PV_INTEL_ARCH) && SIZEOF_VOID_P >= 8
+/* We _might_ have PCLMUL, or we might not.
+ * We need to detect it at runtime.
*/
-#define PV_USE_PCLMUL
+#define PV_USE_PCLMUL_DETECT
+#define PCLMUL_ANY
+
#elif SIZEOF_VOID_P >= 8
/* It's a 64-bit architecture; use the generic 64-bit constant-time
* implementation.
@@ -36,13 +48,26 @@
#error "sizeof(void*) is implausibly weird."
#endif
+#ifdef PCLMUL_ANY
+#include <emmintrin.h>
+
+#define POLYVAL_USE_EXPANDED_KEYS
+#endif
+
/**
* Declare a 128 bit integer type.
* The exact representation will depend on which implementation we've chosen.
*/
-#ifdef PV_USE_PCLMUL
-#include <emmintrin.h>
+#if defined(PV_USE_PCLMUL_UNCONDITIONAL)
typedef __m128i pv_u128_;
+#elif defined(PV_USE_PCLMUL_DETECT)
+typedef union pv_u128_ {
+ __m128i u128x1;
+ struct {
+ uint64_t lo;
+ uint64_t hi;
+ } u64x2;
+} pv_u128_;
#elif defined(PV_USE_CTMUL64)
typedef struct pv_u128_ {
uint64_t lo;
@@ -117,4 +142,44 @@ void polyval_get_tag(const polyval_t *, uint8_t *tag_out);
*/
void polyval_reset(polyval_t *);
+/** If a faster-than-default polyval implementation is available, use it. */
+void polyval_detect_implementation(void);
+
+#ifdef POLYVAL_USE_EXPANDED_KEYS
+/* These variations are as for polyval_\*, but they use pre-expanded keys.
+ * They're appropriate when you know a key is likely to get used more than once
+ * on a large input.
+ */
+
+/** How many blocks to handle at once with an expanded key */
+#define PV_BLOCK_STRIDE 8
+typedef struct pv_expanded_key_t {
+ // powers of h in reverse order, down to 2.
+ // (in other words, contains
+ // h^PCLMUL_BLOCK_STRIDE .. H^2)
+ __m128i k[PV_BLOCK_STRIDE-1];
+} pv_expanded_key_t;
+typedef struct polyvalx_t {
+ polyval_t pv;
+ pv_expanded_key_t expanded;
+} polyvalx_t;
+
+void polyvalx_init(polyvalx_t *, const uint8_t *key);
+void polyvalx_init_from_key(polyvalx_t *, const polyval_key_t *key);
+void polyvalx_add_block(polyvalx_t *, const uint8_t *block);
+void polyvalx_add_zpad(polyvalx_t *, const uint8_t *data, size_t n);
+void polyvalx_get_tag(const polyvalx_t *, uint8_t *tag_out);
+void polyvalx_reset(polyvalx_t *);
+
+#else
+#define polyvalx_t polyval_t
+#define polyvalx_key_init polyval_key_init
+#define polyvalx_init polyval_init
+#define polyvalx_init_from_key polyval_init_from_key
+#define polyvalx_add_block polyval_add_block
+#define polyvalx_add_zpad polyval_add_zpad
+#define polyvalx_get_tag polyval_get_tag
+#define polyvalx_reset polyval_reset
+#endif
+
#endif
diff --git a/src/lib/crypt_ops/.may_include b/src/lib/crypt_ops/.may_include
@@ -26,3 +26,4 @@ keccak-tiny/*.h
ed25519/*.h
ext/siphash.h
+ext/polyval/*.h
diff --git a/src/lib/crypt_ops/aes.h b/src/lib/crypt_ops/aes.h
@@ -21,14 +21,14 @@ typedef struct aes_cnt_cipher_t aes_cnt_cipher_t;
aes_cnt_cipher_t* aes_new_cipher(const uint8_t *key, const uint8_t *iv,
int key_bits);
+void aes_cipher_set_iv_aligned(aes_cnt_cipher_t *cipher_, const uint8_t *iv);
+void aes_cipher_set_key(aes_cnt_cipher_t *cipher_,
+ const uint8_t *key, int key_bits);
void aes_cipher_free_(aes_cnt_cipher_t *cipher);
#define aes_cipher_free(cipher) \
FREE_AND_NULL(aes_cnt_cipher_t, aes_cipher_free_, (cipher))
void aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len);
-int evaluate_evp_for_aes(int force_value);
-int evaluate_ctr_for_aes(void);
-
#ifdef USE_AES_RAW
typedef struct aes_raw_t aes_raw_t;
@@ -40,27 +40,6 @@ void aes_raw_free_(aes_raw_t *cipher);
FREE_AND_NULL(aes_raw_t, aes_raw_free_, (cipher))
void aes_raw_encrypt(const aes_raw_t *cipher, uint8_t *block);
void aes_raw_decrypt(const aes_raw_t *cipher, uint8_t *block);
-
-void aes_raw_counter_xor(const aes_raw_t *aes,
- const uint8_t *iv, uint32_t iv_offset,
- uint8_t *data, size_t n);
-#endif
-
-#ifdef TOR_AES_PRIVATE
-#include "lib/arch/bytes.h"
-
-/** Increment the big-endian 128-bit counter in 'iv' by 'offset'. */
-static inline void
-aes_ctr_add_iv_offset(uint8_t *iv, uint32_t offset)
-{
-
- uint64_t h_hi = tor_ntohll(get_uint64(iv + 0));
- uint64_t h_lo = tor_ntohll(get_uint64(iv + 8));
- h_lo += offset;
- h_hi += (h_lo < offset);
- set_uint64(iv + 0, tor_htonll(h_hi));
- set_uint64(iv + 8, tor_htonll(h_lo));
-}
#endif
#endif /* !defined(TOR_AES_H) */
diff --git a/src/lib/crypt_ops/aes_nss.c b/src/lib/crypt_ops/aes_nss.c
@@ -23,9 +23,18 @@ DISABLE_GCC_WARNING("-Wstrict-prototypes")
#include <secerr.h>
ENABLE_GCC_WARNING("-Wstrict-prototypes")
-aes_cnt_cipher_t *
-aes_new_cipher(const uint8_t *key, const uint8_t *iv,
- int key_bits)
+struct aes_cnt_cipher_t {
+ PK11Context *context;
+ // We need to keep a copy of the key here since we can't set the IV only.
+ // It would be nice to fix that, but NSS doesn't see a huge number of
+ // users.
+ uint8_t kbytes;
+ uint8_t key[32];
+};
+
+static PK11Context *
+aes_new_cipher_internal(const uint8_t *key, const uint8_t *iv,
+ int key_bits)
{
const CK_MECHANISM_TYPE ckm = CKM_AES_CTR;
SECItem keyItem = { .type = siBuffer,
@@ -68,7 +77,18 @@ aes_new_cipher(const uint8_t *key, const uint8_t *iv,
PK11_FreeSlot(slot);
tor_assert(result);
- return (aes_cnt_cipher_t *)result;
+ return result;
+}
+
+aes_cnt_cipher_t *
+aes_new_cipher(const uint8_t *key, const uint8_t *iv,
+ int key_bits)
+{
+ aes_cnt_cipher_t *cipher = tor_malloc_zero(sizeof(*cipher));
+ cipher->context = aes_new_cipher_internal(key, iv, key_bits);
+ cipher->kbytes = key_bits / 8;
+ memcpy(cipher->key, key, cipher->kbytes);
+ return cipher;
}
void
@@ -76,7 +96,34 @@ aes_cipher_free_(aes_cnt_cipher_t *cipher)
{
if (!cipher)
return;
- PK11_DestroyContext((PK11Context*) cipher, PR_TRUE);
+ PK11_DestroyContext(cipher->context, PR_TRUE);
+ memwipe(cipher, 0, sizeof(*cipher));
+ tor_free(cipher);
+}
+
+void
+aes_cipher_set_iv_aligned(aes_cnt_cipher_t *cipher, const uint8_t *iv)
+{
+ // For NSS, I could not find a method to change the IV
+ // of an existing context. Maybe I missed one?
+ PK11_DestroyContext(cipher->context, PR_TRUE);
+ cipher->context = aes_new_cipher_internal(cipher->key, iv,
+ 8*(int)cipher->kbytes);
+}
+
+void
+aes_cipher_set_key(aes_cnt_cipher_t *cipher,
+ const uint8_t *key, int key_bits)
+{
+ const uint8_t iv[16] = {0};
+ // For NSS, I could not find a method to change the key
+ // of an existing context. Maybe I missed one?
+ PK11_DestroyContext(cipher->context, PR_TRUE);
+ memwipe(cipher->key, 0, sizeof(cipher->key));
+
+ cipher->context = aes_new_cipher_internal(key, iv, key_bits);
+ cipher->kbytes = key_bits / 8;
+ memcpy(cipher->key, key, cipher->kbytes);
}
void
@@ -85,29 +132,15 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data_, size_t len_)
tor_assert(len_ <= INT_MAX);
SECStatus s;
- PK11Context *ctx = (PK11Context*)cipher;
unsigned char *data = (unsigned char *)data_;
int len = (int) len_;
int result_len = 0;
- s = PK11_CipherOp(ctx, data, &result_len, len, data, len);
+ s = PK11_CipherOp(cipher->context, data, &result_len, len, data, len);
tor_assert(s == SECSuccess);
tor_assert(result_len == len);
}
-int
-evaluate_evp_for_aes(int force_value)
-{
- (void)force_value;
- return 0;
-}
-
-int
-evaluate_ctr_for_aes(void)
-{
- return 0;
-}
-
aes_raw_t *
aes_raw_new(const uint8_t *key, int key_bits, bool encrypt)
{
@@ -186,37 +219,3 @@ aes_raw_decrypt(const aes_raw_t *cipher, uint8_t *block)
/* This is the same function call for NSS. */
aes_raw_encrypt(cipher, block);
}
-
-static inline void
-xor_bytes(uint8_t *outp, const uint8_t *inp, size_t n)
-{
- for (size_t i = 0; i < n; ++i) {
- outp[i] ^= inp[i];
- }
-}
-
-void
-aes_raw_counter_xor(const aes_raw_t *cipher,
- const uint8_t *iv, uint32_t iv_offset,
- uint8_t *data, size_t n)
-{
- uint8_t counter[16];
- uint8_t buf[16];
-
- memcpy(counter, iv, 16);
- aes_ctr_add_iv_offset(counter, iv_offset);
-
- while (n) {
- memcpy(buf, counter, 16);
- aes_raw_encrypt(cipher, buf);
- if (n >= 16) {
- xor_bytes(data, buf, 16);
- n -= 16;
- data += 16;
- } else {
- xor_bytes(data, buf, n);
- break;
- }
- aes_ctr_add_iv_offset(counter, 1);
- }
-}
diff --git a/src/lib/crypt_ops/aes_openssl.c b/src/lib/crypt_ops/aes_openssl.c
@@ -41,36 +41,43 @@ ENABLE_GCC_WARNING("-Wredundant-decls")
#include "lib/log/log.h"
#include "lib/ctime/di_ops.h"
-#ifdef OPENSSL_NO_ENGINE
-/* Android's OpenSSL seems to have removed all of its Engine support. */
-#define DISABLE_ENGINES
+/* Cached values of our EVP_CIPHER items. If we don't pre-fetch them,
+ * then EVP_CipherInit calls EVP_CIPHER_fetch itself,
+ * which is surprisingly expensive.
+ */
+static const EVP_CIPHER *aes128ctr = NULL;
+static const EVP_CIPHER *aes192ctr = NULL;
+static const EVP_CIPHER *aes256ctr = NULL;
+static const EVP_CIPHER *aes128ecb = NULL;
+static const EVP_CIPHER *aes192ecb = NULL;
+static const EVP_CIPHER *aes256ecb = NULL;
+
+#if OPENSSL_VERSION_NUMBER >= OPENSSL_V_NOPATCH(3,0,0) \
+ && !defined(LIBRESSL_VERSION_NUMBER)
+#define RESOLVE_CIPHER(c) \
+ EVP_CIPHER_fetch(NULL, OBJ_nid2sn(EVP_CIPHER_get_nid(c)), "")
+#else
+#define RESOLVE_CIPHER(c) (c)
#endif
-/* We have five strategies for implementing AES counter mode.
- *
- * Best with x86 and x86_64: Use EVP_aes_*_ctr() and EVP_EncryptUpdate().
- * This is possible with OpenSSL 1.0.1, where the counter-mode implementation
- * can use bit-sliced or vectorized AES or AESNI as appropriate.
- *
- * Otherwise: Pick the best possible AES block implementation that OpenSSL
- * gives us, and the best possible counter-mode implementation, and combine
- * them.
+/**
+ * Pre-fetch the versions of every AES cipher with its associated provider.
*/
-#if OPENSSL_VERSION_NUMBER >= OPENSSL_V_NOPATCH(1,1,0)
-
-/* With newer OpenSSL versions, the older fallback modes don't compile. So
- * don't use them, even if we lack specific acceleration. */
-
-#define USE_EVP_AES_CTR
-
-#elif OPENSSL_VERSION_NUMBER >= OPENSSL_V_NOPATCH(1,0,1) && \
- (defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
- defined(__x86_64) || defined(__x86_64__) || \
- defined(_M_AMD64) || defined(_M_X64) || defined(__INTEL__))
-
-#define USE_EVP_AES_CTR
-
-#endif /* OPENSSL_VERSION_NUMBER >= OPENSSL_V_NOPATCH(1,1,0) || ... */
+static void
+init_ciphers(void)
+{
+ aes128ctr = RESOLVE_CIPHER(EVP_aes_128_ctr());
+ aes192ctr = RESOLVE_CIPHER(EVP_aes_192_ctr());
+ aes256ctr = RESOLVE_CIPHER(EVP_aes_256_ctr());
+ aes128ecb = RESOLVE_CIPHER(EVP_aes_128_ecb());
+ aes192ecb = RESOLVE_CIPHER(EVP_aes_192_ecb());
+ aes256ecb = RESOLVE_CIPHER(EVP_aes_256_ecb());
+}
+#define INIT_CIPHERS() STMT_BEGIN { \
+ if (PREDICT_UNLIKELY(NULL == aes128ctr)) { \
+ init_ciphers(); \
+ } \
+ } STMT_END
/* We have 2 strategies for getting the AES block cipher: Via OpenSSL's
* AES_encrypt function, or via OpenSSL's EVP_EncryptUpdate function.
@@ -91,30 +98,18 @@ ENABLE_GCC_WARNING("-Wredundant-decls")
* make sure that we have a fixed version.)
*/
-/* Helper function to use EVP with openssl's counter-mode wrapper. */
-static void
-evp_block128_fn(const uint8_t in[16],
- uint8_t out[16],
- const void *key)
-{
- EVP_CIPHER_CTX *ctx = (void*)key;
- int inl=16, outl=16;
- EVP_EncryptUpdate(ctx, out, &outl, in, inl);
-}
-
-#ifdef USE_EVP_AES_CTR
-
/* We don't actually define the struct here. */
aes_cnt_cipher_t *
aes_new_cipher(const uint8_t *key, const uint8_t *iv, int key_bits)
{
+ INIT_CIPHERS();
EVP_CIPHER_CTX *cipher = EVP_CIPHER_CTX_new();
const EVP_CIPHER *c = NULL;
switch (key_bits) {
- case 128: c = EVP_aes_128_ctr(); break;
- case 192: c = EVP_aes_192_ctr(); break;
- case 256: c = EVP_aes_256_ctr(); break;
+ case 128: c = aes128ctr; break;
+ case 192: c = aes192ctr; break;
+ case 256: c = aes256ctr; break;
default: tor_assert_unreached(); // LCOV_EXCL_LINE
}
EVP_EncryptInit(cipher, c, key, iv);
@@ -129,265 +124,56 @@ aes_cipher_free_(aes_cnt_cipher_t *cipher_)
EVP_CIPHER_CTX_reset(cipher);
EVP_CIPHER_CTX_free(cipher);
}
+
+/** Changes the key of the cipher;
+ * sets the IV to 0.
+ */
void
-aes_crypt_inplace(aes_cnt_cipher_t *cipher_, char *data, size_t len)
+aes_cipher_set_key(aes_cnt_cipher_t *cipher_, const uint8_t *key, int key_bits)
{
- int outl;
EVP_CIPHER_CTX *cipher = (EVP_CIPHER_CTX *) cipher_;
-
- tor_assert(len < INT_MAX);
-
- EVP_EncryptUpdate(cipher, (unsigned char*)data,
- &outl, (unsigned char*)data, (int)len);
-}
-int
-evaluate_evp_for_aes(int force_val)
-{
- (void) force_val;
- log_info(LD_CRYPTO, "This version of OpenSSL has a known-good EVP "
- "counter-mode implementation. Using it.");
- return 0;
-}
-int
-evaluate_ctr_for_aes(void)
-{
- return 0;
-}
-#else /* !defined(USE_EVP_AES_CTR) */
-
-/*======================================================================*/
-/* Interface to AES code, and counter implementation */
-
-/** Implements an AES counter-mode cipher. */
-struct aes_cnt_cipher_t {
-/** This next element (however it's defined) is the AES key. */
- union {
- EVP_CIPHER_CTX evp;
- AES_KEY aes;
- } key;
-
-#if !defined(WORDS_BIGENDIAN)
-#define USING_COUNTER_VARS
- /** These four values, together, implement a 128-bit counter, with
- * counter0 as the low-order word and counter3 as the high-order word. */
- uint32_t counter3;
- uint32_t counter2;
- uint32_t counter1;
- uint32_t counter0;
-#endif /* !defined(WORDS_BIGENDIAN) */
-
- union {
- /** The counter, in big-endian order, as bytes. */
- uint8_t buf[16];
- /** The counter, in big-endian order, as big-endian words. Note that
- * on big-endian platforms, this is redundant with counter3...0,
- * so we just use these values instead. */
- uint32_t buf32[4];
- } ctr_buf;
-
- /** The encrypted value of ctr_buf. */
- uint8_t buf[16];
- /** Our current stream position within buf. */
- unsigned int pos;
-
- /** True iff we're using the evp implementation of this cipher. */
- uint8_t using_evp;
-};
-
-/** True iff we should prefer the EVP implementation for AES, either because
- * we're testing it or because we have hardware acceleration configured */
-static int should_use_EVP = 0;
-
-/** Check whether we should use the EVP interface for AES. If <b>force_val</b>
- * is nonnegative, we use use EVP iff it is true. Otherwise, we use EVP
- * if there is an engine enabled for aes-ecb. */
-int
-evaluate_evp_for_aes(int force_val)
-{
- ENGINE *e;
-
- if (force_val >= 0) {
- should_use_EVP = force_val;
- return 0;
- }
-#ifdef DISABLE_ENGINES
- should_use_EVP = 0;
-#else
- e = ENGINE_get_cipher_engine(NID_aes_128_ecb);
-
- if (e) {
- log_info(LD_CRYPTO, "AES engine \"%s\" found; using EVP_* functions.",
- ENGINE_get_name(e));
- should_use_EVP = 1;
- } else {
- log_info(LD_CRYPTO, "No AES engine found; using AES_* functions.");
- should_use_EVP = 0;
+ uint8_t iv[16] = {0};
+ const EVP_CIPHER *c = NULL;
+ switch (key_bits) {
+ case 128: c = aes128ctr; break;
+ case 192: c = aes192ctr; break;
+ case 256: c = aes256ctr; break;
+ default: tor_assert_unreached(); // LCOV_EXCL_LINE
}
-#endif /* defined(DISABLE_ENGINES) */
- return 0;
+ // No need to call EVP_CIPHER_CTX_Reset here; EncryptInit already
+ // does it for us.
+ EVP_EncryptInit(cipher, c, key, iv);
}
-
-/** Test the OpenSSL counter mode implementation to see whether it has the
- * counter-mode bug from OpenSSL 1.0.0. If the implementation works, then
- * we will use it for future encryption/decryption operations.
+/** Change the IV of this stream cipher without changing the key.
*
- * We can't just look at the OpenSSL version, since some distributions update
- * their OpenSSL packages without changing the version number.
- **/
-int
-evaluate_ctr_for_aes(void)
-{
- /* Result of encrypting an all-zero block with an all-zero 128-bit AES key.
- * This should be the same as encrypting an all-zero block with an all-zero
- * 128-bit AES key in counter mode, starting at position 0 of the stream.
- */
- static const unsigned char encrypt_zero[] =
- "\x66\xe9\x4b\xd4\xef\x8a\x2c\x3b\x88\x4c\xfa\x59\xca\x34\x2b\x2e";
- unsigned char zero[16];
- unsigned char output[16];
- unsigned char ivec[16];
- unsigned char ivec_tmp[16];
- unsigned int pos, i;
- AES_KEY key;
- memset(zero, 0, sizeof(zero));
- memset(ivec, 0, sizeof(ivec));
- AES_set_encrypt_key(zero, 128, &key);
-
- pos = 0;
- /* Encrypting a block one byte at a time should make the error manifest
- * itself for known bogus openssl versions. */
- for (i=0; i<16; ++i)
- AES_ctr128_encrypt(&zero[i], &output[i], 1, &key, ivec, ivec_tmp, &pos);
-
- if (fast_memneq(output, encrypt_zero, 16)) {
- /* Counter mode is buggy */
- /* LCOV_EXCL_START */
- log_err(LD_CRYPTO, "This OpenSSL has a buggy version of counter mode; "
- "quitting tor.");
- exit(1); // exit ok: openssl is broken.
- /* LCOV_EXCL_STOP */
- }
- return 0;
-}
-
-#if !defined(USING_COUNTER_VARS)
-#define COUNTER(c, n) ((c)->ctr_buf.buf32[3-(n)])
-#else
-#define COUNTER(c, n) ((c)->counter ## n)
-#endif
-
-static void aes_set_key(aes_cnt_cipher_t *cipher, const uint8_t *key,
- int key_bits);
-static void aes_set_iv(aes_cnt_cipher_t *cipher, const uint8_t *iv);
-
-/**
- * Return a newly allocated counter-mode AES128 cipher implementation,
- * using the 128-bit key <b>key</b> and the 128-bit IV <b>iv</b>.
- */
-aes_cnt_cipher_t*
-aes_new_cipher(const uint8_t *key, const uint8_t *iv, int bits)
-{
- aes_cnt_cipher_t* result = tor_malloc_zero(sizeof(aes_cnt_cipher_t));
-
- aes_set_key(result, key, bits);
- aes_set_iv(result, iv);
-
- return result;
-}
-
-/** Set the key of <b>cipher</b> to <b>key</b>, which is
- * <b>key_bits</b> bits long (must be 128, 192, or 256). Also resets
- * the counter to 0.
+ * Requires that the cipher stream position is at an even multiple of 16 bytes.
*/
-static void
-aes_set_key(aes_cnt_cipher_t *cipher, const uint8_t *key, int key_bits)
+void
+aes_cipher_set_iv_aligned(aes_cnt_cipher_t *cipher_, const uint8_t *iv)
{
- if (should_use_EVP) {
- const EVP_CIPHER *c = 0;
- switch (key_bits) {
- case 128: c = EVP_aes_128_ecb(); break;
- case 192: c = EVP_aes_192_ecb(); break;
- case 256: c = EVP_aes_256_ecb(); break;
- default: tor_assert(0); // LCOV_EXCL_LINE
- }
- EVP_EncryptInit(&cipher->key.evp, c, key, NULL);
- cipher->using_evp = 1;
- } else {
- AES_set_encrypt_key(key, key_bits,&cipher->key.aes);
- cipher->using_evp = 0;
- }
-
-#ifdef USING_COUNTER_VARS
- cipher->counter0 = 0;
- cipher->counter1 = 0;
- cipher->counter2 = 0;
- cipher->counter3 = 0;
-#endif /* defined(USING_COUNTER_VARS) */
-
- memset(cipher->ctr_buf.buf, 0, sizeof(cipher->ctr_buf.buf));
-
- cipher->pos = 0;
+ EVP_CIPHER_CTX *cipher = (EVP_CIPHER_CTX *) cipher_;
+#ifdef LIBRESSL_VERSION_NUMBER
+ EVP_CIPHER_CTX_set_iv(cipher, iv, 16);
+#else
+ // We would have to do this if the cipher's position were not aligned:
+ // EVP_CIPHER_CTX_set_num(cipher, 0);
- memset(cipher->buf, 0, sizeof(cipher->buf));
+ memcpy(EVP_CIPHER_CTX_iv_noconst(cipher), iv, 16);
+#endif
}
-
-/** Release storage held by <b>cipher</b>
- */
void
-aes_cipher_free_(aes_cnt_cipher_t *cipher)
+aes_crypt_inplace(aes_cnt_cipher_t *cipher_, char *data, size_t len)
{
- if (!cipher)
- return;
- if (cipher->using_evp) {
- EVP_CIPHER_CTX_cleanup(&cipher->key.evp);
- }
- memwipe(cipher, 0, sizeof(aes_cnt_cipher_t));
- tor_free(cipher);
-}
+ int outl;
+ EVP_CIPHER_CTX *cipher = (EVP_CIPHER_CTX *) cipher_;
-#if defined(USING_COUNTER_VARS)
-#define UPDATE_CTR_BUF(c, n) STMT_BEGIN \
- (c)->ctr_buf.buf32[3-(n)] = htonl((c)->counter ## n); \
- STMT_END
-#else
-#define UPDATE_CTR_BUF(c, n)
-#endif /* defined(USING_COUNTER_VARS) */
+ tor_assert(len < INT_MAX);
-/** Encrypt <b>len</b> bytes from <b>input</b>, storing the results in place.
- * Uses the key in <b>cipher</b>, and advances the counter by <b>len</b> bytes
- * as it encrypts.
- */
-void
-aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
-{
- /* Note that the "128" below refers to the length of the counter,
- * not the length of the AES key. */
- if (cipher->using_evp) {
- /* In openssl 1.0.0, there's an if'd out EVP_aes_128_ctr in evp.h. If
- * it weren't disabled, it might be better just to use that.
- */
- CRYPTO_ctr128_encrypt((const unsigned char *)data,
- (unsigned char *)data,
- len,
- &cipher->key.evp,
- cipher->ctr_buf.buf,
- cipher->buf,
- &cipher->pos,
- evp_block128_fn);
- } else {
- AES_ctr128_encrypt((const unsigned char *)data,
- (unsigned char *)data,
- len,
- &cipher->key.aes,
- cipher->ctr_buf.buf,
- cipher->buf,
- &cipher->pos);
- }
+ EVP_EncryptUpdate(cipher, (unsigned char*)data,
+ &outl, (unsigned char*)data, (int)len);
}
-#endif /* defined(USE_EVP_AES_CTR) */
-
/* ========
* Functions for "raw" (ECB) AES.
*
@@ -406,16 +192,19 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
aes_raw_t *
aes_raw_new(const uint8_t *key, int key_bits, bool encrypt)
{
+ INIT_CIPHERS();
EVP_CIPHER_CTX *cipher = EVP_CIPHER_CTX_new();
tor_assert(cipher);
const EVP_CIPHER *c = NULL;
switch (key_bits) {
- case 128: c = EVP_aes_128_ecb(); break;
- case 192: c = EVP_aes_192_ecb(); break;
- case 256: c = EVP_aes_256_ecb(); break;
+ case 128: c = aes128ecb; break;
+ case 192: c = aes192ecb; break;
+ case 256: c = aes256ecb; break;
default: tor_assert_unreached();
}
+ // No need to call EVP_CIPHER_CTX_Reset here; EncryptInit already
+ // does it for us.
int r = EVP_CipherInit(cipher, c, key, NULL, encrypt);
tor_assert(r == 1);
EVP_CIPHER_CTX_set_padding(cipher, 0);
@@ -432,14 +221,13 @@ aes_raw_set_key(aes_raw_t **cipher_, const uint8_t *key,
{
const EVP_CIPHER *c = *(EVP_CIPHER**) cipher_;
switch (key_bits) {
- case 128: c = EVP_aes_128_ecb(); break;
- case 192: c = EVP_aes_192_ecb(); break;
- case 256: c = EVP_aes_256_ecb(); break;
+ case 128: c = aes128ecb; break;
+ case 192: c = aes192ecb; break;
+ case 256: c = aes256ecb; break;
default: tor_assert_unreached();
}
aes_raw_t *cipherp = *cipher_;
EVP_CIPHER_CTX *cipher = (EVP_CIPHER_CTX *)cipherp;
- EVP_CIPHER_CTX_reset(cipher);
int r = EVP_CipherInit(cipher, c, key, NULL, encrypt);
tor_assert(r == 1);
EVP_CIPHER_CTX_set_padding(cipher, 0);
@@ -487,30 +275,3 @@ aes_raw_decrypt(const aes_raw_t *cipher, uint8_t *block)
tor_assert(r == 1);
tor_assert(outl == 16);
}
-
-/**
- * Use the AES encryption key AES in counter mode,
- * starting at the position (iv + iv_offset)*16,
- * to encrypt the 'n' bytes of data in 'data'.
- *
- * Unlike aes_crypt_inplace, this function can re-use the same key repeatedly
- * with diferent IVs.
- */
-void
-aes_raw_counter_xor(const aes_raw_t *cipher,
- const uint8_t *iv, uint32_t iv_offset,
- uint8_t *data, size_t n)
-{
- uint8_t counter[16];
- uint8_t buf[16];
- unsigned int pos = 0;
-
- memcpy(counter, iv, 16);
- if (iv_offset) {
- aes_ctr_add_iv_offset(counter, iv_offset);
- }
-
- CRYPTO_ctr128_encrypt(data, data, n,
- (EVP_CIPHER_CTX *)cipher,
- counter, buf, &pos, evp_block128_fn);
-}
diff --git a/src/lib/crypt_ops/crypto_init.c b/src/lib/crypt_ops/crypto_init.c
@@ -26,6 +26,7 @@
#include "lib/crypt_ops/crypto_options_st.h"
#include "lib/conf/conftypes.h"
#include "lib/log/util_bug.h"
+#include "ext/polyval/polyval.h"
#include "lib/subsys/subsys.h"
@@ -69,6 +70,8 @@ crypto_early_init(void)
crypto_nss_early_init(0);
#endif
+ polyval_detect_implementation();
+
if (crypto_seed_rng() < 0)
return -1;
if (crypto_init_siphash_key() < 0)
diff --git a/src/lib/crypt_ops/crypto_openssl_mgt.c b/src/lib/crypt_ops/crypto_openssl_mgt.c
@@ -323,9 +323,6 @@ crypto_openssl_late_init(int useAccel, const char *accelName,
return -1;
}
- evaluate_evp_for_aes(-1);
- evaluate_ctr_for_aes();
-
return 0;
}
diff --git a/src/test/bench.c b/src/test/bench.c
@@ -23,6 +23,9 @@
#include <openssl/obj_mac.h>
#endif /* defined(ENABLE_OPENSSL) */
+#include <math.h>
+
+#include "ext/polyval/polyval.h"
#include "core/or/circuitlist.h"
#include "app/config/config.h"
#include "app/main/subsysmgr.h"
@@ -33,6 +36,7 @@
#include "lib/crypt_ops/crypto_rand.h"
#include "feature/dircommon/consdiff.h"
#include "lib/compress/compress.h"
+#include "core/crypto/relay_crypto_cgo.h"
#include "core/or/cell_st.h"
#include "core/or/or_circuit_st.h"
@@ -43,6 +47,27 @@
#include "feature/dirparse/microdesc_parse.h"
#include "feature/nodelist/microdesc.h"
+#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) \
+ || defined(_M_X64) || defined(_M_IX86) || defined(__i486) \
+ || defined(__i386__)
+#define INTEL
+#endif
+
+#ifdef INTEL
+#include "x86intrin.h"
+
+static inline uint64_t
+cycles(void)
+{
+ return __rdtsc();
+}
+#define cpb(start, end, bytes) \
+ (((double)(end - start)) / (bytes))
+#else
+#define cycles() 0
+#define cpb(start,end,bytes) ((void)(start+end+bytes), (double)NAN)
+#endif
+
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_PROCESS_CPUTIME_ID)
static uint64_t nanostart;
static inline uint64_t
@@ -498,9 +523,9 @@ bench_digest(void)
}
static void
-bench_cell_ops(void)
+bench_cell_ops_tor1(void)
{
- const int iters = 1<<16;
+ const int iters = 1<<20;
int i;
/* benchmarks for cell ops at relay. */
@@ -508,6 +533,10 @@ bench_cell_ops(void)
cell_t *cell = tor_malloc(sizeof(cell_t));
int outbound;
uint64_t start, end;
+ uint64_t cstart, cend;
+
+ // TODO CGO: use constant after this is merged or rebased.
+ const unsigned payload_len = 498;
crypto_rand((char*)cell->payload, sizeof(cell->payload));
@@ -529,18 +558,35 @@ bench_cell_ops(void)
for (outbound = 0; outbound <= 1; ++outbound) {
cell_direction_t d = outbound ? CELL_DIRECTION_OUT : CELL_DIRECTION_IN;
start = perftime();
+ cstart = cycles();
for (i = 0; i < iters; ++i) {
char recognized = 0;
crypt_path_t *layer_hint = NULL;
relay_decrypt_cell(TO_CIRCUIT(or_circ), cell, d,
&layer_hint, &recognized);
}
+ cend = cycles();
end = perftime();
- printf("%sbound cells: %.2f ns per cell. (%.2f ns per byte of payload)\n",
+ printf("%sbound cells: %.2f ns per cell. "
+ "(%.2f ns per byte of payload, %.2f cpb)\n",
outbound?"Out":" In",
NANOCOUNT(start,end,iters),
- NANOCOUNT(start,end,iters*CELL_PAYLOAD_SIZE));
+ NANOCOUNT(start,end,iters * payload_len),
+ cpb(cstart, cend, iters * payload_len));
+ }
+
+ start = perftime();
+ cstart = cycles();
+ for (i = 0; i < iters; ++i) {
+ relay_encrypt_cell_inbound(cell, or_circ);
}
+ cend = cycles();
+ end = perftime();
+ printf("originate inbound : %.2f ns per cell. "
+ "(%.2f ns per payload byte, %.2f cpb)\n",
+ NANOCOUNT(start, end, iters),
+ NANOCOUNT(start, end, iters * payload_len),
+ cpb(cstart, cend, iters*payload_len));
relay_crypto_clear(&or_circ->crypto);
tor_free(or_circ);
@@ -548,6 +594,126 @@ bench_cell_ops(void)
}
static void
+bench_polyval(void)
+{
+ polyval_t pv;
+ polyvalx_t pvx;
+ uint8_t key[16];
+ uint8_t input[512];
+ uint64_t start, end, cstart, cend;
+ crypto_rand((char*) key, sizeof(key));
+ crypto_rand((char*) input, sizeof(input));
+
+ const int iters = 1<<20;
+
+ polyval_init(&pv, key);
+ start = perftime();
+ cstart = cycles();
+ for (int i = 0; i < iters; ++i) {
+ polyval_add_block(&pv, input);
+ }
+ cend = cycles();
+ end = perftime();
+ printf("polyval (add 16): %.2f ns; %.2f cpb\n",
+ NANOCOUNT(start, end, iters),
+ cpb(cstart, cend, iters * 16));
+
+ start = perftime();
+ cstart = cycles();
+ for (int i = 0; i < iters; ++i) {
+ polyval_add_zpad(&pv, input, 512);
+ }
+ cend = cycles();
+ end = perftime();
+ printf("polyval (add 512): %.2f ns; %.2f cpb\n",
+ NANOCOUNT(start, end, iters),
+ cpb(cstart, cend, iters * 512));
+
+ polyvalx_init(&pvx, key);
+ start = perftime();
+ cstart = cycles();
+ for (int i = 0; i < iters; ++i) {
+ polyvalx_add_zpad(&pvx, input, 512);
+ }
+ cend = cycles();
+ end = perftime();
+ printf("polyval (add 512, pre-expanded key): %.2f ns; %.2f cpb\n",
+ NANOCOUNT(start, end, iters),
+ cpb(cstart, cend, iters * 512));
+}
+
+static void
+bench_cell_ops_cgo(void)
+{
+ const int iters = 1<<20;
+
+ /* benchmarks for cell ops at relay. */
+ cell_t *cell = tor_malloc(sizeof(cell_t));
+
+ uint64_t start, end;
+ uint64_t cstart, cend;
+
+ const uint8_t *tag = NULL;
+ size_t keylen = cgo_key_material_len(128);
+ uint8_t *keys = tor_malloc(keylen);
+ crypto_rand((char*) keys, keylen);
+
+ // We're using the version of this constant that _does_ include
+ // stream IDs, for an apples-to-apples comparison with tor1.
+ //
+ // TODO CGO: use constant after this is merged or rebased.
+ const unsigned payload_len = 488;
+
+ memset(cell, 0, sizeof(*cell));
+
+#define SHOW(operation) \
+ printf("%s: %.2f per cell (%.2f cpb)\n", \
+ (operation), \
+ NANOCOUNT(start,end,iters), \
+ cpb(cstart, cend, (double)iters * payload_len))
+
+ // Initialize crypto
+ cgo_crypt_t *r_f = cgo_crypt_new(CGO_MODE_RELAY_FORWARD, 128, keys, keylen);
+ cgo_crypt_t *r_b = cgo_crypt_new(CGO_MODE_RELAY_BACKWARD, 128, keys, keylen);
+
+ reset_perftime();
+
+ start = perftime();
+ cstart = cycles();
+ for (int i=0; i < iters; ++i) {
+ cgo_crypt_relay_forward(r_f, cell, &tag);
+ }
+ cend = cycles();
+ end = perftime();
+ SHOW("CGO outbound at relay");
+
+ start = perftime();
+ cstart = cycles();
+ for (int i=0; i < iters; ++i) {
+ cgo_crypt_relay_backward(r_b, cell);
+ }
+ cend = cycles();
+ end = perftime();
+ SHOW("CGO inbound at relay");
+
+ start = perftime();
+ cstart = cycles();
+ for (int i=0; i < iters; ++i) {
+ cgo_crypt_relay_originate(r_b, cell, &tag);
+ }
+ cend = cycles();
+ end = perftime();
+ SHOW("CGO originate at relay");
+
+ tor_free(cell);
+ tor_free(keys);
+ cgo_crypt_free(r_f);
+ cgo_crypt_free(r_b);
+
+#undef SHOW
+}
+
+static void
bench_dh(void)
{
const int iters = 1<<10;
@@ -683,13 +849,15 @@ static struct benchmark_t benchmarks[] = {
ENT(dmap),
ENT(siphash),
ENT(digest),
+ ENT(polyval),
ENT(aes),
ENT(onion_ntor),
ENT(ed25519),
ENT(rand),
ENT(cell_aes),
- ENT(cell_ops),
+ ENT(cell_ops_tor1),
+ ENT(cell_ops_cgo),
ENT(dh),
#ifdef ENABLE_OPENSSL
diff --git a/src/test/test_crypto.c b/src/test/test_crypto.c
@@ -259,14 +259,12 @@ test_crypto_openssl_version(void *arg)
static void
test_crypto_aes128(void *arg)
{
+ (void)arg;
char *data1 = NULL, *data2 = NULL, *data3 = NULL;
crypto_cipher_t *env1 = NULL, *env2 = NULL;
int i, j;
char *mem_op_hex_tmp=NULL;
char key[CIPHER_KEY_LEN];
- int use_evp = !strcmp(arg,"evp");
- evaluate_evp_for_aes(use_evp);
- evaluate_ctr_for_aes();
data1 = tor_malloc(1024);
data2 = tor_malloc(1024);
@@ -1634,14 +1632,12 @@ test_crypto_formats(void *arg)
static void
test_crypto_aes_iv(void *arg)
{
+ (void)arg;
char *plain, *encrypted1, *encrypted2, *decrypted1, *decrypted2;
char plain_1[1], plain_15[15], plain_16[16], plain_17[17];
char key1[16], key2[16];
ssize_t encrypted_size, decrypted_size;
- int use_evp = !strcmp(arg,"evp");
- evaluate_evp_for_aes(use_evp);
-
plain = tor_malloc(4095);
encrypted1 = tor_malloc(4095 + 1 + 16);
encrypted2 = tor_malloc(4095 + 1 + 16);
@@ -3201,6 +3197,7 @@ test_crypto_polyval(void *arg)
uint8_t output[16];
uint8_t output2[16];
char *mem_op_hex_tmp=NULL;
+ uint8_t *longer = NULL;
// From RFC 8452
const char *key_hex = "25629347589242761d31f826ba4b757b";
@@ -3236,8 +3233,38 @@ test_crypto_polyval(void *arg)
polyval_get_tag(&pv, output2);
tt_mem_op(output, OP_EQ, output2, 16);
+ // Try a long input both ways, and make sure the answer is the same.
+ longer = tor_malloc_zero(4096);
+ crypto_rand((char *)longer, 4090); // leave zeros at the end.
+ polyval_reset(&pv);
+ polyval_add_zpad(&pv, longer, 4090);
+ polyval_get_tag(&pv, output);
+
+ polyval_reset(&pv);
+ const uint8_t *cp;
+ for (cp = longer; cp < longer + 4096; cp += 16) {
+ polyval_add_block(&pv, cp);
+ }
+ polyval_get_tag(&pv, output2);
+ tt_mem_op(output, OP_EQ, output2, 16);
+
+ // Now the same with polyvalx.
+ polyvalx_t pvx;
+ polyvalx_init(&pvx, key);
+ polyvalx_add_zpad(&pvx, longer, 4090);
+ polyvalx_get_tag(&pvx, output2);
+ tt_mem_op(output, OP_EQ, output2, 16);
+
+ polyvalx_reset(&pvx);
+ for (cp = longer; cp < longer + 4096; cp += 16) {
+ polyvalx_add_block(&pvx, cp);
+ }
+ polyvalx_get_tag(&pvx, output2);
+ tt_mem_op(output, OP_EQ, output2, 16);
+
done:
tor_free(mem_op_hex_tmp);
+ tor_free(longer);
}
static void
@@ -3309,103 +3336,102 @@ test_crypto_aes_raw(void *arg)
#undef T
}
+/** Make sure that we can set keys on live AES instances correctly. */
static void
-test_crypto_aes_raw_ctr_equiv(void *arg)
+test_crypto_aes_keymanip_cnt(void *arg)
{
(void) arg;
- size_t buflen = 65536;
- uint8_t *buf = tor_malloc_zero(buflen);
- aes_cnt_cipher_t *c = NULL;
- aes_raw_t *c_raw = NULL;
-
- const uint8_t iv[16];
- const uint8_t key[16];
-
- // Simple case, IV with zero offset.
- for (int i = 0; i < 32; ++i) {
- crypto_rand((char*)iv, sizeof(iv));
- crypto_rand((char*)key, sizeof(key));
- c = aes_new_cipher(key, iv, 128);
- c_raw = aes_raw_new(key, 128, true);
-
- aes_crypt_inplace(c, (char*)buf, buflen);
- aes_raw_counter_xor(c_raw, iv, 0, buf, buflen);
- tt_assert(fast_mem_is_zero((char*)buf, buflen));
-
- aes_cipher_free(c);
- aes_raw_free(c_raw);
- }
- // Trickier case, IV with offset == 31.
- for (int i = 0; i < 32; ++i) {
- crypto_rand((char*)iv, sizeof(iv));
- crypto_rand((char*)key, sizeof(key));
- c = aes_new_cipher(key, iv, 128);
- c_raw = aes_raw_new(key, 128, true);
-
- aes_crypt_inplace(c, (char*)buf, buflen);
- size_t off = 31*16;
- aes_raw_counter_xor(c_raw, iv, 31, buf + off, buflen - off);
- tt_assert(fast_mem_is_zero((char*)buf + off, buflen - off));
-
- aes_cipher_free(c);
- aes_raw_free(c_raw);
- }
+ uint8_t k1[16] = "123456780123678";
+ uint8_t k2[16] = "abcdefghijklmno";
+ int kbits = 128;
+ uint8_t iv1[16]= "{return 4;}////";
+ uint8_t iv2[16] = {0};
+ uint8_t buf[128] = {0};
+ uint8_t buf2[128] = {0};
+
+ aes_cnt_cipher_t *aes = aes_new_cipher(k1, iv1, kbits);
+ aes_crypt_inplace(aes, (char*)buf, sizeof(buf));
+
+ aes_cnt_cipher_t *aes2 = aes_new_cipher(k2, iv2, kbits);
+ // 128-5 to make sure internal buf is cleared when we set key.
+ aes_crypt_inplace(aes2, (char*)buf2, sizeof(buf2)-5);
+ aes_cipher_set_key(aes2, k1, kbits);
+ aes_cipher_set_iv_aligned(aes2, iv1); // should work in this case.
+ memset(buf2, 0, sizeof(buf2));
+ aes_crypt_inplace(aes2, (char*)buf2, sizeof(buf2));
+ tt_mem_op(buf, OP_EQ, buf2, sizeof(buf));
done:
- aes_cipher_free(c);
- aes_raw_free(c_raw);
- tor_free(buf);
+ aes_cipher_free(aes);
+ aes_cipher_free(aes2);
}
-/* Make sure that our IV addition code is correct.
- *
- * We test this function separately to make sure we handle corner cases well;
- * the corner cases are rare enough that we shouldn't expect to see them in
- * randomized testing.
- */
static void
-test_crypto_aes_cnt_iv_manip(void *arg)
+test_crypto_aes_keymanip_ecb(void *arg)
{
- (void)arg;
- uint8_t buf[16];
- uint8_t expect[16];
- int n;
-#define T(pre, off, post) STMT_BEGIN { \
- n = base16_decode((char*)buf, sizeof(buf), \
- (pre), strlen(pre)); \
- tt_int_op(n, OP_EQ, sizeof(buf)); \
- n = base16_decode((char*)expect, sizeof(expect), \
- (post), strlen(post)); \
- tt_int_op(n, OP_EQ, sizeof(expect)); \
- aes_ctr_add_iv_offset(buf, (off)); \
- tt_mem_op(buf, OP_EQ, expect, 16); \
- } STMT_END
-
- T("00000000000000000000000000000000", 0x4032,
- "00000000000000000000000000004032");
- T("0000000000000000000000000000ffff", 0x4032,
- "00000000000000000000000000014031");
- // We focus on "31" here because that's what CGO uses.
- T("000000000000000000000000ffffffe0", 31,
- "000000000000000000000000ffffffff");
- T("000000000000000000000000ffffffe1", 31,
- "00000000000000000000000100000000");
- T("0000000100000000ffffffffffffffe0", 31,
- "0000000100000000ffffffffffffffff");
- T("0000000100000000ffffffffffffffe1", 31,
- "00000001000000010000000000000000");
- T("0000000ffffffffffffffffffffffff0", 31,
- "0000001000000000000000000000000f");
- T("ffffffffffffffffffffffffffffffe0", 31,
- "ffffffffffffffffffffffffffffffff");
- T("ffffffffffffffffffffffffffffffe1", 31,
- "00000000000000000000000000000000");
- T("ffffffffffffffffffffffffffffffe8", 31,
- "00000000000000000000000000000007");
+ (void) arg;
+ uint8_t k1[16] = "123456780123678";
+ uint8_t k2[16] = "abcdefghijklmno";
+ int kbits = 128;
+ uint8_t buf_orig[16] = {1,2,3,0};
+ uint8_t buf1[16];
+ uint8_t buf2[16];
+
+ aes_raw_t *aes1 = aes_raw_new(k1, kbits, true);
+ aes_raw_t *aes2 = aes_raw_new(k1, kbits, false);
+ aes_raw_set_key(&aes2, k2, kbits, false);
+
+ memcpy(buf1, buf_orig, 16);
+ memcpy(buf2, buf_orig, 16);
+
+ aes_raw_encrypt(aes1, buf1);
+ aes_raw_encrypt(aes1, buf2);
+ tt_mem_op(buf1, OP_EQ, buf2, 16);
+
+ aes_raw_decrypt(aes2, buf1);
+ aes_raw_set_key(&aes2, k1, kbits, false);
+ aes_raw_decrypt(aes2, buf2);
+
+ tt_mem_op(buf1, OP_NE, buf2, 16);
+ tt_mem_op(buf2, OP_EQ, buf_orig, 16);
-#undef T
done:
- ;
+ aes_raw_free(aes1);
+ aes_raw_free(aes2);
+}
+
+static void
+test_crypto_aes_cnt_set_iv(void *arg)
+{
+ (void)arg;
+ uint8_t k1[16] = "123456780123678";
+ uint8_t iv_zero[16] = {0};
+ int kbits = 128;
+ const int iters = 100;
+ uint8_t buf1[128];
+ uint8_t buf2[128];
+
+ aes_cnt_cipher_t *aes1, *aes2 = NULL;
+ aes1 = aes_new_cipher(k1, iv_zero, kbits);
+
+ for (int i = 0; i < iters; ++i) {
+ uint8_t iv[16];
+ crypto_rand((char*) iv, sizeof(iv));
+ memset(buf1, 0, sizeof(buf1));
+ memset(buf2, 0, sizeof(buf2));
+
+ aes_cipher_set_iv_aligned(aes1, iv);
+ aes2 = aes_new_cipher(k1, iv, kbits);
+
+ aes_crypt_inplace(aes1, (char*)buf1, sizeof(buf1));
+ aes_crypt_inplace(aes2, (char*)buf1, sizeof(buf2));
+ tt_mem_op(buf1, OP_EQ, buf2, sizeof(buf1));
+
+ aes_cipher_free(aes2);
+ }
+ done:
+ aes_cipher_free(aes1);
+ aes_cipher_free(aes2);
}
#ifndef COCCI
@@ -3424,8 +3450,7 @@ test_crypto_aes_cnt_iv_manip(void *arg)
struct testcase_t crypto_tests[] = {
CRYPTO_LEGACY(formats),
{ "openssl_version", test_crypto_openssl_version, TT_FORK, NULL, NULL },
- { "aes_AES", test_crypto_aes128, TT_FORK, &passthrough_setup, (void*)"aes" },
- { "aes_EVP", test_crypto_aes128, TT_FORK, &passthrough_setup, (void*)"evp" },
+ { "aes_AES", test_crypto_aes128, TT_FORK, NULL, NULL },
{ "aes128_ctr_testvec", test_crypto_aes_ctr_testvec, 0,
&passthrough_setup, (void*)"128" },
{ "aes192_ctr_testvec", test_crypto_aes_ctr_testvec, 0,
@@ -3446,10 +3471,7 @@ struct testcase_t crypto_tests[] = {
{ "sha3_xof", test_crypto_sha3_xof, TT_FORK, NULL, NULL},
{ "mac_sha3", test_crypto_mac_sha3, TT_FORK, NULL, NULL},
CRYPTO_LEGACY(dh),
- { "aes_iv_AES", test_crypto_aes_iv, TT_FORK, &passthrough_setup,
- (void*)"aes" },
- { "aes_iv_EVP", test_crypto_aes_iv, TT_FORK, &passthrough_setup,
- (void*)"evp" },
+ { "aes_iv_EVP", test_crypto_aes_iv, TT_FORK, NULL, NULL },
CRYPTO_LEGACY(base32_decode),
{ "kdf_TAP", test_crypto_kdf_TAP, 0, NULL, NULL },
{ "hkdf_sha256", test_crypto_hkdf_sha256, 0, NULL, NULL },
@@ -3477,7 +3499,8 @@ struct testcase_t crypto_tests[] = {
{ "failure_modes", test_crypto_failure_modes, TT_FORK, NULL, NULL },
{ "polyval", test_crypto_polyval, 0, NULL, NULL },
{ "aes_raw", test_crypto_aes_raw, 0, NULL, NULL },
- { "aes_raw_ctr_equiv", test_crypto_aes_raw_ctr_equiv, 0, NULL, NULL },
- { "aes_cnt_iv_manip", test_crypto_aes_cnt_iv_manip, 0, NULL, NULL },
+ { "aes_keymanip_cnt", test_crypto_aes_keymanip_cnt, 0, NULL, NULL },
+ { "aes_keymanip_ecb", test_crypto_aes_keymanip_ecb, 0, NULL, NULL },
+ { "aes_cnt_set_iv", test_crypto_aes_cnt_set_iv, 0, NULL, NULL },
END_OF_TESTCASES
};