tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

AesOpt.c (4651B)


      1 /* AesOpt.c -- Intel's AES
      2 2017-06-08 : Igor Pavlov : Public domain */
      3 
      4 #include "Precomp.h"
      5 
      6 #include "CpuArch.h"
      7 
      8 #ifdef MY_CPU_X86_OR_AMD64
      9 #if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
     10 #define USE_INTEL_AES
     11 #endif
     12 #endif
     13 
     14 #ifdef USE_INTEL_AES
     15 
     16 #include <wmmintrin.h>
     17 
     18 void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
     19 {
     20  __m128i m = *p;
     21  for (; numBlocks != 0; numBlocks--, data++)
     22  {
     23    UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
     24    const __m128i *w = p + 3;
     25    m = _mm_xor_si128(m, *data);
     26    m = _mm_xor_si128(m, p[2]);
     27    do
     28    {
     29      m = _mm_aesenc_si128(m, w[0]);
     30      m = _mm_aesenc_si128(m, w[1]);
     31      w += 2;
     32    }
     33    while (--numRounds2 != 0);
     34    m = _mm_aesenc_si128(m, w[0]);
     35    m = _mm_aesenclast_si128(m, w[1]);
     36    *data = m;
     37  }
     38  *p = m;
     39 }
     40 
     /* Number of blocks handled per iteration of the interleaved loops. */
     #define NUM_WAYS 3

     /*
       Applies the AES-NI intrinsic `op` with round key w[n] to the three states
       m0, m1 and m2. The names w, m0, m1 and m2 must be declared at the expansion
       site. The expansion is a complete brace block, so the macro is invoked
       without a trailing semicolon.
     */
     #define AES_OP_W(op, n) { \
         const __m128i roundKey = w[n]; \
         m0 = op(m0, roundKey); \
         m1 = op(m1, roundKey); \
         m2 = op(m2, roundKey); \
       }

     /* One decryption / final decryption / encryption / final encryption round
        applied to all three states with round key w[n]. */
     #define AES_DEC(n)      AES_OP_W(_mm_aesdec_si128, n)
     #define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
     #define AES_ENC(n)      AES_OP_W(_mm_aesenc_si128, n)
     #define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
     54 
     55 void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
     56 {
     57  __m128i iv = *p;
     58  for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
     59  {
     60    UInt32 numRounds2 = *(const UInt32 *)(p + 1);
     61    const __m128i *w = p + numRounds2 * 2;
     62    __m128i m0, m1, m2;
     63    {
     64      const __m128i t = w[2];
     65      m0 = _mm_xor_si128(t, data[0]);
     66      m1 = _mm_xor_si128(t, data[1]);
     67      m2 = _mm_xor_si128(t, data[2]);
     68    }
     69    numRounds2--;
     70    do
     71    {
     72      AES_DEC(1)
     73      AES_DEC(0)
     74      w -= 2;
     75    }
     76    while (--numRounds2 != 0);
     77    AES_DEC(1)
     78    AES_DEC_LAST(0)
     79 
     80    {
     81      __m128i t;
     82      t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
     83      t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
     84      t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
     85    }
     86  }
     87  for (; numBlocks != 0; numBlocks--, data++)
     88  {
     89    UInt32 numRounds2 = *(const UInt32 *)(p + 1);
     90    const __m128i *w = p + numRounds2 * 2;
     91    __m128i m = _mm_xor_si128(w[2], *data);
     92    numRounds2--;
     93    do
     94    {
     95      m = _mm_aesdec_si128(m, w[1]);
     96      m = _mm_aesdec_si128(m, w[0]);
     97      w -= 2;
     98    }
     99    while (--numRounds2 != 0);
    100    m = _mm_aesdec_si128(m, w[1]);
    101    m = _mm_aesdeclast_si128(m, w[0]);
    102 
    103    m = _mm_xor_si128(m, iv);
    104    iv = *data;
    105    *data = m;
    106  }
    107  *p = iv;
    108 }
    109 
    110 void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
    111 {
    112  __m128i ctr = *p;
    113  __m128i one;
    114  one.m128i_u64[0] = 1;
    115  one.m128i_u64[1] = 0;
    116  for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
    117  {
    118    UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
    119    const __m128i *w = p;
    120    __m128i m0, m1, m2;
    121    {
    122      const __m128i t = w[2];
    123      ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
    124      ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
    125      ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
    126    }
    127    w += 3;
    128    do
    129    {
    130      AES_ENC(0)
    131      AES_ENC(1)
    132      w += 2;
    133    }
    134    while (--numRounds2 != 0);
    135    AES_ENC(0)
    136    AES_ENC_LAST(1)
    137    data[0] = _mm_xor_si128(data[0], m0);
    138    data[1] = _mm_xor_si128(data[1], m1);
    139    data[2] = _mm_xor_si128(data[2], m2);
    140  }
    141  for (; numBlocks != 0; numBlocks--, data++)
    142  {
    143    UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
    144    const __m128i *w = p;
    145    __m128i m;
    146    ctr = _mm_add_epi64(ctr, one);
    147    m = _mm_xor_si128(ctr, p[2]);
    148    w += 3;
    149    do
    150    {
    151      m = _mm_aesenc_si128(m, w[0]);
    152      m = _mm_aesenc_si128(m, w[1]);
    153      w += 2;
    154    }
    155    while (--numRounds2 != 0);
    156    m = _mm_aesenc_si128(m, w[0]);
    157    m = _mm_aesenclast_si128(m, w[1]);
    158    *data = _mm_xor_si128(*data, m);
    159  }
    160  *p = ctr;
    161 }
    162 
    163 #else
    164 
    165 void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
    166 void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
    167 void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
    168 
    169 void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
    170 {
    171  AesCbc_Encode(p, data, numBlocks);
    172 }
    173 
    174 void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
    175 {
    176  AesCbc_Decode(p, data, numBlocks);
    177 }
    178 
    179 void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
    180 {
    181  AesCtr_Code(p, data, numBlocks);
    182 }
    183 
    184 #endif