aes-x86.c (7628B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #ifdef FREEBL_NO_DEPEND 6 #include "stubs.h" 7 #endif 8 #include "rijndael.h" 9 #include "secerr.h" 10 11 #include <wmmintrin.h> /* aes-ni */ 12 13 #define EXPAND_KEY128(k, rcon, res) \ 14 tmp_key = _mm_aeskeygenassist_si128(k, rcon); \ 15 tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \ 16 tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \ 17 tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ 18 tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ 19 res = _mm_xor_si128(tmp, tmp_key) 20 21 static void 22 native_key_expansion128(AESContext *cx, const unsigned char *key) 23 { 24 __m128i *keySchedule = cx->k.keySchedule; 25 pre_align __m128i tmp_key post_align; 26 pre_align __m128i tmp post_align; 27 keySchedule[0] = _mm_loadu_si128((__m128i *)key); 28 EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); 29 EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); 30 EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); 31 EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); 32 EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); 33 EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); 34 EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); 35 EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); 36 EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); 37 EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); 38 } 39 40 #define EXPAND_KEY192_PART1(res, k0, kt, rcon) \ 41 tmp2 = _mm_slli_si128(k0, 4); \ 42 tmp1 = _mm_xor_si128(k0, tmp2); \ 43 tmp2 = _mm_slli_si128(tmp2, 4); \ 44 tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ 45 tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \ 46 res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55)) 47 48 #define EXPAND_KEY192_PART2(res, k1, k2) \ 49 tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \ 50 res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF)) 51 52 #define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \ 53 EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \ 54 EXPAND_KEY192_PART2(carry, res1, tmp3); \ 55 res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \ 56 _mm_castsi128_pd(tmp3), 0)); \ 57 res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \ 58 _mm_castsi128_pd(carry), 1)); \ 59 EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2) 60 61 static void 62 native_key_expansion192(AESContext *cx, const unsigned char *key) 63 { 64 __m128i *keySchedule = cx->k.keySchedule; 65 pre_align __m128i tmp1 post_align; 66 pre_align __m128i tmp2 post_align; 67 pre_align __m128i tmp3 post_align; 68 pre_align __m128i carry post_align; 69 keySchedule[0] = _mm_loadu_si128((__m128i *)key); 70 keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); 71 EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], 72 keySchedule[3], carry, 0x1, 0x2); 73 EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); 74 EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], 75 keySchedule[6], carry, 0x4, 0x8); 76 EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); 77 EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], 78 keySchedule[9], carry, 0x10, 0x20); 79 EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); 80 EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], 81 keySchedule[12], carry, 0x40, 0x80); 82 } 83 84 #define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \ 85 tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \ 86 tmp2 = _mm_slli_si128(k1x, 4); \ 87 tmp1 = _mm_xor_si128(k1x, tmp2); \ 88 tmp2 = _mm_slli_si128(tmp2, 4); \ 89 tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ 90 res = _mm_xor_si128(tmp1, tmp_key); 91 92 #define EXPAND_KEY256(res1, res2, k1, k2, rcon) \ 93 EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \ 94 EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA) 95 96 static void 97 native_key_expansion256(AESContext *cx, const unsigned char *key) 98 { 99 __m128i *keySchedule = cx->k.keySchedule; 100 pre_align __m128i tmp_key post_align; 101 pre_align __m128i tmp1 post_align; 102 pre_align __m128i tmp2 post_align; 103 keySchedule[0] = _mm_loadu_si128((__m128i *)key); 104 keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); 105 EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], 106 keySchedule[1], 0x01); 107 EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], 108 keySchedule[3], 0x02); 109 EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], 110 keySchedule[5], 0x04); 111 EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], 112 keySchedule[7], 0x08); 113 EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], 114 keySchedule[9], 0x10); 115 EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], 116 keySchedule[11], 0x20); 117 EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], 118 keySchedule[13], 0xFF); 119 } 120 121 /* 122 * AES key expansion using aes-ni instructions. 123 */ 124 void 125 rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, 126 unsigned int Nk) 127 { 128 switch (Nk) { 129 case 4: 130 native_key_expansion128(cx, key); 131 return; 132 case 6: 133 native_key_expansion192(cx, key); 134 return; 135 case 8: 136 native_key_expansion256(cx, key); 137 return; 138 default: 139 /* This shouldn't happen (checked by the caller). */ 140 return; 141 } 142 } 143 144 void 145 rijndael_native_encryptBlock(AESContext *cx, 146 unsigned char *output, 147 const unsigned char *input) 148 { 149 unsigned int i; 150 pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); 151 m = _mm_xor_si128(m, cx->k.keySchedule[0]); 152 for (i = 1; i < cx->Nr; ++i) { 153 m = _mm_aesenc_si128(m, cx->k.keySchedule[i]); 154 } 155 m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]); 156 _mm_storeu_si128((__m128i *)output, m); 157 } 158 159 void 160 rijndael_native_decryptBlock(AESContext *cx, 161 unsigned char *output, 162 const unsigned char *input) 163 { 164 int i; 165 pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); 166 m = _mm_xor_si128(m, cx->k.keySchedule[cx->Nr]); 167 for (i = cx->Nr - 1; i > 0; --i) { 168 m = _mm_aesdec_si128(m, cx->k.keySchedule[i]); 169 } 170 m = _mm_aesdeclast_si128(m, cx->k.keySchedule[0]); 171 _mm_storeu_si128((__m128i *)output, m); 172 } 173 174 // out = a ^ b 175 void 176 native_xorBlock(unsigned char *out, 177 const unsigned char *a, 178 const unsigned char *b) 179 { 180 pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a)); 181 pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b)); 182 in1 = _mm_xor_si128(in1, in2); 183 _mm_storeu_si128((__m128i *)(out), in1); 184 }