rijndael.c (47416B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifdef FREEBL_NO_DEPEND
#include "stubs.h"
#endif

#include "blapit.h"
#include "prenv.h"
#include "prerr.h"
#include "prinit.h"
#include "secerr.h"

#include "prtypes.h"
#include "blapi.h"
#include "rijndael.h"

#include "cts.h"
#include "ctr.h"
#include "gcm.h"
#include "mpi.h"

/* Hardware AES has not been tested on big-endian ARM platforms, so it is
 * disabled there; only little-endian builds and x86/x64 keep USE_HW_AES. */
#if !defined(IS_LITTLE_ENDIAN) && !defined(NSS_X86_OR_X64)
// not test yet on big endian platform of arm
#undef USE_HW_AES
#endif

#ifdef __powerpc64__
#include "ppc-crypto.h"
#endif

#ifdef USE_HW_AES
#ifdef NSS_X86_OR_X64
#include "intel-aes.h"
#else
#include "aes-armv8.h"
#endif
#endif /* USE_HW_AES */
#ifdef INTEL_GCM
#include "intel-gcm.h"
#endif /* INTEL_GCM */
#if defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
#include "ppc-gcm.h"
#endif

/* Forward declarations.
 * The rijndael_native_* routines are the AES-NI/ARMv8 implementations;
 * real definitions live in the platform-specific units included above. */
void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
                                   unsigned int Nk);
void rijndael_native_encryptBlock(AESContext *cx,
                                  unsigned char *output,
                                  const unsigned char *input);
void rijndael_native_decryptBlock(AESContext *cx,
                                  unsigned char *output,
                                  const unsigned char *input);
void native_xorBlock(unsigned char *out,
                     const unsigned char *a,
                     const unsigned char *b);

/* Stub definitions for the above rijndael_native_* functions, which
 * shouldn't be used unless NSS_X86_OR_X64 is defined.
 * Each stub records a library failure and asserts: reaching one of these
 * means the dispatch logic selected a hardware path that does not exist
 * on this build. */
#ifndef NSS_X86_OR_X64
void
rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
                              unsigned int Nk)
{
    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
    PORT_Assert(0);
}

void
rijndael_native_encryptBlock(AESContext *cx,
                             unsigned char *output,
                             const unsigned char *input)
{
    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
    PORT_Assert(0);
}

void
rijndael_native_decryptBlock(AESContext *cx,
                             unsigned char *output,
                             const unsigned char *input)
{
    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
    PORT_Assert(0);
}

void
native_xorBlock(unsigned char *out, const unsigned char *a,
                const unsigned char *b)
{
    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
    PORT_Assert(0);
}
#endif /* NSS_X86_OR_X64 */

/*
 * There are currently three ways to build this code, varying in performance
 * and code size.
 *
 * RIJNDAEL_INCLUDE_TABLES         Include all tables from rijndael32.tab
 * RIJNDAEL_GENERATE_VALUES        Do not store tables, generate the table
 *                                 values "on-the-fly", using gfm
 * RIJNDAEL_GENERATE_VALUES_MACRO  Same as above, but use macros
 *
 * The default is RIJNDAEL_INCLUDE_TABLES.
 */

/*
 * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4],
 *  T**-1[0..4], IMXC[0..4]
 * When building anything else, includes S, S**-1, Rcon
 */
#include "rijndael32.tab"

#if defined(RIJNDAEL_INCLUDE_TABLES)
/*
 * RIJNDAEL_INCLUDE_TABLES: the T-tables are precomputed in rijndael32.tab,
 * so the accessors are plain array lookups.
 */
#define T0(i) _T0[i]
#define T1(i) _T1[i]
#define T2(i) _T2[i]
#define T3(i) _T3[i]
#define TInv0(i) _TInv0[i]
#define TInv1(i) _TInv1[i]
#define TInv2(i) _TInv2[i]
#define TInv3(i) _TInv3[i]
#define IMXC0(b) _IMXC0[b]
#define IMXC1(b) _IMXC1[b]
#define IMXC2(b) _IMXC2[b]
#define IMXC3(b) _IMXC3[b]
/* The S-box can be recovered from the T-tables: the byte holding
 * GFM01(SBOX(b)) sits in a different word position depending on
 * endianness, hence the different table per branch. */
#ifdef IS_LITTLE_ENDIAN
#define SBOX(b) ((PRUint8)_T3[b])
#else
#define SBOX(b) ((PRUint8)_T1[b])
#endif
#define SINV(b) (_SInv[b])

#else /* not RIJNDAEL_INCLUDE_TABLES */

/*
 * Code for generating T-table values.
 */

/* WORD4 packs four bytes into a PRUint32 so that b0 is the byte at the
 * lowest memory address of the word on either endianness. */
#ifdef IS_LITTLE_ENDIAN
#define WORD4(b0, b1, b2, b3) \
    ((((PRUint32)b3) << 24) | \
     (((PRUint32)b2) << 16) | \
     (((PRUint32)b1) << 8) |  \
     ((PRUint32)b0))
#else
#define WORD4(b0, b1, b2, b3) \
    ((((PRUint32)b0) << 24) | \
     (((PRUint32)b1) << 16) | \
     (((PRUint32)b2) << 8) |  \
     ((PRUint32)b3))
#endif

/*
 * Define the S and S**-1 tables (both have been stored)
 */
#define SBOX(b) (_S[b])
#define SINV(b) (_SInv[b])

/*
 * The function xtime, used for Galois field multiplication:
 * multiply by x (i.e. 0x02) in GF(2**8), reducing by the AES
 * polynomial x**8 + x**4 + x**3 + x + 1 (0x1b) on overflow.
 */
#define XTIME(a) \
    ((a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1))

/* Choose GFM method (macros or function) */
#if defined(RIJNDAEL_GENERATE_VALUES_MACRO)

/*
 * Galois field GF(2**8) multipliers, in macro form
 */
#define GFM01(a) \
    (a) /* a * 01 = a, the identity */
#define GFM02(a) \
    (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
#define GFM04(a) \
    (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */
#define GFM08(a) \
    (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */
#define GFM03(a) \
    (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */
#define GFM09(a) \
    (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */
#define GFM0B(a) \
    (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */
#define GFM0D(a) \
    (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */
#define GFM0E(a) \
    (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */

#else /* RIJNDAEL_GENERATE_VALUES */

/* GF_MULTIPLY
 *
 * multiply two bytes represented in GF(2**8), mod (x**4 + 1)
 *
 * Classic shift-and-add (here: xtime-and-xor) multiply: for each set
 * bit of b, accumulate the correspondingly shifted multiple of a.
 */
PRUint8
gfm(PRUint8 a, PRUint8 b)
{
    PRUint8 res = 0;
    while (b > 0) {
        res = (b & 0x01) ? res ^ a : res;
        a = XTIME(a);
        b >>= 1;
    }
    return res;
}

#define GFM01(a) \
    (a) /* a * 01 = a, the identity */
#define GFM02(a) \
    (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
#define GFM03(a) \
    (gfm(a, 0x03)) /* a * 03 */
#define GFM09(a) \
    (gfm(a, 0x09)) /* a * 09 */
#define GFM0B(a) \
    (gfm(a, 0x0B)) /* a * 0B */
#define GFM0D(a) \
    (gfm(a, 0x0D)) /* a * 0D */
#define GFM0E(a) \
    (gfm(a, 0x0E)) /* a * 0E */

#endif /* choosing GFM function */

/*
 * The T-tables: each entry combines SubBytes with one column of the
 * MixColumns matrix (coefficients 02,01,01,03 rotated per table).
 */
#define G_T0(i) \
    (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i))))
#define G_T1(i) \
    (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i))))
#define G_T2(i) \
    (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i))))
#define G_T3(i) \
    (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i))))

/*
 * The inverse T-tables: InvSubBytes combined with one column of the
 * InvMixColumns matrix (coefficients 0E,09,0D,0B rotated per table).
 */
#define G_TInv0(i) \
    (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i))))
#define G_TInv1(i) \
    (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i))))
#define G_TInv2(i) \
    (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i))))
#define G_TInv3(i) \
    (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i))))

/*
 * The inverse mix column tables (InvMixColumns only, no S-box; used by
 * the decryption key schedule).
 */
#define G_IMXC0(i) \
    (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i)))
#define G_IMXC1(i) \
    (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i)))
#define G_IMXC2(i) \
    (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i)))
#define G_IMXC3(i) \
    (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i)))

/* Now choose the T-table indexing method */
#if defined(RIJNDAEL_GENERATE_VALUES)
/* generate values for the tables with a function */

/* gen_TInvXi computes the tx-th inverse T-table entry for index i by
 * building all needed GF(2**8) multiples of SINV(i) via repeated xtime,
 * then packing them in the rotation selected by tx (0..3). */
static PRUint32
gen_TInvXi(PRUint8 tx, PRUint8 i)
{
    PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
    si01 = SINV(i);
    si02 = XTIME(si01);
    si04 = XTIME(si02);
    si08 = XTIME(si04);
    si03 = si02 ^ si01;
    si09 = si08 ^ si01;
    si0B = si08 ^ si03;
    si0D = si09 ^ si04;
    si0E = si08 ^ si04 ^ si02;
    switch (tx) {
        case 0:
            return WORD4(si0E, si09, si0D, si0B);
        case 1:
            return WORD4(si0B, si0E, si09, si0D);
        case 2:
            return WORD4(si0D, si0B, si0E, si09);
        case 3:
            return WORD4(si09, si0D, si0B, si0E);
    }
    return -1;
}
#define T0(i) G_T0(i)
#define T1(i) G_T1(i)
#define T2(i) G_T2(i)
#define T3(i) G_T3(i)
#define TInv0(i) gen_TInvXi(0, i)
#define TInv1(i) gen_TInvXi(1, i)
#define TInv2(i) gen_TInvXi(2, i)
#define TInv3(i) gen_TInvXi(3, i)
#define IMXC0(b) G_IMXC0(b)
#define IMXC1(b) G_IMXC1(b)
#define IMXC2(b) G_IMXC2(b)
#define IMXC3(b) G_IMXC3(b)
#else /* RIJNDAEL_GENERATE_VALUES_MACRO */
/* generate values for the tables with macros */
#define T0(i) G_T0(i)
#define T1(i) G_T1(i)
#define T2(i) G_T2(i)
#define T3(i) G_T3(i)
#define TInv0(i) G_TInv0(i)
#define TInv1(i) G_TInv1(i)
#define TInv2(i) G_TInv2(i)
#define TInv3(i) G_TInv3(i)
#define IMXC0(b) G_IMXC0(b)
#define IMXC1(b) G_IMXC1(b)
#define IMXC2(b) G_IMXC2(b)
#define IMXC3(b) G_IMXC3(b)
#endif /* choose T-table indexing method */

#endif /* not RIJNDAEL_INCLUDE_TABLES */

/**************************************************************************
 *
 * Stuff related to the Rijndael key schedule
 *
 *************************************************************************/

/* SUBBYTE applies the S-box to each of the four bytes of a key word
 * (the SubWord step of the key schedule). */
#define SUBBYTE(w)                                \
    ((((PRUint32)SBOX((w >> 24) & 0xff)) << 24) | \
     (((PRUint32)SBOX((w >> 16) & 0xff)) << 16) | \
     (((PRUint32)SBOX((w >> 8) & 0xff)) << 8) |   \
     (((PRUint32)SBOX((w)&0xff))))

/* ROTBYTE rotates a key word so its bytes shift by one memory position
 * (the RotWord step); the shift direction depends on byte order. */
#ifdef IS_LITTLE_ENDIAN
#define ROTBYTE(b) \
    ((b >> 8) | (b << 24))
#else
#define ROTBYTE(b) \
    ((b << 8) | (b >> 24))
#endif

/* rijndael_key_expansion7
 *
 * Generate the expanded key from the key input by the user.
 * XXX
 * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte
 * transformation is done periodically. The period is every 4 bytes, and
 * since 7%4 != 0 this happens at different times for each key word (unlike
 * Nk == 8 where it happens twice in every key word, in the same positions).
 * For now, I'm implementing this case "dumbly", w/o any unrolling.
 */
static void
rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)
{
    unsigned int i;
    PRUint32 *W;
    PRUint32 *pW;
    PRUint32 tmp;
    W = cx->k.expandedKey;
    /* 1. the first Nk words contain the cipher key */
    memcpy(W, key, Nk * 4);
    i = Nk;
    /* 2. loop until full expanded key is obtained */
    pW = W + i - 1; /* pW trails the word being written: *pW++ reads W[i-1] */
    for (; i < cx->Nb * (cx->Nr + 1); ++i) {
        tmp = *pW++;
        if (i % Nk == 0)
            tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
        else if (i % Nk == 4)
            tmp = SUBBYTE(tmp); /* extra SubWord required when Nk > 6 */
        *pW = W[i - Nk] ^ tmp;
    }
}

/* rijndael_key_expansion
 *
 * Generate the expanded key from the key input by the user.
 *
 * Produces Nb*(Nr+1) round-key words in cx->k.expandedKey. The main loop
 * is unrolled per set of Nk words: the first word of each set gets
 * RotWord+SubWord+Rcon, and for Nk == 8 an extra SubWord is applied at
 * offset 4 within the set. Nk == 7 is delegated to the unrolled-free
 * variant above because its SubWord positions drift between sets.
 */
static void
rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
{
    unsigned int i;
    PRUint32 *W;
    PRUint32 *pW;
    PRUint32 tmp;
    unsigned int round_key_words = cx->Nb * (cx->Nr + 1);
    if (Nk == 7) {
        rijndael_key_expansion7(cx, key, Nk);
        return;
    }
    W = cx->k.expandedKey;
    /* The first Nk words contain the input cipher key */
    memcpy(W, key, Nk * 4);
    i = Nk;
    pW = W + i - 1;
    /* Loop over all sets of Nk words, except the last */
    while (i < round_key_words - Nk) {
        tmp = *pW++;
        tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
        *pW = W[i++ - Nk] ^ tmp;
        tmp = *pW++;
        *pW = W[i++ - Nk] ^ tmp;
        tmp = *pW++;
        *pW = W[i++ - Nk] ^ tmp;
        tmp = *pW++;
        *pW = W[i++ - Nk] ^ tmp;
        if (Nk == 4)
            continue;
        /* Deliberate fallthrough: case N generates the remaining
         * (Nk - 4) words of the set, so larger Nk starts higher up. */
        switch (Nk) {
            case 8:
                tmp = *pW++;
                tmp = SUBBYTE(tmp); /* extra SubWord at i % Nk == 4 */
                *pW = W[i++ - Nk] ^ tmp;
            /* FALLTHROUGH */
            case 7:
                tmp = *pW++;
                *pW = W[i++ - Nk] ^ tmp;
            /* FALLTHROUGH */
            case 6:
                tmp = *pW++;
                *pW = W[i++ - Nk] ^ tmp;
            /* FALLTHROUGH */
            case 5:
                tmp = *pW++;
                *pW = W[i++ - Nk] ^ tmp;
        }
    }
    /* Generate the last word */
    tmp = *pW++;
    tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
    *pW = W[i++ - Nk] ^ tmp;
    /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However,
     * since the above loop generated all but the last Nk key words, there
     * is no more need for the SubByte transformation.
     */
    if (Nk < 8) {
        for (; i < round_key_words; ++i) {
            tmp = *pW++;
            *pW = W[i - Nk] ^ tmp;
        }
    } else {
        /* except in the case when Nk == 8.  Then one more SubByte may have
         * to be performed, at i % Nk == 4.
         */
        for (; i < round_key_words; ++i) {
            tmp = *pW++;
            if (i % Nk == 4)
                tmp = SUBBYTE(tmp);
            *pW = W[i - Nk] ^ tmp;
        }
    }
}

/* rijndael_invkey_expansion
 *
 * Generate the expanded key for the inverse cipher from the key input by
 * the user.
 */
static void
rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
{
    unsigned int r;
    PRUint32 *roundkeyw;
    PRUint8 *b;
    int Nb = cx->Nb;
    /* begins like usual key expansion ... */
    rijndael_key_expansion(cx, key, Nk);
    /* ... but has the additional step of InvMixColumn,
     * excepting the first and last round keys.
     */
    roundkeyw = cx->k.expandedKey + cx->Nb;
    for (r = 1; r < cx->Nr; ++r) {
        /* each key word, roundkeyw, represents a column in the key
         * matrix.  Each column is multiplied by the InvMixColumn matrix.
         *   [ 0E 0B 0D 09 ]   [ b0 ]
         *   [ 09 0E 0B 0D ] * [ b1 ]
         *   [ 0D 09 0E 0B ]   [ b2 ]
         *   [ 0B 0D 09 0E ]   [ b3 ]
         */
        b = (PRUint8 *)roundkeyw;
        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
        b = (PRUint8 *)roundkeyw;
        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
        b = (PRUint8 *)roundkeyw;
        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
        b = (PRUint8 *)roundkeyw;
        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
        if (Nb <= 4)
            continue;
        /* Deliberate fallthrough: transform the remaining (Nb - 4)
         * columns for wide-block Rijndael variants. */
        switch (Nb) {
            case 8:
                b = (PRUint8 *)roundkeyw;
                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
                               IMXC2(b[2]) ^ IMXC3(b[3]);
            /* FALLTHROUGH */
            case 7:
                b = (PRUint8 *)roundkeyw;
                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
                               IMXC2(b[2]) ^ IMXC3(b[3]);
            /* FALLTHROUGH */
            case 6:
                b = (PRUint8 *)roundkeyw;
                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
                               IMXC2(b[2]) ^ IMXC3(b[3]);
            /* FALLTHROUGH */
            case 5:
                b = (PRUint8 *)roundkeyw;
                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
                               IMXC2(b[2]) ^ IMXC3(b[3]);
        }
    }
}

/**************************************************************************
 *
 * Stuff related to Rijndael encryption/decryption.
 *
 *************************************************************************/

/* BYTEnWORD masks out the byte of a word that sits at memory offset n;
 * the mask value therefore differs by endianness. */
#ifdef IS_LITTLE_ENDIAN
#define BYTE0WORD(w) ((w)&0x000000ff)
#define BYTE1WORD(w) ((w)&0x0000ff00)
#define BYTE2WORD(w) ((w)&0x00ff0000)
#define BYTE3WORD(w) ((w)&0xff000000)
#else
#define BYTE0WORD(w) ((w)&0xff000000)
#define BYTE1WORD(w) ((w)&0x00ff0000)
#define BYTE2WORD(w) ((w)&0x0000ff00)
#define BYTE3WORD(w) ((w)&0x000000ff)
#endif

/* The 128-bit cipher state, viewable either as four 32-bit columns
 * or as 16 individual bytes. */
typedef union {
    PRUint32 w[4];
    PRUint8 b[16];
} rijndael_state;

#define COLUMN_0(state) state.w[0]
#define COLUMN_1(state) state.w[1]
#define COLUMN_2(state) state.w[2]
#define COLUMN_3(state) state.w[3]

#define STATE_BYTE(i) state.b[i]

// out = a ^ b, one AES block (16 bytes) at a time; portable fallback for
// native_xorBlock.
inline static void
xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b)
{
    for (unsigned int j = 0; j < AES_BLOCK_SIZE; ++j) {
        (out)[j] = (a)[j] ^ (b)[j];
    }
}

/* Encrypt a single 16-byte block with the table-based implementation.
 * On non-x86 platforms, unaligned input/output is staged through local
 * word-aligned buffers because the state is accessed as PRUint32. */
static void NO_SANITIZE_ALIGNMENT
rijndael_encryptBlock128(AESContext *cx,
                         unsigned char *output,
                         const unsigned char *input)
{
    unsigned int r;
    PRUint32 *roundkeyw;
    rijndael_state state;
    PRUint32 C0, C1, C2, C3;
#if defined(NSS_X86_OR_X64)
#define pIn input
#define pOut output
#else
    unsigned char *pIn, *pOut;
    PRUint32 inBuf[4], outBuf[4];

    if ((ptrdiff_t)input & 0x3) {
        memcpy(inBuf, input, sizeof inBuf);
        pIn = (unsigned char *)inBuf;
    } else {
        pIn = (unsigned char *)input;
    }
    if ((ptrdiff_t)output & 0x3) {
        pOut = (unsigned char *)outBuf;
    } else {
        pOut = (unsigned char *)output;
    }
#endif
    roundkeyw = cx->k.expandedKey;
    /* Step 1: Add Round Key 0 to initial state */
    COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++;
    COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++;
    COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++;
    COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;
    /* Step 2: Loop over rounds [1..NR-1] */
    for (r = 1; r < cx->Nr; ++r) {
        /* Do ShiftRow, ByteSub, and MixColumn all at once */
        C0 = T0(STATE_BYTE(0)) ^
             T1(STATE_BYTE(5)) ^
             T2(STATE_BYTE(10)) ^
             T3(STATE_BYTE(15));
        C1 = T0(STATE_BYTE(4)) ^
             T1(STATE_BYTE(9)) ^
             T2(STATE_BYTE(14)) ^
             T3(STATE_BYTE(3));
        C2 = T0(STATE_BYTE(8)) ^
             T1(STATE_BYTE(13)) ^
             T2(STATE_BYTE(2)) ^
             T3(STATE_BYTE(7));
        C3 = T0(STATE_BYTE(12)) ^
             T1(STATE_BYTE(1)) ^
             T2(STATE_BYTE(6)) ^
             T3(STATE_BYTE(11));
        /* Round key addition */
        COLUMN_0(state) = C0 ^ *roundkeyw++;
        COLUMN_1(state) = C1 ^ *roundkeyw++;
        COLUMN_2(state) = C2 ^ *roundkeyw++;
        COLUMN_3(state) = C3 ^ *roundkeyw++;
    }
    /* Step 3: Do the last round */
    /* Final round does not employ MixColumn, so the S-box output is
     * recovered byte-by-byte from the T-tables via the BYTEnWORD masks. */
    C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) |
          (BYTE1WORD(T3(STATE_BYTE(5)))) |
          (BYTE2WORD(T0(STATE_BYTE(10)))) |
          (BYTE3WORD(T1(STATE_BYTE(15))))) ^
         *roundkeyw++;
    C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) |
          (BYTE1WORD(T3(STATE_BYTE(9)))) |
          (BYTE2WORD(T0(STATE_BYTE(14)))) |
          (BYTE3WORD(T1(STATE_BYTE(3))))) ^
         *roundkeyw++;
    C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) |
          (BYTE1WORD(T3(STATE_BYTE(13)))) |
          (BYTE2WORD(T0(STATE_BYTE(2)))) |
          (BYTE3WORD(T1(STATE_BYTE(7))))) ^
         *roundkeyw++;
    C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) |
          (BYTE1WORD(T3(STATE_BYTE(1)))) |
          (BYTE2WORD(T0(STATE_BYTE(6)))) |
          (BYTE3WORD(T1(STATE_BYTE(11))))) ^
         *roundkeyw++;
    *((PRUint32 *)pOut) = C0;
    *((PRUint32 *)(pOut + 4)) = C1;
    *((PRUint32 *)(pOut + 8)) = C2;
    *((PRUint32 *)(pOut + 12)) = C3;
#if defined(NSS_X86_OR_X64)
#undef pIn
#undef pOut
#else
    if ((ptrdiff_t)output & 0x3) {
        memcpy(output, outBuf, sizeof outBuf);
    }
#endif
}

/* Decrypt a single 16-byte block with the table-based implementation.
 * Walks the expanded key backwards from the last round-key word; the
 * same alignment staging as the encrypt path applies off x86. */
static void NO_SANITIZE_ALIGNMENT
rijndael_decryptBlock128(AESContext *cx,
                         unsigned char *output,
                         const unsigned char *input)
{
    int r;
    PRUint32 *roundkeyw;
    rijndael_state state;
    PRUint32 C0, C1, C2, C3;
#if defined(NSS_X86_OR_X64)
#define pIn input
#define pOut output
#else
    unsigned char *pIn, *pOut;
    PRUint32 inBuf[4], outBuf[4];

    if ((ptrdiff_t)input & 0x3) {
        memcpy(inBuf, input, sizeof inBuf);
        pIn = (unsigned char *)inBuf;
    } else {
        pIn = (unsigned char *)input;
    }
    if ((ptrdiff_t)output & 0x3) {
        pOut = (unsigned char *)outBuf;
    } else {
        pOut = (unsigned char *)output;
    }
#endif
    /* point at the last word of the expanded key */
    roundkeyw = cx->k.expandedKey + cx->Nb * cx->Nr + 3;
    /* reverse the final key addition */
    COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;
    COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--;
    COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--;
    COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--;
    /* Loop over rounds in reverse [NR..1] */
    for (r = cx->Nr; r > 1; --r) {
        /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
        C0 = TInv0(STATE_BYTE(0)) ^
             TInv1(STATE_BYTE(13)) ^
             TInv2(STATE_BYTE(10)) ^
             TInv3(STATE_BYTE(7));
        C1 = TInv0(STATE_BYTE(4)) ^
             TInv1(STATE_BYTE(1)) ^
             TInv2(STATE_BYTE(14)) ^
             TInv3(STATE_BYTE(11));
        C2 = TInv0(STATE_BYTE(8)) ^
             TInv1(STATE_BYTE(5)) ^
             TInv2(STATE_BYTE(2)) ^
             TInv3(STATE_BYTE(15));
        C3 = TInv0(STATE_BYTE(12)) ^
             TInv1(STATE_BYTE(9)) ^
             TInv2(STATE_BYTE(6)) ^
             TInv3(STATE_BYTE(3));
        /* Invert the key addition step */
        COLUMN_3(state) = C3 ^ *roundkeyw--;
        COLUMN_2(state) = C2 ^ *roundkeyw--;
        COLUMN_1(state) = C1 ^ *roundkeyw--;
        COLUMN_0(state) = C0 ^ *roundkeyw--;
    }
    /* inverse sub (with InvShiftRow folded into the byte indices) */
    pOut[0] = SINV(STATE_BYTE(0));
    pOut[1] = SINV(STATE_BYTE(13));
    pOut[2] = SINV(STATE_BYTE(10));
    pOut[3] = SINV(STATE_BYTE(7));
    pOut[4] = SINV(STATE_BYTE(4));
    pOut[5] = SINV(STATE_BYTE(1));
    pOut[6] = SINV(STATE_BYTE(14));
    pOut[7] = SINV(STATE_BYTE(11));
    pOut[8] = SINV(STATE_BYTE(8));
    pOut[9] = SINV(STATE_BYTE(5));
    pOut[10] = SINV(STATE_BYTE(2));
    pOut[11] = SINV(STATE_BYTE(15));
    pOut[12] = SINV(STATE_BYTE(12));
    pOut[13] = SINV(STATE_BYTE(9));
    pOut[14] = SINV(STATE_BYTE(6));
    pOut[15] = SINV(STATE_BYTE(3));
    /* final key addition */
    *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;
    *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--;
    *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--;
    *((PRUint32 *)pOut) ^= *roundkeyw--;
#if defined(NSS_X86_OR_X64)
#undef pIn
#undef pOut
#else
    if ((ptrdiff_t)output & 0x3) {
        memcpy(output, outBuf, sizeof outBuf);
    }
#endif
}

/**************************************************************************
 *
 *  Rijndael modes of operation (ECB and CBC)
 *
 *************************************************************************/

/* ECB encrypt: each block independently. inputLen is assumed to be a
 * multiple of AES_BLOCK_SIZE (callers validate); outputLen/maxOutputLen
 * are part of the common worker signature and not used here. */
static SECStatus
rijndael_encryptECB(AESContext *cx, unsigned char *output,
                    unsigned int *outputLen, unsigned int maxOutputLen,
                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)
{
    PORT_Assert(blocksize == AES_BLOCK_SIZE);
    PRBool aesni = aesni_support();
    while (inputLen > 0) {
        if (aesni) {
            rijndael_native_encryptBlock(cx, output, input);
        } else {
            rijndael_encryptBlock128(cx, output, input);
        }
        output += AES_BLOCK_SIZE;
        input += AES_BLOCK_SIZE;
        inputLen -= AES_BLOCK_SIZE;
    }
    return SECSuccess;
}

/* CBC encrypt: chain each plaintext block with the previous ciphertext
 * block (cx->iv for the first); the final ciphertext block is saved back
 * into cx->iv so a subsequent call continues the chain. */
static SECStatus
rijndael_encryptCBC(AESContext *cx, unsigned char *output,
                    unsigned int *outputLen, unsigned int maxOutputLen,
                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)
{
    PORT_Assert(blocksize == AES_BLOCK_SIZE);
    unsigned char *lastblock = cx->iv;
    unsigned char inblock[AES_BLOCK_SIZE * 8];
    PRBool aesni = aesni_support();

    if (!inputLen)
        return SECSuccess;
    while (inputLen > 0) {
        if (aesni) {
            /* XOR with the last block (IV if first block) */
            native_xorBlock(inblock, input, lastblock);
            /* encrypt */
            rijndael_native_encryptBlock(cx, output, inblock);
        } else {
            xorBlock(inblock, input, lastblock);
            rijndael_encryptBlock128(cx, output, inblock);
        }

        /* move to the next block */
        lastblock = output;
        output += AES_BLOCK_SIZE;
        input += AES_BLOCK_SIZE;
        inputLen -= AES_BLOCK_SIZE;
    }
    memcpy(cx->iv, lastblock, AES_BLOCK_SIZE);
    return SECSuccess;
}

/* ECB decrypt: each block independently (mirror of rijndael_encryptECB). */
static SECStatus
rijndael_decryptECB(AESContext *cx, unsigned char *output,
                    unsigned int *outputLen, unsigned int maxOutputLen,
                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)
{
    PORT_Assert(blocksize == AES_BLOCK_SIZE);
    PRBool aesni = aesni_support();
    while (inputLen > 0) {
        if (aesni) {
            rijndael_native_decryptBlock(cx, output, input);
        } else {
            rijndael_decryptBlock128(cx, output, input);
        }
        output += AES_BLOCK_SIZE;
        input += AES_BLOCK_SIZE;
        inputLen -= AES_BLOCK_SIZE;
    }
    return SECSuccess;
}

/* CBC decrypt: processed back-to-front so in-place (input == output)
 * operation works — the previous ciphertext block needed for the XOR is
 * still intact when each block is decrypted. The last ciphertext block
 * is captured up front as the next chaining IV. */
static SECStatus
rijndael_decryptCBC(AESContext *cx, unsigned char *output,
                    unsigned int *outputLen, unsigned int maxOutputLen,
                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)
{
    PORT_Assert(blocksize == AES_BLOCK_SIZE);
    const unsigned char *in;
    unsigned char *out;
    unsigned char newIV[AES_BLOCK_SIZE];
    PRBool aesni = aesni_support();

    if (!inputLen)
        return SECSuccess;
    /* buffers must either coincide exactly or not overlap at all */
    PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);
    in = input + (inputLen - AES_BLOCK_SIZE);
    memcpy(newIV, in, AES_BLOCK_SIZE);
    out = output + (inputLen - AES_BLOCK_SIZE);
    while (inputLen > AES_BLOCK_SIZE) {
        if (aesni) {
            // Use hardware acceleration for normal AES parameters.
            rijndael_native_decryptBlock(cx, out, in);
            native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
        } else {
            rijndael_decryptBlock128(cx, out, in);
            xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
        }
        out -= AES_BLOCK_SIZE;
        in -= AES_BLOCK_SIZE;
        inputLen -= AES_BLOCK_SIZE;
    }
    /* first block XORs against the saved IV instead of prior ciphertext */
    if (in == input) {
        if (aesni) {
            rijndael_native_decryptBlock(cx, out, in);
            native_xorBlock(out, out, cx->iv);
        } else {
            rijndael_decryptBlock128(cx, out, in);
            xorBlock(out, out, cx->iv);
        }
    }
    memcpy(cx->iv, newIV, AES_BLOCK_SIZE);
    return SECSuccess;
}

/* Wrap a typed cipher-update function in the common void* worker
 * signature stored in cx->worker. */
#define FREEBL_CIPHER_WRAP(ctxtype, mmm)                                                       \
    static SECStatus freeblCipher_##mmm(void *vctx, unsigned char *output,                     \
                                        unsigned int *outputLen, unsigned int maxOutputLen,    \
                                        const unsigned char *input, unsigned int inputLen,     \
                                        unsigned int blocksize)                                \
    {                                                                                          \
        ctxtype *ctx = vctx;                                                                   \
        return mmm(ctx, output, outputLen, maxOutputLen, input, inputLen, blocksize);          \
    }

FREEBL_CIPHER_WRAP(CTRContext, CTR_Update);
FREEBL_CIPHER_WRAP(CTSContext, CTS_DecryptUpdate);
FREEBL_CIPHER_WRAP(CTSContext, CTS_EncryptUpdate);
FREEBL_CIPHER_WRAP(GCMContext, GCM_DecryptUpdate);
FREEBL_CIPHER_WRAP(GCMContext, GCM_EncryptUpdate);
FREEBL_CIPHER_WRAP(AESContext, rijndael_decryptCBC);
FREEBL_CIPHER_WRAP(AESContext, rijndael_decryptECB);
FREEBL_CIPHER_WRAP(AESContext, rijndael_encryptCBC);
FREEBL_CIPHER_WRAP(AESContext, rijndael_encryptECB);

#if defined(INTEL_GCM) && defined(USE_HW_AES)
FREEBL_CIPHER_WRAP(intel_AES_GCMContext, intel_AES_GCM_DecryptUpdate);
FREEBL_CIPHER_WRAP(intel_AES_GCMContext, intel_AES_GCM_EncryptUpdate);
#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
FREEBL_CIPHER_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_DecryptUpdate);
FREEBL_CIPHER_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_EncryptUpdate);
#endif

#if defined(USE_HW_AES)
#if defined(NSS_X86_OR_X64)
FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_ecb_128); 891 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_ecb_128); 892 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_cbc_128); 893 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_cbc_128); 894 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_ecb_192); 895 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_ecb_192); 896 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_cbc_192); 897 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_cbc_192); 898 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_ecb_256); 899 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_ecb_256); 900 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_cbc_256); 901 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_cbc_256); 902 903 #define freeblCipher_native_aes_ecb_worker(encrypt, keysize) \ 904 ((encrypt) \ 905 ? ((keysize) == 16 ? freeblCipher_intel_aes_encrypt_ecb_128 \ 906 : (keysize) == 24 ? freeblCipher_intel_aes_encrypt_ecb_192 \ 907 : freeblCipher_intel_aes_encrypt_ecb_256) \ 908 : ((keysize) == 16 ? freeblCipher_intel_aes_decrypt_ecb_128 \ 909 : (keysize) == 24 ? freeblCipher_intel_aes_decrypt_ecb_192 \ 910 : freeblCipher_intel_aes_decrypt_ecb_256)) 911 912 #define freeblCipher_native_aes_cbc_worker(encrypt, keysize) \ 913 ((encrypt) \ 914 ? ((keysize) == 16 ? freeblCipher_intel_aes_encrypt_cbc_128 \ 915 : (keysize) == 24 ? freeblCipher_intel_aes_encrypt_cbc_192 \ 916 : freeblCipher_intel_aes_encrypt_cbc_256) \ 917 : ((keysize) == 16 ? freeblCipher_intel_aes_decrypt_cbc_128 \ 918 : (keysize) == 24 ? 
freeblCipher_intel_aes_decrypt_cbc_192 \ 919 : freeblCipher_intel_aes_decrypt_cbc_256)) 920 #else 921 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_ecb_128); 922 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_ecb_128); 923 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_cbc_128); 924 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_cbc_128); 925 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_ecb_192); 926 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_ecb_192); 927 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_cbc_192); 928 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_cbc_192); 929 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_ecb_256); 930 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_ecb_256); 931 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_cbc_256); 932 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_cbc_256); 933 934 #define freeblCipher_native_aes_ecb_worker(encrypt, keysize) \ 935 ((encrypt) \ 936 ? ((keysize) == 16 ? freeblCipher_arm_aes_encrypt_ecb_128 \ 937 : (keysize) == 24 ? freeblCipher_arm_aes_encrypt_ecb_192 \ 938 : freeblCipher_arm_aes_encrypt_ecb_256) \ 939 : ((keysize) == 16 ? freeblCipher_arm_aes_decrypt_ecb_128 \ 940 : (keysize) == 24 ? freeblCipher_arm_aes_decrypt_ecb_192 \ 941 : freeblCipher_arm_aes_decrypt_ecb_256)) 942 943 #define freeblCipher_native_aes_cbc_worker(encrypt, keysize) \ 944 ((encrypt) \ 945 ? ((keysize) == 16 ? freeblCipher_arm_aes_encrypt_cbc_128 \ 946 : (keysize) == 24 ? freeblCipher_arm_aes_encrypt_cbc_192 \ 947 : freeblCipher_arm_aes_encrypt_cbc_256) \ 948 : ((keysize) == 16 ? freeblCipher_arm_aes_decrypt_cbc_128 \ 949 : (keysize) == 24 ? 
freeblCipher_arm_aes_decrypt_cbc_192 \ 950 : freeblCipher_arm_aes_decrypt_cbc_256)) 951 #endif 952 #endif 953 954 #if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64) 955 FREEBL_CIPHER_WRAP(CTRContext, CTR_Update_HW_AES); 956 #endif 957 958 #define FREEBL_AEAD_WRAP(ctxtype, mmm) \ 959 static SECStatus freeblAead_##mmm(void *vctx, unsigned char *output, \ 960 unsigned int *outputLen, unsigned int maxOutputLen, \ 961 const unsigned char *input, unsigned int inputLen, \ 962 void *params, unsigned int paramsLen, \ 963 const unsigned char *aad, unsigned int aadLen, \ 964 unsigned int blocksize) \ 965 { \ 966 ctxtype *ctx = vctx; \ 967 return mmm(ctx, output, outputLen, maxOutputLen, input, inputLen, params, paramsLen, aad, aadLen, blocksize); \ 968 } 969 970 FREEBL_AEAD_WRAP(GCMContext, GCM_EncryptAEAD); 971 FREEBL_AEAD_WRAP(GCMContext, GCM_DecryptAEAD); 972 973 #if defined(INTEL_GCM) && defined(USE_HW_AES) 974 FREEBL_AEAD_WRAP(intel_AES_GCMContext, intel_AES_GCM_EncryptAEAD); 975 FREEBL_AEAD_WRAP(intel_AES_GCMContext, intel_AES_GCM_DecryptAEAD); 976 #elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM) 977 FREEBL_AEAD_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_EncryptAEAD); 978 FREEBL_AEAD_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_DecryptAEAD); 979 #endif 980 981 #define FREEBL_DESTROY_WRAP(ctxtype, mmm) \ 982 static void freeblDestroy_##mmm(void *vctx, PRBool freeit) \ 983 { \ 984 ctxtype *ctx = vctx; \ 985 mmm(ctx, freeit); \ 986 } 987 988 FREEBL_DESTROY_WRAP(CTRContext, CTR_DestroyContext); 989 FREEBL_DESTROY_WRAP(CTSContext, CTS_DestroyContext); 990 FREEBL_DESTROY_WRAP(GCMContext, GCM_DestroyContext); 991 992 #if defined(INTEL_GCM) && defined(USE_HW_AES) 993 FREEBL_DESTROY_WRAP(intel_AES_GCMContext, intel_AES_GCM_DestroyContext); 994 #elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM) 995 FREEBL_DESTROY_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_DestroyContext); 996 #endif 997 998 /************************************************************************ 999 * 1000 * 
BLAPI Interface functions
 *
 * The following functions implement the encryption routines defined in
 * BLAPI for the AES cipher, Rijndael.
 *
 ***********************************************************************/

AESContext *
AES_AllocateContext(void)
{
    /* 16-byte alignment is required for the SIMD (AES-NI/NEON/VSX) key
     * schedule and block operations; the raw pointer is kept in cx->mem. */
    return PORT_ZNewAligned(AESContext, 16, mem);
}

/*
** Initialize a new AES context suitable for AES encryption/decryption in
** the ECB or CBC mode.
** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC
**
** On success the context's worker function, expanded key schedule, and
** (for CBC) IV are set up; cx->worker_cx points back at cx itself.
*/
static SECStatus
aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
                const unsigned char *iv, int mode, unsigned int encrypt)
{
    unsigned int Nk;
    PRBool use_hw_aes;
    /* According to AES, block lengths are 128 and key lengths are 128, 192, or
     * 256 bits. We support other key sizes as well [128, 256] as long as the
     * length in bytes is divisible by 4.
     */

    if (key == NULL ||
        keysize < AES_BLOCK_SIZE ||
        keysize > 32 ||
        keysize % 4 != 0) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }
    if (mode != NSS_AES && mode != NSS_AES_CBC) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }
    if (mode == NSS_AES_CBC && iv == NULL) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }
    if (!cx) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }
#if defined(NSS_X86_OR_X64) || defined(USE_HW_AES)
    /* Hardware paths only support the standard AES key sizes (multiples
     * of 8 bytes); odd 4-byte-multiple sizes fall back to software. */
    use_hw_aes = (aesni_support() || arm_aes_support()) && (keysize % 8) == 0;
#else
    use_hw_aes = PR_FALSE;
#endif
    /* Nb = (block size in bits) / 32 */
    cx->Nb = AES_BLOCK_SIZE / 4;
    /* Nk = (key size in bits) / 32 */
    Nk = keysize / 4;
    /* Obtain number of rounds from "table" */
    cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
    /* copy in the iv, if necessary */
    if (mode == NSS_AES_CBC) {
        memcpy(cx->iv, iv, AES_BLOCK_SIZE);
#ifdef USE_HW_AES
        if (use_hw_aes) {
            cx->worker = freeblCipher_native_aes_cbc_worker(encrypt, keysize);
        } else
#endif
        {
            cx->worker = encrypt ? freeblCipher_rijndael_encryptCBC : freeblCipher_rijndael_decryptCBC;
        }
    } else {
#ifdef USE_HW_AES
        if (use_hw_aes) {
            cx->worker = freeblCipher_native_aes_ecb_worker(encrypt, keysize);
        } else
#endif
        {
            cx->worker = encrypt ? freeblCipher_rijndael_encryptECB : freeblCipher_rijndael_decryptECB;
        }
    }
    /* The expanded key schedule must fit in the fixed-size context buffer. */
    PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
    if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
        return SECFailure;
    }
#ifdef USE_HW_AES
    if (use_hw_aes) {
        native_aes_init(encrypt, keysize);
    } else
#endif
    {
        /* Generate expanded key */
        if (encrypt) {
            /* cx->mode was set by AES_InitContext before this call; when the
             * x86 hardware path is usable for a standard-parameter mode, use
             * the native (AES-NI) key expansion. */
            if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES ||
                               cx->mode == NSS_AES_CTR)) {
                PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32);
                /* Prepare hardware key for normal AES parameters. */
                rijndael_native_key_expansion(cx, key, Nk);
            } else {
                rijndael_key_expansion(cx, key, Nk);
            }
        } else {
            rijndael_invkey_expansion(cx, key, Nk);
        }
        BLAPI_CLEAR_STACK(256)
    }
    cx->worker_cx = cx;
    cx->destroy = NULL;
    cx->isBlock = PR_TRUE;
    return SECSuccess;
}

/*
** Initialize an AES context for any supported mode (ECB, CBC, CTS, CTR,
** GCM). Non-base modes are layered on top of a base ECB/CBC context by
** installing a mode-specific worker context and destroy hook.
*/
SECStatus
AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
                const unsigned char *iv, int mode, unsigned int encrypt,
                unsigned int blocksize)
{
    int basemode = mode;
    PRBool baseencrypt = encrypt;
    SECStatus rv;

    if (blocksize != AES_BLOCK_SIZE) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }

    /* Map the requested mode onto the underlying block mode: CTS runs over
     * CBC; GCM and CTR run over ECB and always use the encrypt schedule. */
    switch (mode) {
        case NSS_AES_CTS:
            basemode = NSS_AES_CBC;
            break;
        case NSS_AES_GCM:
        case NSS_AES_CTR:
            basemode = NSS_AES;
            baseencrypt = PR_TRUE;
            break;
    }
    /* Make sure enough is initialized so we can safely call Destroy. */
    cx->worker_cx = NULL;
    cx->destroy = NULL;
    cx->mode = mode;
    rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt);
    if (rv != SECSuccess) {
        AES_DestroyContext(cx, PR_FALSE);
        return rv;
    }

    /* finally, set up any mode specific contexts */
    cx->worker_aead = 0;
    switch (mode) {
        case NSS_AES_CTS:
            cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv);
            cx->worker = encrypt ? freeblCipher_CTS_EncryptUpdate : freeblCipher_CTS_DecryptUpdate;
            cx->destroy = freeblDestroy_CTS_DestroyContext;
            cx->isBlock = PR_FALSE;
            break;
        case NSS_AES_GCM:
#if defined(INTEL_GCM) && defined(USE_HW_AES)
            /* Intel GCM needs AES-NI plus AVX and PCLMULQDQ support. */
            if (aesni_support() && (keysize % 8) == 0 && avx_support() &&
                clmul_support()) {
                cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv);
                cx->worker = encrypt ? freeblCipher_intel_AES_GCM_EncryptUpdate
                                     : freeblCipher_intel_AES_GCM_DecryptUpdate;
                cx->worker_aead = encrypt ? freeblAead_intel_AES_GCM_EncryptAEAD
                                          : freeblAead_intel_AES_GCM_DecryptAEAD;
                cx->destroy = freeblDestroy_intel_AES_GCM_DestroyContext;
                cx->isBlock = PR_FALSE;
            } else
#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
            if (ppc_crypto_support() && (keysize % 8) == 0) {
                cx->worker_cx = ppc_AES_GCM_CreateContext(cx, cx->worker, iv);
                cx->worker = encrypt ? freeblCipher_ppc_AES_GCM_EncryptUpdate
                                     : freeblCipher_ppc_AES_GCM_DecryptUpdate;
                cx->worker_aead = encrypt ? freeblAead_ppc_AES_GCM_EncryptAEAD
                                          : freeblAead_ppc_AES_GCM_DecryptAEAD;
                cx->destroy = freeblDestroy_ppc_AES_GCM_DestroyContext;
                cx->isBlock = PR_FALSE;
            } else
#endif
            /* Generic (software) GCM fallback. */
            {
                cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv);
                cx->worker = encrypt ? freeblCipher_GCM_EncryptUpdate
                                     : freeblCipher_GCM_DecryptUpdate;
                cx->worker_aead = encrypt ? freeblAead_GCM_EncryptAEAD
                                          : freeblAead_GCM_DecryptAEAD;

                cx->destroy = freeblDestroy_GCM_DestroyContext;
                cx->isBlock = PR_FALSE;
            }
            break;
        case NSS_AES_CTR:
            cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv);
#if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64)
            if (aesni_support() && (keysize % 8) == 0) {
                cx->worker = freeblCipher_CTR_Update_HW_AES;
            } else
#endif
            {
                cx->worker = freeblCipher_CTR_Update;
            }
            cx->destroy = freeblDestroy_CTR_DestroyContext;
            cx->isBlock = PR_FALSE;
            break;
        default:
            /* everything has already been set up by aes_InitContext, just
             * return */
            return SECSuccess;
    }
    /* check to see if we succeeded in getting the worker context */
    if (cx->worker_cx == NULL) {
        /* no, just destroy the existing context */
        cx->destroy = NULL; /* paranoia, though you can see a dozen lines */
        /* below that this isn't necessary */
        AES_DestroyContext(cx, PR_FALSE);
        return SECFailure;
    }
    return SECSuccess;
}

/* AES_CreateContext
 *
 * create a new context for Rijndael operations: allocate an aligned
 * context and initialize it; on failure the context is destroyed and
 * NULL is returned.
 */
AESContext *
AES_CreateContext(const unsigned char *key, const unsigned char *iv,
                  int mode, int encrypt,
                  unsigned int keysize, unsigned int blocksize)
{
    AESContext *cx = AES_AllocateContext();
    if (cx) {
        SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt,
                                       blocksize);
        if (rv != SECSuccess) {
            AES_DestroyContext(cx, PR_TRUE);
            cx = NULL;
        }
    }
    return cx;
}

/*
 * AES_DestroyContext
 *
 * Zero an AES cipher context. If freeit is true, also free the pointer
 * to the context.
 */
void
AES_DestroyContext(AESContext *cx, PRBool freeit)
{
    /* cx->mem is the original (unaligned) allocation pointer; save it
     * before zeroizing the context so it can be freed or restored. */
    void *mem = cx->mem;
    if (cx->worker_cx && cx->destroy) {
        (*cx->destroy)(cx->worker_cx, PR_TRUE);
        cx->worker_cx = NULL;
        cx->destroy = NULL;
    }
    /* Zeroize key material; PORT_SafeZero is not optimized away. */
    PORT_SafeZero(cx, sizeof(AESContext));
    if (freeit) {
        PORT_Free(mem);
    } else {
        /* if we are not freeing the context, restore mem, We may get called
         * again to actually free the context */
        cx->mem = mem;
    }
}

/*
 * AES_Encrypt
 *
 * Encrypt an arbitrary-length buffer. The output buffer must already be
 * allocated to at least inputLen.
 */
SECStatus
AES_Encrypt(AESContext *cx, unsigned char *output,
            unsigned int *outputLen, unsigned int maxOutputLen,
            const unsigned char *input, unsigned int inputLen)
{
    /* Check args */
    SECStatus rv;
    if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }
    /* Block modes (ECB/CBC) require whole-block input. */
    if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) {
        PORT_SetError(SEC_ERROR_INPUT_LEN);
        return SECFailure;
    }
    if (maxOutputLen < inputLen) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
    *outputLen = inputLen;
#if UINT_MAX > MP_32BIT_MAX
    /*
     * we can guarantee that GCM won't overflow if we limit the input to
     * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.
     *
     * We do it here to cover both hardware and software GCM operations.
     */
    {
        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
    }
    if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
#else
    /* if we can't pass in a 32_bit number, then no such check needed */
    {
        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
    }
#endif

    rv = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
                       input, inputLen, AES_BLOCK_SIZE);
    BLAPI_CLEAR_STACK(256)
    return rv;
}

/*
 * AES_Decrypt
 *
 * Decrypt an arbitrary-length buffer. The output buffer must already be
 * allocated to at least inputLen.
 */
SECStatus
AES_Decrypt(AESContext *cx, unsigned char *output,
            unsigned int *outputLen, unsigned int maxOutputLen,
            const unsigned char *input, unsigned int inputLen)
{
    SECStatus rv;
    /* Check args */
    if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }
    if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) {
        PORT_SetError(SEC_ERROR_INPUT_LEN);
        return SECFailure;
    }
    /* GCM decrypt output may legitimately be shorter than the input
     * (input includes the tag), so skip the length check for GCM. */
    if ((cx->mode != NSS_AES_GCM) && (maxOutputLen < inputLen)) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
    *outputLen = inputLen;
    rv = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
                       input, inputLen, AES_BLOCK_SIZE);
    BLAPI_CLEAR_STACK(256)
    return rv;
}

/*
 * AES_AEAD
 *
 * Encrypt or decrypt (per the context's initialization) using an AEAD
 * mode such as GCM. "params" carries the mode-specific parameters
 * (nonce, tag); "aad" is the additional authenticated data.
 */
SECStatus
AES_AEAD(AESContext *cx, unsigned char *output,
         unsigned int *outputLen, unsigned int maxOutputLen,
         const unsigned char *input, unsigned int inputLen,
         void *params, unsigned int paramsLen,
         const unsigned char *aad, unsigned int aadLen)
{
    SECStatus rv;
    /* Check args */
    if (cx == NULL || output == NULL || (input == NULL && inputLen != 0) || (aad == NULL && aadLen != 0) || params == NULL) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }
    /* worker_aead is only set for AEAD modes (see AES_InitContext). */
    if (cx->worker_aead == NULL) {
        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
        return SECFailure;
    }
    if (maxOutputLen < inputLen) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
    *outputLen = inputLen;
#if UINT_MAX > MP_32BIT_MAX
    /*
     * we can guarantee that GCM won't overflow if we limit the input to
     * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.
     *
     * We do it here to cover both hardware and software GCM operations.
     */
    {
        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
    }
    if (inputLen > MP_32BIT_MAX) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
#else
    /* if we can't pass in a 32_bit number, then no such check needed */
    {
        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
    }
#endif

    rv = (*cx->worker_aead)(cx->worker_cx, output, outputLen, maxOutputLen,
                            input, inputLen, params, paramsLen, aad, aadLen,
                            AES_BLOCK_SIZE);
    BLAPI_CLEAR_STACK(256)
    return rv;
}