aes-armv8.c (35181B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "secerr.h" 6 #include "rijndael.h" 7 8 #if ((defined(__clang__) || \ 9 (defined(__GNUC__) && defined(__GNUC_MINOR__) && \ 10 (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 8)))) && \ 11 defined(IS_LITTLE_ENDIAN)) 12 13 #ifndef __ARM_FEATURE_CRYPTO 14 #error "Compiler option is invalid" 15 #endif 16 17 #include <arm_neon.h> 18 19 SECStatus 20 arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output, 21 unsigned int *outputLen, 22 unsigned int maxOutputLen, 23 const unsigned char *input, 24 unsigned int inputLen, 25 unsigned int blocksize) 26 { 27 #if !defined(HAVE_UNALIGNED_ACCESS) 28 pre_align unsigned char buf[16] post_align; 29 #endif 30 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 31 uint8x16_t key11; 32 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; 33 34 if (!inputLen) { 35 return SECSuccess; 36 } 37 38 key1 = vld1q_u8(key); 39 key2 = vld1q_u8(key + 16); 40 key3 = vld1q_u8(key + 32); 41 key4 = vld1q_u8(key + 48); 42 key5 = vld1q_u8(key + 64); 43 key6 = vld1q_u8(key + 80); 44 key7 = vld1q_u8(key + 96); 45 key8 = vld1q_u8(key + 112); 46 key9 = vld1q_u8(key + 128); 47 key10 = vld1q_u8(key + 144); 48 key11 = vld1q_u8(key + 160); 49 50 while (inputLen > 0) { 51 uint8x16_t state; 52 #if defined(HAVE_UNALIGNED_ACCESS) 53 state = vld1q_u8(input); 54 #else 55 if ((uintptr_t)input & 0x7) { 56 memcpy(buf, input, 16); 57 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 58 } else { 59 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 60 } 61 #endif 62 input += 16; 63 inputLen -= 16; 64 65 /* Rounds */ 66 state = vaeseq_u8(state, key1); 67 state = vaesmcq_u8(state); 68 state = vaeseq_u8(state, key2); 69 state = vaesmcq_u8(state); 70 state = vaeseq_u8(state, key3); 71 state = vaesmcq_u8(state); 72 state = vaeseq_u8(state, key4); 73 state = vaesmcq_u8(state); 74 state = vaeseq_u8(state, key5); 75 state = vaesmcq_u8(state); 76 state = vaeseq_u8(state, key6); 77 state = vaesmcq_u8(state); 78 state = vaeseq_u8(state, key7); 79 state = vaesmcq_u8(state); 80 state = vaeseq_u8(state, key8); 81 state = vaesmcq_u8(state); 82 state = vaeseq_u8(state, key9); 83 state = vaesmcq_u8(state); 84 state = vaeseq_u8(state, key10); 85 /* AddRoundKey */ 86 state = veorq_u8(state, key11); 87 88 #if defined(HAVE_UNALIGNED_ACCESS) 89 vst1q_u8(output, state); 90 #else 91 if ((uintptr_t)output & 0x7) { 92 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 93 memcpy(output, buf, 16); 94 } else { 95 vst1q_u8(__builtin_assume_aligned(output, 8), state); 96 } 97 #endif 98 output += 16; 99 } 100 101 return SECSuccess; 102 } 103 104 SECStatus 105 arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output, 106 unsigned int *outputLen, 107 unsigned int maxOutputLen, 108 const unsigned char *input, 109 unsigned int inputLen, 110 unsigned int blocksize) 111 { 112 #if !defined(HAVE_UNALIGNED_ACCESS) 113 pre_align unsigned char buf[16] post_align; 114 #endif 115 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 116 uint8x16_t key11; 117 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; 118 119 if (inputLen == 0) { 120 return SECSuccess; 121 } 122 123 key1 = vld1q_u8(key); 124 key2 = vld1q_u8(key + 16); 125 key3 = vld1q_u8(key + 32); 126 key4 = vld1q_u8(key + 48); 127 key5 = vld1q_u8(key + 64); 128 key6 = vld1q_u8(key + 80); 129 key7 = vld1q_u8(key + 96); 130 key8 = vld1q_u8(key + 112); 131 key9 = vld1q_u8(key + 128); 132 key10 = vld1q_u8(key + 144); 133 key11 = vld1q_u8(key + 160); 134 135 while (inputLen > 0) { 136 uint8x16_t state; 137 #if defined(HAVE_UNALIGNED_ACCESS) 138 state = vld1q_u8(input); 139 #else 140 if ((uintptr_t)input & 0x7) { 141 memcpy(buf, input, 16); 142 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 143 } else { 144 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 145 } 146 #endif 147 input += 16; 148 inputLen -= 16; 149 150 /* Rounds */ 151 state = vaesdq_u8(state, key11); 152 state = vaesimcq_u8(state); 153 state = vaesdq_u8(state, key10); 154 state = vaesimcq_u8(state); 155 state = vaesdq_u8(state, key9); 156 state = vaesimcq_u8(state); 157 state = vaesdq_u8(state, key8); 158 state = vaesimcq_u8(state); 159 state = vaesdq_u8(state, key7); 160 state = vaesimcq_u8(state); 161 state = vaesdq_u8(state, key6); 162 state = vaesimcq_u8(state); 163 state = vaesdq_u8(state, key5); 164 state = vaesimcq_u8(state); 165 state = vaesdq_u8(state, key4); 166 state = vaesimcq_u8(state); 167 state = vaesdq_u8(state, key3); 168 state = vaesimcq_u8(state); 169 state = vaesdq_u8(state, key2); 170 /* AddRoundKey */ 171 state = veorq_u8(state, key1); 172 173 #if defined(HAVE_UNALIGNED_ACCESS) 174 vst1q_u8(output, state); 175 #else 176 if ((uintptr_t)output & 0x7) { 177 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 178 memcpy(output, buf, 16); 179 } else { 180 vst1q_u8(__builtin_assume_aligned(output, 8), state); 181 } 182 #endif 183 output += 16; 184 } 185 186 return SECSuccess; 187 } 188 189 SECStatus 190 arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output, 191 unsigned int *outputLen, 192 unsigned int maxOutputLen, 193 const unsigned char *input, 194 unsigned int inputLen, 195 unsigned int blocksize) 196 { 197 #if !defined(HAVE_UNALIGNED_ACCESS) 198 pre_align unsigned char buf[16] post_align; 199 #endif 200 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 201 uint8x16_t key11; 202 uint8x16_t iv; 203 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; 204 205 if (!inputLen) { 206 return SECSuccess; 207 } 208 209 /* iv */ 210 iv = vld1q_u8(cx->iv); 211 212 /* expanedKey */ 213 key1 = vld1q_u8(key); 214 key2 = vld1q_u8(key + 16); 215 key3 = vld1q_u8(key + 32); 216 key4 = vld1q_u8(key + 48); 217 key5 = vld1q_u8(key + 64); 218 key6 = vld1q_u8(key + 80); 219 key7 = vld1q_u8(key + 96); 220 key8 = vld1q_u8(key + 112); 221 key9 = vld1q_u8(key + 128); 222 key10 = vld1q_u8(key + 144); 223 key11 = vld1q_u8(key + 160); 224 225 while (inputLen > 0) { 226 uint8x16_t state; 227 #if defined(HAVE_UNALIGNED_ACCESS) 228 state = vld1q_u8(input); 229 #else 230 if ((uintptr_t)input & 0x7) { 231 memcpy(buf, input, 16); 232 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 233 } else { 234 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 235 } 236 #endif 237 input += 16; 238 inputLen -= 16; 239 240 state = veorq_u8(state, iv); 241 242 /* Rounds */ 243 state = vaeseq_u8(state, key1); 244 state = vaesmcq_u8(state); 245 state = vaeseq_u8(state, key2); 246 state = vaesmcq_u8(state); 247 state = vaeseq_u8(state, key3); 248 state = vaesmcq_u8(state); 249 state = vaeseq_u8(state, key4); 250 state = vaesmcq_u8(state); 251 state = vaeseq_u8(state, key5); 252 state = vaesmcq_u8(state); 253 state = vaeseq_u8(state, key6); 254 state = vaesmcq_u8(state); 255 state = vaeseq_u8(state, key7); 256 state = vaesmcq_u8(state); 257 state = vaeseq_u8(state, key8); 258 state = vaesmcq_u8(state); 259 state = vaeseq_u8(state, key9); 260 state = vaesmcq_u8(state); 261 state = vaeseq_u8(state, key10); 262 /* AddRoundKey */ 263 state = veorq_u8(state, key11); 264 265 #if defined(HAVE_UNALIGNED_ACCESS) 266 vst1q_u8(output, state); 267 #else 268 if ((uintptr_t)output & 0x7) { 269 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 270 memcpy(output, buf, 16); 271 } else { 272 vst1q_u8(__builtin_assume_aligned(output, 8), state); 273 } 274 #endif 275 output += 16; 276 iv = state; 277 } 278 vst1q_u8(cx->iv, iv); 279 280 return SECSuccess; 281 } 282 283 SECStatus 284 arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output, 285 unsigned int *outputLen, 286 unsigned int maxOutputLen, 287 const unsigned char *input, 288 unsigned int inputLen, 289 unsigned int blocksize) 290 { 291 #if !defined(HAVE_UNALIGNED_ACCESS) 292 pre_align unsigned char buf[16] post_align; 293 #endif 294 uint8x16_t iv; 295 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 296 uint8x16_t key11; 297 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; 298 299 if (!inputLen) { 300 return SECSuccess; 301 } 302 303 /* iv */ 304 iv = vld1q_u8(cx->iv); 305 306 /* expanedKey */ 307 key1 = vld1q_u8(key); 308 key2 = vld1q_u8(key + 16); 309 key3 = vld1q_u8(key + 32); 310 key4 = vld1q_u8(key + 48); 311 key5 = vld1q_u8(key + 64); 312 key6 = vld1q_u8(key + 80); 313 key7 = vld1q_u8(key + 96); 314 key8 = vld1q_u8(key + 112); 315 key9 = vld1q_u8(key + 128); 316 key10 = vld1q_u8(key + 144); 317 key11 = vld1q_u8(key + 160); 318 319 while (inputLen > 0) { 320 uint8x16_t state, old_state; 321 #if defined(HAVE_UNALIGNED_ACCESS) 322 state = vld1q_u8(input); 323 #else 324 if ((uintptr_t)input & 0x7) { 325 memcpy(buf, input, 16); 326 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 327 } else { 328 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 329 } 330 #endif 331 old_state = state; 332 input += 16; 333 inputLen -= 16; 334 335 /* Rounds */ 336 state = vaesdq_u8(state, key11); 337 state = vaesimcq_u8(state); 338 state = vaesdq_u8(state, key10); 339 state = vaesimcq_u8(state); 340 state = vaesdq_u8(state, key9); 341 state = vaesimcq_u8(state); 342 state = vaesdq_u8(state, key8); 343 state = vaesimcq_u8(state); 344 state = vaesdq_u8(state, key7); 345 state = vaesimcq_u8(state); 346 state = vaesdq_u8(state, key6); 347 state = vaesimcq_u8(state); 348 state = vaesdq_u8(state, key5); 349 state = vaesimcq_u8(state); 350 state = vaesdq_u8(state, key4); 351 state = vaesimcq_u8(state); 352 state = vaesdq_u8(state, key3); 353 state = vaesimcq_u8(state); 354 state = vaesdq_u8(state, key2); 355 /* AddRoundKey */ 356 state = veorq_u8(state, key1); 357 358 state = veorq_u8(state, iv); 359 360 #if defined(HAVE_UNALIGNED_ACCESS) 361 vst1q_u8(output, state); 362 #else 363 if ((uintptr_t)output & 0x7) { 364 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 365 memcpy(output, buf, 16); 366 } else { 367 vst1q_u8(__builtin_assume_aligned(output, 8), state); 368 } 369 #endif 370 output += 16; 371 372 iv = old_state; 373 } 374 vst1q_u8(cx->iv, iv); 375 376 return SECSuccess; 377 } 378 379 SECStatus 380 arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output, 381 unsigned int *outputLen, 382 unsigned int maxOutputLen, 383 const unsigned char *input, 384 unsigned int inputLen, 385 unsigned int blocksize) 386 { 387 #if !defined(HAVE_UNALIGNED_ACCESS) 388 pre_align unsigned char buf[16] post_align; 389 #endif 390 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 391 uint8x16_t key11, key12, key13; 392 PRUint8 *key = (PRUint8 *)cx->k.expandedKey; 393 394 if (!inputLen) { 395 return SECSuccess; 396 } 397 398 key1 = vld1q_u8(key); 399 key2 = vld1q_u8(key + 16); 400 key3 = vld1q_u8(key + 32); 401 key4 = vld1q_u8(key + 48); 402 key5 = vld1q_u8(key + 64); 403 key6 = vld1q_u8(key + 80); 404 key7 = vld1q_u8(key + 96); 405 key8 = vld1q_u8(key + 112); 406 key9 = vld1q_u8(key + 128); 407 key10 = vld1q_u8(key + 144); 408 key11 = vld1q_u8(key + 160); 409 key12 = vld1q_u8(key + 176); 410 key13 = vld1q_u8(key + 192); 411 412 while (inputLen > 0) { 413 uint8x16_t state; 414 #if defined(HAVE_UNALIGNED_ACCESS) 415 state = vld1q_u8(input); 416 #else 417 if ((uintptr_t)input & 0x7) { 418 memcpy(buf, input, 16); 419 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 420 } else { 421 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 422 } 423 #endif 424 input += 16; 425 inputLen -= 16; 426 427 /* Rounds */ 428 state = vaeseq_u8(state, key1); 429 state = vaesmcq_u8(state); 430 state = vaeseq_u8(state, key2); 431 state = vaesmcq_u8(state); 432 state = vaeseq_u8(state, key3); 433 state = vaesmcq_u8(state); 434 state = vaeseq_u8(state, key4); 435 state = vaesmcq_u8(state); 436 state = vaeseq_u8(state, key5); 437 state = vaesmcq_u8(state); 438 state = vaeseq_u8(state, key6); 439 state = vaesmcq_u8(state); 440 state = vaeseq_u8(state, key7); 441 state = vaesmcq_u8(state); 442 state = vaeseq_u8(state, key8); 443 state = vaesmcq_u8(state); 444 state = vaeseq_u8(state, key9); 445 state = vaesmcq_u8(state); 446 state = vaeseq_u8(state, key10); 447 state = vaesmcq_u8(state); 448 state = vaeseq_u8(state, key11); 449 state = vaesmcq_u8(state); 450 state = vaeseq_u8(state, key12); 451 /* AddRoundKey */ 452 state = veorq_u8(state, key13); 453 454 #if defined(HAVE_UNALIGNED_ACCESS) 455 vst1q_u8(output, state); 456 #else 457 if ((uintptr_t)output & 0x7) { 458 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 459 memcpy(output, buf, 16); 460 } else { 461 vst1q_u8(__builtin_assume_aligned(output, 8), state); 462 } 463 #endif 464 output += 16; 465 } 466 467 return SECSuccess; 468 } 469 470 SECStatus 471 arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output, 472 unsigned int *outputLen, 473 unsigned int maxOutputLen, 474 const unsigned char *input, 475 unsigned int inputLen, 476 unsigned int blocksize) 477 { 478 #if !defined(HAVE_UNALIGNED_ACCESS) 479 pre_align unsigned char buf[16] post_align; 480 #endif 481 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 482 uint8x16_t key11, key12, key13; 483 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; 484 485 if (!inputLen) { 486 return SECSuccess; 487 } 488 489 key1 = vld1q_u8(key); 490 key2 = vld1q_u8(key + 16); 491 key3 = vld1q_u8(key + 32); 492 key4 = vld1q_u8(key + 48); 493 key5 = vld1q_u8(key + 64); 494 key6 = vld1q_u8(key + 80); 495 key7 = vld1q_u8(key + 96); 496 key8 = vld1q_u8(key + 112); 497 key9 = vld1q_u8(key + 128); 498 key10 = vld1q_u8(key + 144); 499 key11 = vld1q_u8(key + 160); 500 key12 = vld1q_u8(key + 176); 501 key13 = vld1q_u8(key + 192); 502 503 while (inputLen > 0) { 504 uint8x16_t state; 505 #if defined(HAVE_UNALIGNED_ACCESS) 506 state = vld1q_u8(input); 507 #else 508 if ((uintptr_t)input & 0x7) { 509 memcpy(buf, input, 16); 510 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 511 } else { 512 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 513 } 514 #endif 515 input += 16; 516 inputLen -= 16; 517 518 /* Rounds */ 519 state = vaesdq_u8(state, key13); 520 state = vaesimcq_u8(state); 521 state = vaesdq_u8(state, key12); 522 state = vaesimcq_u8(state); 523 state = vaesdq_u8(state, key11); 524 state = vaesimcq_u8(state); 525 state = vaesdq_u8(state, key10); 526 state = vaesimcq_u8(state); 527 state = vaesdq_u8(state, key9); 528 state = vaesimcq_u8(state); 529 state = vaesdq_u8(state, key8); 530 state = vaesimcq_u8(state); 531 state = vaesdq_u8(state, key7); 532 state = vaesimcq_u8(state); 533 state = vaesdq_u8(state, key6); 534 state = vaesimcq_u8(state); 535 state = vaesdq_u8(state, key5); 536 state = vaesimcq_u8(state); 537 state = vaesdq_u8(state, key4); 538 state = vaesimcq_u8(state); 539 state = vaesdq_u8(state, key3); 540 state = vaesimcq_u8(state); 541 state = vaesdq_u8(state, key2); 542 /* AddRoundKey */ 543 state = veorq_u8(state, key1); 544 545 #if defined(HAVE_UNALIGNED_ACCESS) 546 vst1q_u8(output, state); 547 #else 548 if ((uintptr_t)output & 0x7) { 549 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 550 memcpy(output, buf, 16); 551 } else { 552 vst1q_u8(__builtin_assume_aligned(output, 8), state); 553 } 554 #endif 555 output += 16; 556 } 557 558 return SECSuccess; 559 } 560 561 SECStatus 562 arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output, 563 unsigned int *outputLen, 564 unsigned int maxOutputLen, 565 const unsigned char *input, 566 unsigned int inputLen, 567 unsigned int blocksize) 568 { 569 #if !defined(HAVE_UNALIGNED_ACCESS) 570 pre_align unsigned char buf[16] post_align; 571 #endif 572 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 573 uint8x16_t key11, key12, key13; 574 uint8x16_t iv; 575 PRUint8 *key = (PRUint8 *)cx->k.expandedKey; 576 577 if (!inputLen) { 578 return SECSuccess; 579 } 580 581 /* iv */ 582 iv = vld1q_u8(cx->iv); 583 584 /* expanedKey */ 585 key1 = vld1q_u8(key); 586 key2 = vld1q_u8(key + 16); 587 key3 = vld1q_u8(key + 32); 588 key4 = vld1q_u8(key + 48); 589 key5 = vld1q_u8(key + 64); 590 key6 = vld1q_u8(key + 80); 591 key7 = vld1q_u8(key + 96); 592 key8 = vld1q_u8(key + 112); 593 key9 = vld1q_u8(key + 128); 594 key10 = vld1q_u8(key + 144); 595 key11 = vld1q_u8(key + 160); 596 key12 = vld1q_u8(key + 176); 597 key13 = vld1q_u8(key + 192); 598 599 while (inputLen > 0) { 600 uint8x16_t state; 601 #if defined(HAVE_UNALIGNED_ACCESS) 602 state = vld1q_u8(input); 603 #else 604 if ((uintptr_t)input & 0x7) { 605 memcpy(buf, input, 16); 606 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 607 } else { 608 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 609 } 610 #endif 611 input += 16; 612 inputLen -= 16; 613 614 state = veorq_u8(state, iv); 615 616 /* Rounds */ 617 state = vaeseq_u8(state, key1); 618 state = vaesmcq_u8(state); 619 state = vaeseq_u8(state, key2); 620 state = vaesmcq_u8(state); 621 state = vaeseq_u8(state, key3); 622 state = vaesmcq_u8(state); 623 state = vaeseq_u8(state, key4); 624 state = vaesmcq_u8(state); 625 state = vaeseq_u8(state, key5); 626 state = vaesmcq_u8(state); 627 state = vaeseq_u8(state, key6); 628 state = vaesmcq_u8(state); 629 state = vaeseq_u8(state, key7); 630 state = vaesmcq_u8(state); 631 state = vaeseq_u8(state, key8); 632 state = vaesmcq_u8(state); 633 state = vaeseq_u8(state, key9); 634 state = vaesmcq_u8(state); 635 state = vaeseq_u8(state, key10); 636 state = vaesmcq_u8(state); 637 state = vaeseq_u8(state, key11); 638 state = vaesmcq_u8(state); 639 state = vaeseq_u8(state, key12); 640 state = veorq_u8(state, key13); 641 642 #if defined(HAVE_UNALIGNED_ACCESS) 643 vst1q_u8(output, state); 644 #else 645 if ((uintptr_t)output & 0x7) { 646 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 647 memcpy(output, buf, 16); 648 } else { 649 vst1q_u8(__builtin_assume_aligned(output, 8), state); 650 } 651 #endif 652 output += 16; 653 iv = state; 654 } 655 vst1q_u8(cx->iv, iv); 656 657 return SECSuccess; 658 } 659 660 SECStatus 661 arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output, 662 unsigned int *outputLen, 663 unsigned int maxOutputLen, 664 const unsigned char *input, 665 unsigned int inputLen, 666 unsigned int blocksize) 667 { 668 #if !defined(HAVE_UNALIGNED_ACCESS) 669 pre_align unsigned char buf[16] post_align; 670 #endif 671 uint8x16_t iv; 672 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 673 uint8x16_t key11, key12, key13; 674 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; 675 676 if (!inputLen) { 677 return SECSuccess; 678 } 679 680 /* iv */ 681 iv = vld1q_u8(cx->iv); 682 683 /* expanedKey */ 684 key1 = vld1q_u8(key); 685 key2 = vld1q_u8(key + 16); 686 key3 = vld1q_u8(key + 32); 687 key4 = vld1q_u8(key + 48); 688 key5 = vld1q_u8(key + 64); 689 key6 = vld1q_u8(key + 80); 690 key7 = vld1q_u8(key + 96); 691 key8 = vld1q_u8(key + 112); 692 key9 = vld1q_u8(key + 128); 693 key10 = vld1q_u8(key + 144); 694 key11 = vld1q_u8(key + 160); 695 key12 = vld1q_u8(key + 176); 696 key13 = vld1q_u8(key + 192); 697 698 while (inputLen > 0) { 699 uint8x16_t state, old_state; 700 #if defined(HAVE_UNALIGNED_ACCESS) 701 state = vld1q_u8(input); 702 #else 703 if ((uintptr_t)input & 0x7) { 704 memcpy(buf, input, 16); 705 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 706 } else { 707 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 708 } 709 #endif 710 old_state = state; 711 input += 16; 712 inputLen -= 16; 713 714 /* Rounds */ 715 state = vaesdq_u8(state, key13); 716 state = vaesimcq_u8(state); 717 state = vaesdq_u8(state, key12); 718 state = vaesimcq_u8(state); 719 state = vaesdq_u8(state, key11); 720 state = vaesimcq_u8(state); 721 state = vaesdq_u8(state, key10); 722 state = vaesimcq_u8(state); 723 state = vaesdq_u8(state, key9); 724 state = vaesimcq_u8(state); 725 state = vaesdq_u8(state, key8); 726 state = vaesimcq_u8(state); 727 state = vaesdq_u8(state, key7); 728 state = vaesimcq_u8(state); 729 state = vaesdq_u8(state, key6); 730 state = vaesimcq_u8(state); 731 state = vaesdq_u8(state, key5); 732 state = vaesimcq_u8(state); 733 state = vaesdq_u8(state, key4); 734 state = vaesimcq_u8(state); 735 state = vaesdq_u8(state, key3); 736 state = vaesimcq_u8(state); 737 state = vaesdq_u8(state, key2); 738 /* AddRoundKey */ 739 state = veorq_u8(state, key1); 740 741 state = veorq_u8(state, iv); 742 743 #if defined(HAVE_UNALIGNED_ACCESS) 744 vst1q_u8(output, state); 745 #else 746 if ((uintptr_t)output & 0x7) { 747 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 748 memcpy(output, buf, 16); 749 } else { 750 vst1q_u8(__builtin_assume_aligned(output, 8), state); 751 } 752 #endif 753 output += 16; 754 755 iv = old_state; 756 } 757 vst1q_u8(cx->iv, iv); 758 759 return SECSuccess; 760 } 761 762 SECStatus 763 arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output, 764 unsigned int *outputLen, 765 unsigned int maxOutputLen, 766 const unsigned char *input, 767 unsigned int inputLen, 768 unsigned int blocksize) 769 { 770 #if !defined(HAVE_UNALIGNED_ACCESS) 771 pre_align unsigned char buf[16] post_align; 772 #endif 773 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 774 uint8x16_t key11, key12, key13, key14, key15; 775 PRUint8 *key = (PRUint8 *)cx->k.expandedKey; 776 777 if (inputLen == 0) { 778 return SECSuccess; 779 } 780 781 key1 = vld1q_u8(key); 782 key2 = vld1q_u8(key + 16); 783 key3 = vld1q_u8(key + 32); 784 key4 = vld1q_u8(key + 48); 785 key5 = vld1q_u8(key + 64); 786 key6 = vld1q_u8(key + 80); 787 key7 = vld1q_u8(key + 96); 788 key8 = vld1q_u8(key + 112); 789 key9 = vld1q_u8(key + 128); 790 key10 = vld1q_u8(key + 144); 791 key11 = vld1q_u8(key + 160); 792 key12 = vld1q_u8(key + 176); 793 key13 = vld1q_u8(key + 192); 794 key14 = vld1q_u8(key + 208); 795 key15 = vld1q_u8(key + 224); 796 797 while (inputLen > 0) { 798 uint8x16_t state; 799 #if defined(HAVE_UNALIGNED_ACCESS) 800 state = vld1q_u8(input); 801 #else 802 if ((uintptr_t)input & 0x7) { 803 memcpy(buf, input, 16); 804 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 805 } else { 806 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 807 } 808 #endif 809 input += 16; 810 inputLen -= 16; 811 812 /* Rounds */ 813 state = vaeseq_u8(state, key1); 814 state = vaesmcq_u8(state); 815 state = vaeseq_u8(state, key2); 816 state = vaesmcq_u8(state); 817 state = vaeseq_u8(state, key3); 818 state = vaesmcq_u8(state); 819 state = vaeseq_u8(state, key4); 820 state = vaesmcq_u8(state); 821 state = vaeseq_u8(state, key5); 822 state = vaesmcq_u8(state); 823 state = vaeseq_u8(state, key6); 824 state = vaesmcq_u8(state); 825 state = vaeseq_u8(state, key7); 826 state = vaesmcq_u8(state); 827 state = vaeseq_u8(state, key8); 828 state = vaesmcq_u8(state); 829 state = vaeseq_u8(state, key9); 830 state = vaesmcq_u8(state); 831 state = vaeseq_u8(state, key10); 832 state = vaesmcq_u8(state); 833 state = vaeseq_u8(state, key11); 834 state = vaesmcq_u8(state); 835 state = vaeseq_u8(state, key12); 836 state = vaesmcq_u8(state); 837 state = vaeseq_u8(state, key13); 838 state = vaesmcq_u8(state); 839 state = vaeseq_u8(state, key14); 840 /* AddRoundKey */ 841 state = veorq_u8(state, key15); 842 843 #if defined(HAVE_UNALIGNED_ACCESS) 844 vst1q_u8(output, state); 845 #else 846 if ((uintptr_t)output & 0x7) { 847 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 848 memcpy(output, buf, 16); 849 } else { 850 vst1q_u8(__builtin_assume_aligned(output, 8), state); 851 } 852 #endif 853 output += 16; 854 } 855 return SECSuccess; 856 } 857 858 SECStatus 859 arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output, 860 unsigned int *outputLen, 861 unsigned int maxOutputLen, 862 const unsigned char *input, 863 unsigned int inputLen, 864 unsigned int blocksize) 865 { 866 #if !defined(HAVE_UNALIGNED_ACCESS) 867 pre_align unsigned char buf[16] post_align; 868 #endif 869 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 870 uint8x16_t key11, key12, key13, key14, key15; 871 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; 872 873 if (!inputLen) { 874 return SECSuccess; 875 } 876 877 key1 = vld1q_u8(key); 878 key2 = vld1q_u8(key + 16); 879 key3 = vld1q_u8(key + 32); 880 key4 = vld1q_u8(key + 48); 881 key5 = vld1q_u8(key + 64); 882 key6 = vld1q_u8(key + 80); 883 key7 = vld1q_u8(key + 96); 884 key8 = vld1q_u8(key + 112); 885 key9 = vld1q_u8(key + 128); 886 key10 = vld1q_u8(key + 144); 887 key11 = vld1q_u8(key + 160); 888 key12 = vld1q_u8(key + 176); 889 key13 = vld1q_u8(key + 192); 890 key14 = vld1q_u8(key + 208); 891 key15 = vld1q_u8(key + 224); 892 893 while (inputLen > 0) { 894 uint8x16_t state; 895 #if defined(HAVE_UNALIGNED_ACCESS) 896 state = vld1q_u8(input); 897 #else 898 if ((uintptr_t)input & 0x7) { 899 memcpy(buf, input, 16); 900 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 901 } else { 902 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 903 } 904 #endif 905 input += 16; 906 inputLen -= 16; 907 908 /* Rounds */ 909 state = vaesdq_u8(state, key15); 910 state = vaesimcq_u8(state); 911 state = vaesdq_u8(state, key14); 912 state = vaesimcq_u8(state); 913 state = vaesdq_u8(state, key13); 914 state = vaesimcq_u8(state); 915 state = vaesdq_u8(state, key12); 916 state = vaesimcq_u8(state); 917 state = vaesdq_u8(state, key11); 918 state = vaesimcq_u8(state); 919 state = vaesdq_u8(state, key10); 920 state = vaesimcq_u8(state); 921 state = vaesdq_u8(state, key9); 922 state = vaesimcq_u8(state); 923 state = vaesdq_u8(state, key8); 924 state = vaesimcq_u8(state); 925 state = vaesdq_u8(state, key7); 926 state = vaesimcq_u8(state); 927 state = vaesdq_u8(state, key6); 928 state = vaesimcq_u8(state); 929 state = vaesdq_u8(state, key5); 930 state = vaesimcq_u8(state); 931 state = vaesdq_u8(state, key4); 932 state = vaesimcq_u8(state); 933 state = vaesdq_u8(state, key3); 934 state = vaesimcq_u8(state); 935 state = vaesdq_u8(state, key2); 936 /* AddRoundKey */ 937 state = veorq_u8(state, key1); 938 939 #if defined(HAVE_UNALIGNED_ACCESS) 940 vst1q_u8(output, state); 941 #else 942 if ((uintptr_t)output & 0x7) { 943 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 944 memcpy(output, buf, 16); 945 } else { 946 vst1q_u8(__builtin_assume_aligned(output, 8), state); 947 } 948 #endif 949 output += 16; 950 } 951 952 return SECSuccess; 953 } 954 955 SECStatus 956 arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output, 957 unsigned int *outputLen, 958 unsigned int maxOutputLen, 959 const unsigned char *input, 960 unsigned int inputLen, 961 unsigned int blocksize) 962 { 963 #if !defined(HAVE_UNALIGNED_ACCESS) 964 pre_align unsigned char buf[16] post_align; 965 #endif 966 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 967 uint8x16_t key11, key12, key13, key14, key15; 968 uint8x16_t iv; 969 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; 970 971 if (!inputLen) { 972 return SECSuccess; 973 } 974 975 /* iv */ 976 iv = vld1q_u8(cx->iv); 977 978 /* expanedKey */ 979 key1 = vld1q_u8(key); 980 key2 = vld1q_u8(key + 16); 981 key3 = vld1q_u8(key + 32); 982 key4 = vld1q_u8(key + 48); 983 key5 = vld1q_u8(key + 64); 984 key6 = vld1q_u8(key + 80); 985 key7 = vld1q_u8(key + 96); 986 key8 = vld1q_u8(key + 112); 987 key9 = vld1q_u8(key + 128); 988 key10 = vld1q_u8(key + 144); 989 key11 = vld1q_u8(key + 160); 990 key12 = vld1q_u8(key + 176); 991 key13 = vld1q_u8(key + 192); 992 key14 = vld1q_u8(key + 208); 993 key15 = vld1q_u8(key + 224); 994 995 while (inputLen > 0) { 996 uint8x16_t state; 997 #if defined(HAVE_UNALIGNED_ACCESS) 998 state = vld1q_u8(input); 999 #else 1000 if ((uintptr_t)input & 0x7) { 1001 memcpy(buf, input, 16); 1002 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 1003 } else { 1004 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 1005 } 1006 #endif 1007 input += 16; 1008 inputLen -= 16; 1009 1010 state = veorq_u8(state, iv); 1011 1012 /* Rounds */ 1013 state = vaeseq_u8(state, key1); 1014 state = vaesmcq_u8(state); 1015 state = vaeseq_u8(state, key2); 1016 state = vaesmcq_u8(state); 1017 state = vaeseq_u8(state, key3); 1018 state = vaesmcq_u8(state); 1019 state = vaeseq_u8(state, key4); 1020 state = vaesmcq_u8(state); 1021 state = vaeseq_u8(state, key5); 1022 state = vaesmcq_u8(state); 1023 state = vaeseq_u8(state, key6); 1024 state = vaesmcq_u8(state); 1025 state = vaeseq_u8(state, key7); 1026 state = vaesmcq_u8(state); 1027 state = vaeseq_u8(state, key8); 1028 state = vaesmcq_u8(state); 1029 state = vaeseq_u8(state, key9); 1030 state = vaesmcq_u8(state); 1031 state = vaeseq_u8(state, key10); 1032 state = vaesmcq_u8(state); 1033 state = vaeseq_u8(state, key11); 1034 state = vaesmcq_u8(state); 1035 state = vaeseq_u8(state, key12); 1036 state = vaesmcq_u8(state); 1037 state = vaeseq_u8(state, key13); 1038 state = vaesmcq_u8(state); 1039 state = vaeseq_u8(state, key14); 1040 /* AddRoundKey */ 1041 state = veorq_u8(state, key15); 1042 1043 #if defined(HAVE_UNALIGNED_ACCESS) 1044 vst1q_u8(output, state); 1045 #else 1046 if ((uintptr_t)output & 0x7) { 1047 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 1048 memcpy(output, buf, 16); 1049 } else { 1050 vst1q_u8(__builtin_assume_aligned(output, 8), state); 1051 } 1052 #endif 1053 output += 16; 1054 iv = state; 1055 } 1056 vst1q_u8(cx->iv, iv); 1057 1058 return SECSuccess; 1059 } 1060 1061 SECStatus 1062 arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output, 1063 unsigned int *outputLen, 1064 unsigned int maxOutputLen, 1065 const unsigned char *input, 1066 unsigned int inputLen, 1067 unsigned int blocksize) 1068 { 1069 #if !defined(HAVE_UNALIGNED_ACCESS) 1070 pre_align unsigned char buf[16] post_align; 1071 #endif 1072 uint8x16_t iv; 1073 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; 1074 uint8x16_t key11, key12, key13, key14, key15; 1075 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; 1076 1077 if (!inputLen) { 1078 return SECSuccess; 1079 } 1080 1081 /* iv */ 1082 iv = vld1q_u8(cx->iv); 1083 1084 /* expanedKey */ 1085 key1 = vld1q_u8(key); 1086 key2 = vld1q_u8(key + 16); 1087 key3 = vld1q_u8(key + 32); 1088 key4 = vld1q_u8(key + 48); 1089 key5 = vld1q_u8(key + 64); 1090 key6 = vld1q_u8(key + 80); 1091 key7 = vld1q_u8(key + 96); 1092 key8 = vld1q_u8(key + 112); 1093 key9 = vld1q_u8(key + 128); 1094 key10 = vld1q_u8(key + 144); 1095 key11 = vld1q_u8(key + 160); 1096 key12 = vld1q_u8(key + 176); 1097 key13 = vld1q_u8(key + 192); 1098 key14 = vld1q_u8(key + 208); 1099 key15 = vld1q_u8(key + 224); 1100 1101 while (inputLen > 0) { 1102 uint8x16_t state, old_state; 1103 #if defined(HAVE_UNALIGNED_ACCESS) 1104 state = vld1q_u8(input); 1105 #else 1106 if ((uintptr_t)input & 0x7) { 1107 memcpy(buf, input, 16); 1108 state = vld1q_u8(__builtin_assume_aligned(buf, 16)); 1109 } else { 1110 state = vld1q_u8(__builtin_assume_aligned(input, 8)); 1111 } 1112 #endif 1113 old_state = state; 1114 input += 16; 1115 inputLen -= 16; 1116 1117 /* Rounds */ 1118 state = vaesdq_u8(state, key15); 1119 state = vaesimcq_u8(state); 1120 state = vaesdq_u8(state, key14); 1121 state = vaesimcq_u8(state); 1122 state = vaesdq_u8(state, key13); 1123 state = vaesimcq_u8(state); 1124 state = vaesdq_u8(state, key12); 1125 state = vaesimcq_u8(state); 1126 state = vaesdq_u8(state, key11); 1127 state = vaesimcq_u8(state); 1128 state = vaesdq_u8(state, key10); 1129 state = vaesimcq_u8(state); 1130 state = vaesdq_u8(state, key9); 1131 state = vaesimcq_u8(state); 1132 state = vaesdq_u8(state, key8); 1133 state = vaesimcq_u8(state); 1134 state = vaesdq_u8(state, key7); 1135 state = vaesimcq_u8(state); 1136 state = vaesdq_u8(state, key6); 1137 state = vaesimcq_u8(state); 1138 state = vaesdq_u8(state, key5); 1139 state = vaesimcq_u8(state); 1140 state = vaesdq_u8(state, key4); 1141 state = vaesimcq_u8(state); 1142 state = vaesdq_u8(state, key3); 1143 state = vaesimcq_u8(state); 1144 state = vaesdq_u8(state, key2); 1145 /* AddRoundKey */ 1146 state = veorq_u8(state, key1); 1147 1148 state = veorq_u8(state, iv); 1149 1150 #if defined(HAVE_UNALIGNED_ACCESS) 1151 vst1q_u8(output, state); 1152 #else 1153 if ((uintptr_t)output & 0x7) { 1154 vst1q_u8(__builtin_assume_aligned(buf, 16), state); 1155 memcpy(output, buf, 16); 1156 } else { 1157 vst1q_u8(__builtin_assume_aligned(output, 8), state); 1158 } 1159 #endif 1160 output += 16; 1161 1162 iv = old_state; 1163 } 1164 vst1q_u8(cx->iv, iv); 1165 1166 return SECSuccess; 1167 } 1168 1169 #endif