tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

aes-armv8.c (35181B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #include "secerr.h"
      6 #include "rijndael.h"
      7 
      8 #if ((defined(__clang__) ||                                         \
      9      (defined(__GNUC__) && defined(__GNUC_MINOR__) &&              \
     10       (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 8)))) && \
     11     defined(IS_LITTLE_ENDIAN))
     12 
     13 #ifndef __ARM_FEATURE_CRYPTO
     14 #error "Compiler option is invalid"
     15 #endif
     16 
     17 #include <arm_neon.h>
     18 
     19 SECStatus
     20 arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
     21                        unsigned int *outputLen,
     22                        unsigned int maxOutputLen,
     23                        const unsigned char *input,
     24                        unsigned int inputLen,
     25                        unsigned int blocksize)
     26 {
     27 #if !defined(HAVE_UNALIGNED_ACCESS)
     28    pre_align unsigned char buf[16] post_align;
     29 #endif
     30    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
     31    uint8x16_t key11;
     32    const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
     33 
     34    if (!inputLen) {
     35        return SECSuccess;
     36    }
     37 
     38    key1 = vld1q_u8(key);
     39    key2 = vld1q_u8(key + 16);
     40    key3 = vld1q_u8(key + 32);
     41    key4 = vld1q_u8(key + 48);
     42    key5 = vld1q_u8(key + 64);
     43    key6 = vld1q_u8(key + 80);
     44    key7 = vld1q_u8(key + 96);
     45    key8 = vld1q_u8(key + 112);
     46    key9 = vld1q_u8(key + 128);
     47    key10 = vld1q_u8(key + 144);
     48    key11 = vld1q_u8(key + 160);
     49 
     50    while (inputLen > 0) {
     51        uint8x16_t state;
     52 #if defined(HAVE_UNALIGNED_ACCESS)
     53        state = vld1q_u8(input);
     54 #else
     55        if ((uintptr_t)input & 0x7) {
     56            memcpy(buf, input, 16);
     57            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
     58        } else {
     59            state = vld1q_u8(__builtin_assume_aligned(input, 8));
     60        }
     61 #endif
     62        input += 16;
     63        inputLen -= 16;
     64 
     65        /* Rounds */
     66        state = vaeseq_u8(state, key1);
     67        state = vaesmcq_u8(state);
     68        state = vaeseq_u8(state, key2);
     69        state = vaesmcq_u8(state);
     70        state = vaeseq_u8(state, key3);
     71        state = vaesmcq_u8(state);
     72        state = vaeseq_u8(state, key4);
     73        state = vaesmcq_u8(state);
     74        state = vaeseq_u8(state, key5);
     75        state = vaesmcq_u8(state);
     76        state = vaeseq_u8(state, key6);
     77        state = vaesmcq_u8(state);
     78        state = vaeseq_u8(state, key7);
     79        state = vaesmcq_u8(state);
     80        state = vaeseq_u8(state, key8);
     81        state = vaesmcq_u8(state);
     82        state = vaeseq_u8(state, key9);
     83        state = vaesmcq_u8(state);
     84        state = vaeseq_u8(state, key10);
     85        /* AddRoundKey */
     86        state = veorq_u8(state, key11);
     87 
     88 #if defined(HAVE_UNALIGNED_ACCESS)
     89        vst1q_u8(output, state);
     90 #else
     91        if ((uintptr_t)output & 0x7) {
     92            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
     93            memcpy(output, buf, 16);
     94        } else {
     95            vst1q_u8(__builtin_assume_aligned(output, 8), state);
     96        }
     97 #endif
     98        output += 16;
     99    }
    100 
    101    return SECSuccess;
    102 }
    103 
    104 SECStatus
    105 arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
    106                        unsigned int *outputLen,
    107                        unsigned int maxOutputLen,
    108                        const unsigned char *input,
    109                        unsigned int inputLen,
    110                        unsigned int blocksize)
    111 {
    112 #if !defined(HAVE_UNALIGNED_ACCESS)
    113    pre_align unsigned char buf[16] post_align;
    114 #endif
    115    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    116    uint8x16_t key11;
    117    const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
    118 
    119    if (inputLen == 0) {
    120        return SECSuccess;
    121    }
    122 
    123    key1 = vld1q_u8(key);
    124    key2 = vld1q_u8(key + 16);
    125    key3 = vld1q_u8(key + 32);
    126    key4 = vld1q_u8(key + 48);
    127    key5 = vld1q_u8(key + 64);
    128    key6 = vld1q_u8(key + 80);
    129    key7 = vld1q_u8(key + 96);
    130    key8 = vld1q_u8(key + 112);
    131    key9 = vld1q_u8(key + 128);
    132    key10 = vld1q_u8(key + 144);
    133    key11 = vld1q_u8(key + 160);
    134 
    135    while (inputLen > 0) {
    136        uint8x16_t state;
    137 #if defined(HAVE_UNALIGNED_ACCESS)
    138        state = vld1q_u8(input);
    139 #else
    140        if ((uintptr_t)input & 0x7) {
    141            memcpy(buf, input, 16);
    142            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
    143        } else {
    144            state = vld1q_u8(__builtin_assume_aligned(input, 8));
    145        }
    146 #endif
    147        input += 16;
    148        inputLen -= 16;
    149 
    150        /* Rounds */
    151        state = vaesdq_u8(state, key11);
    152        state = vaesimcq_u8(state);
    153        state = vaesdq_u8(state, key10);
    154        state = vaesimcq_u8(state);
    155        state = vaesdq_u8(state, key9);
    156        state = vaesimcq_u8(state);
    157        state = vaesdq_u8(state, key8);
    158        state = vaesimcq_u8(state);
    159        state = vaesdq_u8(state, key7);
    160        state = vaesimcq_u8(state);
    161        state = vaesdq_u8(state, key6);
    162        state = vaesimcq_u8(state);
    163        state = vaesdq_u8(state, key5);
    164        state = vaesimcq_u8(state);
    165        state = vaesdq_u8(state, key4);
    166        state = vaesimcq_u8(state);
    167        state = vaesdq_u8(state, key3);
    168        state = vaesimcq_u8(state);
    169        state = vaesdq_u8(state, key2);
    170        /* AddRoundKey */
    171        state = veorq_u8(state, key1);
    172 
    173 #if defined(HAVE_UNALIGNED_ACCESS)
    174        vst1q_u8(output, state);
    175 #else
    176        if ((uintptr_t)output & 0x7) {
    177            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
    178            memcpy(output, buf, 16);
    179        } else {
    180            vst1q_u8(__builtin_assume_aligned(output, 8), state);
    181        }
    182 #endif
    183        output += 16;
    184    }
    185 
    186    return SECSuccess;
    187 }
    188 
    189 SECStatus
    190 arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
    191                        unsigned int *outputLen,
    192                        unsigned int maxOutputLen,
    193                        const unsigned char *input,
    194                        unsigned int inputLen,
    195                        unsigned int blocksize)
    196 {
    197 #if !defined(HAVE_UNALIGNED_ACCESS)
    198    pre_align unsigned char buf[16] post_align;
    199 #endif
    200    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    201    uint8x16_t key11;
    202    uint8x16_t iv;
    203    const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
    204 
    205    if (!inputLen) {
    206        return SECSuccess;
    207    }
    208 
    209    /* iv */
    210    iv = vld1q_u8(cx->iv);
    211 
    212    /* expanedKey */
    213    key1 = vld1q_u8(key);
    214    key2 = vld1q_u8(key + 16);
    215    key3 = vld1q_u8(key + 32);
    216    key4 = vld1q_u8(key + 48);
    217    key5 = vld1q_u8(key + 64);
    218    key6 = vld1q_u8(key + 80);
    219    key7 = vld1q_u8(key + 96);
    220    key8 = vld1q_u8(key + 112);
    221    key9 = vld1q_u8(key + 128);
    222    key10 = vld1q_u8(key + 144);
    223    key11 = vld1q_u8(key + 160);
    224 
    225    while (inputLen > 0) {
    226        uint8x16_t state;
    227 #if defined(HAVE_UNALIGNED_ACCESS)
    228        state = vld1q_u8(input);
    229 #else
    230        if ((uintptr_t)input & 0x7) {
    231            memcpy(buf, input, 16);
    232            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
    233        } else {
    234            state = vld1q_u8(__builtin_assume_aligned(input, 8));
    235        }
    236 #endif
    237        input += 16;
    238        inputLen -= 16;
    239 
    240        state = veorq_u8(state, iv);
    241 
    242        /* Rounds */
    243        state = vaeseq_u8(state, key1);
    244        state = vaesmcq_u8(state);
    245        state = vaeseq_u8(state, key2);
    246        state = vaesmcq_u8(state);
    247        state = vaeseq_u8(state, key3);
    248        state = vaesmcq_u8(state);
    249        state = vaeseq_u8(state, key4);
    250        state = vaesmcq_u8(state);
    251        state = vaeseq_u8(state, key5);
    252        state = vaesmcq_u8(state);
    253        state = vaeseq_u8(state, key6);
    254        state = vaesmcq_u8(state);
    255        state = vaeseq_u8(state, key7);
    256        state = vaesmcq_u8(state);
    257        state = vaeseq_u8(state, key8);
    258        state = vaesmcq_u8(state);
    259        state = vaeseq_u8(state, key9);
    260        state = vaesmcq_u8(state);
    261        state = vaeseq_u8(state, key10);
    262        /* AddRoundKey */
    263        state = veorq_u8(state, key11);
    264 
    265 #if defined(HAVE_UNALIGNED_ACCESS)
    266        vst1q_u8(output, state);
    267 #else
    268        if ((uintptr_t)output & 0x7) {
    269            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
    270            memcpy(output, buf, 16);
    271        } else {
    272            vst1q_u8(__builtin_assume_aligned(output, 8), state);
    273        }
    274 #endif
    275        output += 16;
    276        iv = state;
    277    }
    278    vst1q_u8(cx->iv, iv);
    279 
    280    return SECSuccess;
    281 }
    282 
    283 SECStatus
    284 arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
    285                        unsigned int *outputLen,
    286                        unsigned int maxOutputLen,
    287                        const unsigned char *input,
    288                        unsigned int inputLen,
    289                        unsigned int blocksize)
    290 {
    291 #if !defined(HAVE_UNALIGNED_ACCESS)
    292    pre_align unsigned char buf[16] post_align;
    293 #endif
    294    uint8x16_t iv;
    295    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    296    uint8x16_t key11;
    297    const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
    298 
    299    if (!inputLen) {
    300        return SECSuccess;
    301    }
    302 
    303    /* iv */
    304    iv = vld1q_u8(cx->iv);
    305 
    306    /* expanedKey */
    307    key1 = vld1q_u8(key);
    308    key2 = vld1q_u8(key + 16);
    309    key3 = vld1q_u8(key + 32);
    310    key4 = vld1q_u8(key + 48);
    311    key5 = vld1q_u8(key + 64);
    312    key6 = vld1q_u8(key + 80);
    313    key7 = vld1q_u8(key + 96);
    314    key8 = vld1q_u8(key + 112);
    315    key9 = vld1q_u8(key + 128);
    316    key10 = vld1q_u8(key + 144);
    317    key11 = vld1q_u8(key + 160);
    318 
    319    while (inputLen > 0) {
    320        uint8x16_t state, old_state;
    321 #if defined(HAVE_UNALIGNED_ACCESS)
    322        state = vld1q_u8(input);
    323 #else
    324        if ((uintptr_t)input & 0x7) {
    325            memcpy(buf, input, 16);
    326            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
    327        } else {
    328            state = vld1q_u8(__builtin_assume_aligned(input, 8));
    329        }
    330 #endif
    331        old_state = state;
    332        input += 16;
    333        inputLen -= 16;
    334 
    335        /* Rounds */
    336        state = vaesdq_u8(state, key11);
    337        state = vaesimcq_u8(state);
    338        state = vaesdq_u8(state, key10);
    339        state = vaesimcq_u8(state);
    340        state = vaesdq_u8(state, key9);
    341        state = vaesimcq_u8(state);
    342        state = vaesdq_u8(state, key8);
    343        state = vaesimcq_u8(state);
    344        state = vaesdq_u8(state, key7);
    345        state = vaesimcq_u8(state);
    346        state = vaesdq_u8(state, key6);
    347        state = vaesimcq_u8(state);
    348        state = vaesdq_u8(state, key5);
    349        state = vaesimcq_u8(state);
    350        state = vaesdq_u8(state, key4);
    351        state = vaesimcq_u8(state);
    352        state = vaesdq_u8(state, key3);
    353        state = vaesimcq_u8(state);
    354        state = vaesdq_u8(state, key2);
    355        /* AddRoundKey */
    356        state = veorq_u8(state, key1);
    357 
    358        state = veorq_u8(state, iv);
    359 
    360 #if defined(HAVE_UNALIGNED_ACCESS)
    361        vst1q_u8(output, state);
    362 #else
    363        if ((uintptr_t)output & 0x7) {
    364            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
    365            memcpy(output, buf, 16);
    366        } else {
    367            vst1q_u8(__builtin_assume_aligned(output, 8), state);
    368        }
    369 #endif
    370        output += 16;
    371 
    372        iv = old_state;
    373    }
    374    vst1q_u8(cx->iv, iv);
    375 
    376    return SECSuccess;
    377 }
    378 
    379 SECStatus
    380 arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
    381                        unsigned int *outputLen,
    382                        unsigned int maxOutputLen,
    383                        const unsigned char *input,
    384                        unsigned int inputLen,
    385                        unsigned int blocksize)
    386 {
    387 #if !defined(HAVE_UNALIGNED_ACCESS)
    388    pre_align unsigned char buf[16] post_align;
    389 #endif
    390    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    391    uint8x16_t key11, key12, key13;
    392    PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
    393 
    394    if (!inputLen) {
    395        return SECSuccess;
    396    }
    397 
    398    key1 = vld1q_u8(key);
    399    key2 = vld1q_u8(key + 16);
    400    key3 = vld1q_u8(key + 32);
    401    key4 = vld1q_u8(key + 48);
    402    key5 = vld1q_u8(key + 64);
    403    key6 = vld1q_u8(key + 80);
    404    key7 = vld1q_u8(key + 96);
    405    key8 = vld1q_u8(key + 112);
    406    key9 = vld1q_u8(key + 128);
    407    key10 = vld1q_u8(key + 144);
    408    key11 = vld1q_u8(key + 160);
    409    key12 = vld1q_u8(key + 176);
    410    key13 = vld1q_u8(key + 192);
    411 
    412    while (inputLen > 0) {
    413        uint8x16_t state;
    414 #if defined(HAVE_UNALIGNED_ACCESS)
    415        state = vld1q_u8(input);
    416 #else
    417        if ((uintptr_t)input & 0x7) {
    418            memcpy(buf, input, 16);
    419            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
    420        } else {
    421            state = vld1q_u8(__builtin_assume_aligned(input, 8));
    422        }
    423 #endif
    424        input += 16;
    425        inputLen -= 16;
    426 
    427        /* Rounds */
    428        state = vaeseq_u8(state, key1);
    429        state = vaesmcq_u8(state);
    430        state = vaeseq_u8(state, key2);
    431        state = vaesmcq_u8(state);
    432        state = vaeseq_u8(state, key3);
    433        state = vaesmcq_u8(state);
    434        state = vaeseq_u8(state, key4);
    435        state = vaesmcq_u8(state);
    436        state = vaeseq_u8(state, key5);
    437        state = vaesmcq_u8(state);
    438        state = vaeseq_u8(state, key6);
    439        state = vaesmcq_u8(state);
    440        state = vaeseq_u8(state, key7);
    441        state = vaesmcq_u8(state);
    442        state = vaeseq_u8(state, key8);
    443        state = vaesmcq_u8(state);
    444        state = vaeseq_u8(state, key9);
    445        state = vaesmcq_u8(state);
    446        state = vaeseq_u8(state, key10);
    447        state = vaesmcq_u8(state);
    448        state = vaeseq_u8(state, key11);
    449        state = vaesmcq_u8(state);
    450        state = vaeseq_u8(state, key12);
    451        /* AddRoundKey */
    452        state = veorq_u8(state, key13);
    453 
    454 #if defined(HAVE_UNALIGNED_ACCESS)
    455        vst1q_u8(output, state);
    456 #else
    457        if ((uintptr_t)output & 0x7) {
    458            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
    459            memcpy(output, buf, 16);
    460        } else {
    461            vst1q_u8(__builtin_assume_aligned(output, 8), state);
    462        }
    463 #endif
    464        output += 16;
    465    }
    466 
    467    return SECSuccess;
    468 }
    469 
    470 SECStatus
    471 arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
    472                        unsigned int *outputLen,
    473                        unsigned int maxOutputLen,
    474                        const unsigned char *input,
    475                        unsigned int inputLen,
    476                        unsigned int blocksize)
    477 {
    478 #if !defined(HAVE_UNALIGNED_ACCESS)
    479    pre_align unsigned char buf[16] post_align;
    480 #endif
    481    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    482    uint8x16_t key11, key12, key13;
    483    const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
    484 
    485    if (!inputLen) {
    486        return SECSuccess;
    487    }
    488 
    489    key1 = vld1q_u8(key);
    490    key2 = vld1q_u8(key + 16);
    491    key3 = vld1q_u8(key + 32);
    492    key4 = vld1q_u8(key + 48);
    493    key5 = vld1q_u8(key + 64);
    494    key6 = vld1q_u8(key + 80);
    495    key7 = vld1q_u8(key + 96);
    496    key8 = vld1q_u8(key + 112);
    497    key9 = vld1q_u8(key + 128);
    498    key10 = vld1q_u8(key + 144);
    499    key11 = vld1q_u8(key + 160);
    500    key12 = vld1q_u8(key + 176);
    501    key13 = vld1q_u8(key + 192);
    502 
    503    while (inputLen > 0) {
    504        uint8x16_t state;
    505 #if defined(HAVE_UNALIGNED_ACCESS)
    506        state = vld1q_u8(input);
    507 #else
    508        if ((uintptr_t)input & 0x7) {
    509            memcpy(buf, input, 16);
    510            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
    511        } else {
    512            state = vld1q_u8(__builtin_assume_aligned(input, 8));
    513        }
    514 #endif
    515        input += 16;
    516        inputLen -= 16;
    517 
    518        /* Rounds */
    519        state = vaesdq_u8(state, key13);
    520        state = vaesimcq_u8(state);
    521        state = vaesdq_u8(state, key12);
    522        state = vaesimcq_u8(state);
    523        state = vaesdq_u8(state, key11);
    524        state = vaesimcq_u8(state);
    525        state = vaesdq_u8(state, key10);
    526        state = vaesimcq_u8(state);
    527        state = vaesdq_u8(state, key9);
    528        state = vaesimcq_u8(state);
    529        state = vaesdq_u8(state, key8);
    530        state = vaesimcq_u8(state);
    531        state = vaesdq_u8(state, key7);
    532        state = vaesimcq_u8(state);
    533        state = vaesdq_u8(state, key6);
    534        state = vaesimcq_u8(state);
    535        state = vaesdq_u8(state, key5);
    536        state = vaesimcq_u8(state);
    537        state = vaesdq_u8(state, key4);
    538        state = vaesimcq_u8(state);
    539        state = vaesdq_u8(state, key3);
    540        state = vaesimcq_u8(state);
    541        state = vaesdq_u8(state, key2);
    542        /* AddRoundKey */
    543        state = veorq_u8(state, key1);
    544 
    545 #if defined(HAVE_UNALIGNED_ACCESS)
    546        vst1q_u8(output, state);
    547 #else
    548        if ((uintptr_t)output & 0x7) {
    549            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
    550            memcpy(output, buf, 16);
    551        } else {
    552            vst1q_u8(__builtin_assume_aligned(output, 8), state);
    553        }
    554 #endif
    555        output += 16;
    556    }
    557 
    558    return SECSuccess;
    559 }
    560 
    561 SECStatus
    562 arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
    563                        unsigned int *outputLen,
    564                        unsigned int maxOutputLen,
    565                        const unsigned char *input,
    566                        unsigned int inputLen,
    567                        unsigned int blocksize)
    568 {
    569 #if !defined(HAVE_UNALIGNED_ACCESS)
    570    pre_align unsigned char buf[16] post_align;
    571 #endif
    572    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    573    uint8x16_t key11, key12, key13;
    574    uint8x16_t iv;
    575    PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
    576 
    577    if (!inputLen) {
    578        return SECSuccess;
    579    }
    580 
    581    /* iv */
    582    iv = vld1q_u8(cx->iv);
    583 
    584    /* expanedKey */
    585    key1 = vld1q_u8(key);
    586    key2 = vld1q_u8(key + 16);
    587    key3 = vld1q_u8(key + 32);
    588    key4 = vld1q_u8(key + 48);
    589    key5 = vld1q_u8(key + 64);
    590    key6 = vld1q_u8(key + 80);
    591    key7 = vld1q_u8(key + 96);
    592    key8 = vld1q_u8(key + 112);
    593    key9 = vld1q_u8(key + 128);
    594    key10 = vld1q_u8(key + 144);
    595    key11 = vld1q_u8(key + 160);
    596    key12 = vld1q_u8(key + 176);
    597    key13 = vld1q_u8(key + 192);
    598 
    599    while (inputLen > 0) {
    600        uint8x16_t state;
    601 #if defined(HAVE_UNALIGNED_ACCESS)
    602        state = vld1q_u8(input);
    603 #else
    604        if ((uintptr_t)input & 0x7) {
    605            memcpy(buf, input, 16);
    606            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
    607        } else {
    608            state = vld1q_u8(__builtin_assume_aligned(input, 8));
    609        }
    610 #endif
    611        input += 16;
    612        inputLen -= 16;
    613 
    614        state = veorq_u8(state, iv);
    615 
    616        /* Rounds */
    617        state = vaeseq_u8(state, key1);
    618        state = vaesmcq_u8(state);
    619        state = vaeseq_u8(state, key2);
    620        state = vaesmcq_u8(state);
    621        state = vaeseq_u8(state, key3);
    622        state = vaesmcq_u8(state);
    623        state = vaeseq_u8(state, key4);
    624        state = vaesmcq_u8(state);
    625        state = vaeseq_u8(state, key5);
    626        state = vaesmcq_u8(state);
    627        state = vaeseq_u8(state, key6);
    628        state = vaesmcq_u8(state);
    629        state = vaeseq_u8(state, key7);
    630        state = vaesmcq_u8(state);
    631        state = vaeseq_u8(state, key8);
    632        state = vaesmcq_u8(state);
    633        state = vaeseq_u8(state, key9);
    634        state = vaesmcq_u8(state);
    635        state = vaeseq_u8(state, key10);
    636        state = vaesmcq_u8(state);
    637        state = vaeseq_u8(state, key11);
    638        state = vaesmcq_u8(state);
    639        state = vaeseq_u8(state, key12);
    640        state = veorq_u8(state, key13);
    641 
    642 #if defined(HAVE_UNALIGNED_ACCESS)
    643        vst1q_u8(output, state);
    644 #else
    645        if ((uintptr_t)output & 0x7) {
    646            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
    647            memcpy(output, buf, 16);
    648        } else {
    649            vst1q_u8(__builtin_assume_aligned(output, 8), state);
    650        }
    651 #endif
    652        output += 16;
    653        iv = state;
    654    }
    655    vst1q_u8(cx->iv, iv);
    656 
    657    return SECSuccess;
    658 }
    659 
    660 SECStatus
    661 arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
    662                        unsigned int *outputLen,
    663                        unsigned int maxOutputLen,
    664                        const unsigned char *input,
    665                        unsigned int inputLen,
    666                        unsigned int blocksize)
    667 {
    668 #if !defined(HAVE_UNALIGNED_ACCESS)
    669    pre_align unsigned char buf[16] post_align;
    670 #endif
    671    uint8x16_t iv;
    672    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    673    uint8x16_t key11, key12, key13;
    674    const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
    675 
    676    if (!inputLen) {
    677        return SECSuccess;
    678    }
    679 
    680    /* iv */
    681    iv = vld1q_u8(cx->iv);
    682 
    683    /* expanedKey */
    684    key1 = vld1q_u8(key);
    685    key2 = vld1q_u8(key + 16);
    686    key3 = vld1q_u8(key + 32);
    687    key4 = vld1q_u8(key + 48);
    688    key5 = vld1q_u8(key + 64);
    689    key6 = vld1q_u8(key + 80);
    690    key7 = vld1q_u8(key + 96);
    691    key8 = vld1q_u8(key + 112);
    692    key9 = vld1q_u8(key + 128);
    693    key10 = vld1q_u8(key + 144);
    694    key11 = vld1q_u8(key + 160);
    695    key12 = vld1q_u8(key + 176);
    696    key13 = vld1q_u8(key + 192);
    697 
    698    while (inputLen > 0) {
    699        uint8x16_t state, old_state;
    700 #if defined(HAVE_UNALIGNED_ACCESS)
    701        state = vld1q_u8(input);
    702 #else
    703        if ((uintptr_t)input & 0x7) {
    704            memcpy(buf, input, 16);
    705            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
    706        } else {
    707            state = vld1q_u8(__builtin_assume_aligned(input, 8));
    708        }
    709 #endif
    710        old_state = state;
    711        input += 16;
    712        inputLen -= 16;
    713 
    714        /* Rounds */
    715        state = vaesdq_u8(state, key13);
    716        state = vaesimcq_u8(state);
    717        state = vaesdq_u8(state, key12);
    718        state = vaesimcq_u8(state);
    719        state = vaesdq_u8(state, key11);
    720        state = vaesimcq_u8(state);
    721        state = vaesdq_u8(state, key10);
    722        state = vaesimcq_u8(state);
    723        state = vaesdq_u8(state, key9);
    724        state = vaesimcq_u8(state);
    725        state = vaesdq_u8(state, key8);
    726        state = vaesimcq_u8(state);
    727        state = vaesdq_u8(state, key7);
    728        state = vaesimcq_u8(state);
    729        state = vaesdq_u8(state, key6);
    730        state = vaesimcq_u8(state);
    731        state = vaesdq_u8(state, key5);
    732        state = vaesimcq_u8(state);
    733        state = vaesdq_u8(state, key4);
    734        state = vaesimcq_u8(state);
    735        state = vaesdq_u8(state, key3);
    736        state = vaesimcq_u8(state);
    737        state = vaesdq_u8(state, key2);
    738        /* AddRoundKey */
    739        state = veorq_u8(state, key1);
    740 
    741        state = veorq_u8(state, iv);
    742 
    743 #if defined(HAVE_UNALIGNED_ACCESS)
    744        vst1q_u8(output, state);
    745 #else
    746        if ((uintptr_t)output & 0x7) {
    747            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
    748            memcpy(output, buf, 16);
    749        } else {
    750            vst1q_u8(__builtin_assume_aligned(output, 8), state);
    751        }
    752 #endif
    753        output += 16;
    754 
    755        iv = old_state;
    756    }
    757    vst1q_u8(cx->iv, iv);
    758 
    759    return SECSuccess;
    760 }
    761 
    762 SECStatus
    763 arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
    764                        unsigned int *outputLen,
    765                        unsigned int maxOutputLen,
    766                        const unsigned char *input,
    767                        unsigned int inputLen,
    768                        unsigned int blocksize)
    769 {
    770 #if !defined(HAVE_UNALIGNED_ACCESS)
    771    pre_align unsigned char buf[16] post_align;
    772 #endif
    773    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    774    uint8x16_t key11, key12, key13, key14, key15;
    775    PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
    776 
    777    if (inputLen == 0) {
    778        return SECSuccess;
    779    }
    780 
    781    key1 = vld1q_u8(key);
    782    key2 = vld1q_u8(key + 16);
    783    key3 = vld1q_u8(key + 32);
    784    key4 = vld1q_u8(key + 48);
    785    key5 = vld1q_u8(key + 64);
    786    key6 = vld1q_u8(key + 80);
    787    key7 = vld1q_u8(key + 96);
    788    key8 = vld1q_u8(key + 112);
    789    key9 = vld1q_u8(key + 128);
    790    key10 = vld1q_u8(key + 144);
    791    key11 = vld1q_u8(key + 160);
    792    key12 = vld1q_u8(key + 176);
    793    key13 = vld1q_u8(key + 192);
    794    key14 = vld1q_u8(key + 208);
    795    key15 = vld1q_u8(key + 224);
    796 
    797    while (inputLen > 0) {
    798        uint8x16_t state;
    799 #if defined(HAVE_UNALIGNED_ACCESS)
    800        state = vld1q_u8(input);
    801 #else
    802        if ((uintptr_t)input & 0x7) {
    803            memcpy(buf, input, 16);
    804            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
    805        } else {
    806            state = vld1q_u8(__builtin_assume_aligned(input, 8));
    807        }
    808 #endif
    809        input += 16;
    810        inputLen -= 16;
    811 
    812        /* Rounds */
    813        state = vaeseq_u8(state, key1);
    814        state = vaesmcq_u8(state);
    815        state = vaeseq_u8(state, key2);
    816        state = vaesmcq_u8(state);
    817        state = vaeseq_u8(state, key3);
    818        state = vaesmcq_u8(state);
    819        state = vaeseq_u8(state, key4);
    820        state = vaesmcq_u8(state);
    821        state = vaeseq_u8(state, key5);
    822        state = vaesmcq_u8(state);
    823        state = vaeseq_u8(state, key6);
    824        state = vaesmcq_u8(state);
    825        state = vaeseq_u8(state, key7);
    826        state = vaesmcq_u8(state);
    827        state = vaeseq_u8(state, key8);
    828        state = vaesmcq_u8(state);
    829        state = vaeseq_u8(state, key9);
    830        state = vaesmcq_u8(state);
    831        state = vaeseq_u8(state, key10);
    832        state = vaesmcq_u8(state);
    833        state = vaeseq_u8(state, key11);
    834        state = vaesmcq_u8(state);
    835        state = vaeseq_u8(state, key12);
    836        state = vaesmcq_u8(state);
    837        state = vaeseq_u8(state, key13);
    838        state = vaesmcq_u8(state);
    839        state = vaeseq_u8(state, key14);
    840        /* AddRoundKey */
    841        state = veorq_u8(state, key15);
    842 
    843 #if defined(HAVE_UNALIGNED_ACCESS)
    844        vst1q_u8(output, state);
    845 #else
    846        if ((uintptr_t)output & 0x7) {
    847            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
    848            memcpy(output, buf, 16);
    849        } else {
    850            vst1q_u8(__builtin_assume_aligned(output, 8), state);
    851        }
    852 #endif
    853        output += 16;
    854    }
    855    return SECSuccess;
    856 }
    857 
    858 SECStatus
    859 arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
    860                        unsigned int *outputLen,
    861                        unsigned int maxOutputLen,
    862                        const unsigned char *input,
    863                        unsigned int inputLen,
    864                        unsigned int blocksize)
    865 {
    866 #if !defined(HAVE_UNALIGNED_ACCESS)
    867    pre_align unsigned char buf[16] post_align;
    868 #endif
    869    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    870    uint8x16_t key11, key12, key13, key14, key15;
    871    const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
    872 
    873    if (!inputLen) {
    874        return SECSuccess;
    875    }
    876 
    877    key1 = vld1q_u8(key);
    878    key2 = vld1q_u8(key + 16);
    879    key3 = vld1q_u8(key + 32);
    880    key4 = vld1q_u8(key + 48);
    881    key5 = vld1q_u8(key + 64);
    882    key6 = vld1q_u8(key + 80);
    883    key7 = vld1q_u8(key + 96);
    884    key8 = vld1q_u8(key + 112);
    885    key9 = vld1q_u8(key + 128);
    886    key10 = vld1q_u8(key + 144);
    887    key11 = vld1q_u8(key + 160);
    888    key12 = vld1q_u8(key + 176);
    889    key13 = vld1q_u8(key + 192);
    890    key14 = vld1q_u8(key + 208);
    891    key15 = vld1q_u8(key + 224);
    892 
    893    while (inputLen > 0) {
    894        uint8x16_t state;
    895 #if defined(HAVE_UNALIGNED_ACCESS)
    896        state = vld1q_u8(input);
    897 #else
    898        if ((uintptr_t)input & 0x7) {
    899            memcpy(buf, input, 16);
    900            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
    901        } else {
    902            state = vld1q_u8(__builtin_assume_aligned(input, 8));
    903        }
    904 #endif
    905        input += 16;
    906        inputLen -= 16;
    907 
    908        /* Rounds */
    909        state = vaesdq_u8(state, key15);
    910        state = vaesimcq_u8(state);
    911        state = vaesdq_u8(state, key14);
    912        state = vaesimcq_u8(state);
    913        state = vaesdq_u8(state, key13);
    914        state = vaesimcq_u8(state);
    915        state = vaesdq_u8(state, key12);
    916        state = vaesimcq_u8(state);
    917        state = vaesdq_u8(state, key11);
    918        state = vaesimcq_u8(state);
    919        state = vaesdq_u8(state, key10);
    920        state = vaesimcq_u8(state);
    921        state = vaesdq_u8(state, key9);
    922        state = vaesimcq_u8(state);
    923        state = vaesdq_u8(state, key8);
    924        state = vaesimcq_u8(state);
    925        state = vaesdq_u8(state, key7);
    926        state = vaesimcq_u8(state);
    927        state = vaesdq_u8(state, key6);
    928        state = vaesimcq_u8(state);
    929        state = vaesdq_u8(state, key5);
    930        state = vaesimcq_u8(state);
    931        state = vaesdq_u8(state, key4);
    932        state = vaesimcq_u8(state);
    933        state = vaesdq_u8(state, key3);
    934        state = vaesimcq_u8(state);
    935        state = vaesdq_u8(state, key2);
    936        /* AddRoundKey */
    937        state = veorq_u8(state, key1);
    938 
    939 #if defined(HAVE_UNALIGNED_ACCESS)
    940        vst1q_u8(output, state);
    941 #else
    942        if ((uintptr_t)output & 0x7) {
    943            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
    944            memcpy(output, buf, 16);
    945        } else {
    946            vst1q_u8(__builtin_assume_aligned(output, 8), state);
    947        }
    948 #endif
    949        output += 16;
    950    }
    951 
    952    return SECSuccess;
    953 }
    954 
    955 SECStatus
    956 arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
    957                        unsigned int *outputLen,
    958                        unsigned int maxOutputLen,
    959                        const unsigned char *input,
    960                        unsigned int inputLen,
    961                        unsigned int blocksize)
    962 {
    963 #if !defined(HAVE_UNALIGNED_ACCESS)
    964    pre_align unsigned char buf[16] post_align;
    965 #endif
    966    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
    967    uint8x16_t key11, key12, key13, key14, key15;
    968    uint8x16_t iv;
    969    const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
    970 
    971    if (!inputLen) {
    972        return SECSuccess;
    973    }
    974 
    975    /* iv */
    976    iv = vld1q_u8(cx->iv);
    977 
    978    /* expanedKey */
    979    key1 = vld1q_u8(key);
    980    key2 = vld1q_u8(key + 16);
    981    key3 = vld1q_u8(key + 32);
    982    key4 = vld1q_u8(key + 48);
    983    key5 = vld1q_u8(key + 64);
    984    key6 = vld1q_u8(key + 80);
    985    key7 = vld1q_u8(key + 96);
    986    key8 = vld1q_u8(key + 112);
    987    key9 = vld1q_u8(key + 128);
    988    key10 = vld1q_u8(key + 144);
    989    key11 = vld1q_u8(key + 160);
    990    key12 = vld1q_u8(key + 176);
    991    key13 = vld1q_u8(key + 192);
    992    key14 = vld1q_u8(key + 208);
    993    key15 = vld1q_u8(key + 224);
    994 
    995    while (inputLen > 0) {
    996        uint8x16_t state;
    997 #if defined(HAVE_UNALIGNED_ACCESS)
    998        state = vld1q_u8(input);
    999 #else
   1000        if ((uintptr_t)input & 0x7) {
   1001            memcpy(buf, input, 16);
   1002            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
   1003        } else {
   1004            state = vld1q_u8(__builtin_assume_aligned(input, 8));
   1005        }
   1006 #endif
   1007        input += 16;
   1008        inputLen -= 16;
   1009 
   1010        state = veorq_u8(state, iv);
   1011 
   1012        /* Rounds */
   1013        state = vaeseq_u8(state, key1);
   1014        state = vaesmcq_u8(state);
   1015        state = vaeseq_u8(state, key2);
   1016        state = vaesmcq_u8(state);
   1017        state = vaeseq_u8(state, key3);
   1018        state = vaesmcq_u8(state);
   1019        state = vaeseq_u8(state, key4);
   1020        state = vaesmcq_u8(state);
   1021        state = vaeseq_u8(state, key5);
   1022        state = vaesmcq_u8(state);
   1023        state = vaeseq_u8(state, key6);
   1024        state = vaesmcq_u8(state);
   1025        state = vaeseq_u8(state, key7);
   1026        state = vaesmcq_u8(state);
   1027        state = vaeseq_u8(state, key8);
   1028        state = vaesmcq_u8(state);
   1029        state = vaeseq_u8(state, key9);
   1030        state = vaesmcq_u8(state);
   1031        state = vaeseq_u8(state, key10);
   1032        state = vaesmcq_u8(state);
   1033        state = vaeseq_u8(state, key11);
   1034        state = vaesmcq_u8(state);
   1035        state = vaeseq_u8(state, key12);
   1036        state = vaesmcq_u8(state);
   1037        state = vaeseq_u8(state, key13);
   1038        state = vaesmcq_u8(state);
   1039        state = vaeseq_u8(state, key14);
   1040        /* AddRoundKey */
   1041        state = veorq_u8(state, key15);
   1042 
   1043 #if defined(HAVE_UNALIGNED_ACCESS)
   1044        vst1q_u8(output, state);
   1045 #else
   1046        if ((uintptr_t)output & 0x7) {
   1047            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
   1048            memcpy(output, buf, 16);
   1049        } else {
   1050            vst1q_u8(__builtin_assume_aligned(output, 8), state);
   1051        }
   1052 #endif
   1053        output += 16;
   1054        iv = state;
   1055    }
   1056    vst1q_u8(cx->iv, iv);
   1057 
   1058    return SECSuccess;
   1059 }
   1060 
   1061 SECStatus
   1062 arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
   1063                        unsigned int *outputLen,
   1064                        unsigned int maxOutputLen,
   1065                        const unsigned char *input,
   1066                        unsigned int inputLen,
   1067                        unsigned int blocksize)
   1068 {
   1069 #if !defined(HAVE_UNALIGNED_ACCESS)
   1070    pre_align unsigned char buf[16] post_align;
   1071 #endif
   1072    uint8x16_t iv;
   1073    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
   1074    uint8x16_t key11, key12, key13, key14, key15;
   1075    const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
   1076 
   1077    if (!inputLen) {
   1078        return SECSuccess;
   1079    }
   1080 
   1081    /* iv */
   1082    iv = vld1q_u8(cx->iv);
   1083 
   1084    /* expanedKey */
   1085    key1 = vld1q_u8(key);
   1086    key2 = vld1q_u8(key + 16);
   1087    key3 = vld1q_u8(key + 32);
   1088    key4 = vld1q_u8(key + 48);
   1089    key5 = vld1q_u8(key + 64);
   1090    key6 = vld1q_u8(key + 80);
   1091    key7 = vld1q_u8(key + 96);
   1092    key8 = vld1q_u8(key + 112);
   1093    key9 = vld1q_u8(key + 128);
   1094    key10 = vld1q_u8(key + 144);
   1095    key11 = vld1q_u8(key + 160);
   1096    key12 = vld1q_u8(key + 176);
   1097    key13 = vld1q_u8(key + 192);
   1098    key14 = vld1q_u8(key + 208);
   1099    key15 = vld1q_u8(key + 224);
   1100 
   1101    while (inputLen > 0) {
   1102        uint8x16_t state, old_state;
   1103 #if defined(HAVE_UNALIGNED_ACCESS)
   1104        state = vld1q_u8(input);
   1105 #else
   1106        if ((uintptr_t)input & 0x7) {
   1107            memcpy(buf, input, 16);
   1108            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
   1109        } else {
   1110            state = vld1q_u8(__builtin_assume_aligned(input, 8));
   1111        }
   1112 #endif
   1113        old_state = state;
   1114        input += 16;
   1115        inputLen -= 16;
   1116 
   1117        /* Rounds */
   1118        state = vaesdq_u8(state, key15);
   1119        state = vaesimcq_u8(state);
   1120        state = vaesdq_u8(state, key14);
   1121        state = vaesimcq_u8(state);
   1122        state = vaesdq_u8(state, key13);
   1123        state = vaesimcq_u8(state);
   1124        state = vaesdq_u8(state, key12);
   1125        state = vaesimcq_u8(state);
   1126        state = vaesdq_u8(state, key11);
   1127        state = vaesimcq_u8(state);
   1128        state = vaesdq_u8(state, key10);
   1129        state = vaesimcq_u8(state);
   1130        state = vaesdq_u8(state, key9);
   1131        state = vaesimcq_u8(state);
   1132        state = vaesdq_u8(state, key8);
   1133        state = vaesimcq_u8(state);
   1134        state = vaesdq_u8(state, key7);
   1135        state = vaesimcq_u8(state);
   1136        state = vaesdq_u8(state, key6);
   1137        state = vaesimcq_u8(state);
   1138        state = vaesdq_u8(state, key5);
   1139        state = vaesimcq_u8(state);
   1140        state = vaesdq_u8(state, key4);
   1141        state = vaesimcq_u8(state);
   1142        state = vaesdq_u8(state, key3);
   1143        state = vaesimcq_u8(state);
   1144        state = vaesdq_u8(state, key2);
   1145        /* AddRoundKey */
   1146        state = veorq_u8(state, key1);
   1147 
   1148        state = veorq_u8(state, iv);
   1149 
   1150 #if defined(HAVE_UNALIGNED_ACCESS)
   1151        vst1q_u8(output, state);
   1152 #else
   1153        if ((uintptr_t)output & 0x7) {
   1154            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
   1155            memcpy(output, buf, 16);
   1156        } else {
   1157            vst1q_u8(__builtin_assume_aligned(output, 8), state);
   1158        }
   1159 #endif
   1160        output += 16;
   1161 
   1162        iv = old_state;
   1163    }
   1164    vst1q_u8(cx->iv, iv);
   1165 
   1166    return SECSuccess;
   1167 }
   1168 
   1169 #endif