tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rijndael.c (47416B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #ifdef FREEBL_NO_DEPEND
      6 #include "stubs.h"
      7 #endif
      8 
      9 #include "blapit.h"
     10 #include "prenv.h"
     11 #include "prerr.h"
     12 #include "prinit.h"
     13 #include "secerr.h"
     14 
     15 #include "prtypes.h"
     16 #include "blapi.h"
     17 #include "rijndael.h"
     18 
     19 #include "cts.h"
     20 #include "ctr.h"
     21 #include "gcm.h"
     22 #include "mpi.h"
     23 
     24 #if !defined(IS_LITTLE_ENDIAN) && !defined(NSS_X86_OR_X64)
     25 // not tested yet on big-endian ARM platforms
     26 #undef USE_HW_AES
     27 #endif
     28 
     29 #ifdef __powerpc64__
     30 #include "ppc-crypto.h"
     31 #endif
     32 
     33 #ifdef USE_HW_AES
     34 #ifdef NSS_X86_OR_X64
     35 #include "intel-aes.h"
     36 #else
     37 #include "aes-armv8.h"
     38 #endif
     39 #endif /* USE_HW_AES */
     40 #ifdef INTEL_GCM
     41 #include "intel-gcm.h"
     42 #endif /* INTEL_GCM */
     43 #if defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
     44 #include "ppc-gcm.h"
     45 #endif
     46 
     47 /* Forward declarations */
     48 void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
     49                                   unsigned int Nk);
     50 void rijndael_native_encryptBlock(AESContext *cx,
     51                                  unsigned char *output,
     52                                  const unsigned char *input);
     53 void rijndael_native_decryptBlock(AESContext *cx,
     54                                  unsigned char *output,
     55                                  const unsigned char *input);
     56 void native_xorBlock(unsigned char *out,
     57                     const unsigned char *a,
     58                     const unsigned char *b);
     59 
     60 /* Stub definitions for the above rijndael_native_* functions, which
     61 * shouldn't be used unless NSS_X86_OR_X64 is defined */
     62 #ifndef NSS_X86_OR_X64
     63 void
     64 rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
     65                              unsigned int Nk)
     66 {
     67    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
     68    PORT_Assert(0);
     69 }
     70 
     71 void
     72 rijndael_native_encryptBlock(AESContext *cx,
     73                             unsigned char *output,
     74                             const unsigned char *input)
     75 {
     76    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
     77    PORT_Assert(0);
     78 }
     79 
     80 void
     81 rijndael_native_decryptBlock(AESContext *cx,
     82                             unsigned char *output,
     83                             const unsigned char *input)
     84 {
     85    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
     86    PORT_Assert(0);
     87 }
     88 
     89 void
     90 native_xorBlock(unsigned char *out, const unsigned char *a,
     91                const unsigned char *b)
     92 {
     93    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
     94    PORT_Assert(0);
     95 }
     96 #endif /* NSS_X86_OR_X64 */
     97 
     98 /*
     99 * There are currently three ways to build this code, varying in performance
    100 * and code size.
    101 *
    102 * RIJNDAEL_INCLUDE_TABLES         Include all tables from rijndael32.tab
    103 * RIJNDAEL_GENERATE_VALUES        Do not store tables, generate the table
    104 *                                 values "on-the-fly", using gfm
    105 * RIJNDAEL_GENERATE_VALUES_MACRO  Same as above, but use macros
    106 *
    107 * The default is RIJNDAEL_INCLUDE_TABLES.
    108 */
    109 
    110 /*
    111 * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4],
    112 *                                                 T**-1[0..4], IMXC[0..4]
    113 * When building anything else, includes S, S**-1, Rcon
    114 */
    115 #include "rijndael32.tab"
    116 
    117 #if defined(RIJNDAEL_INCLUDE_TABLES)
    118 /*
    119 * RIJNDAEL_INCLUDE_TABLES
    120 */
    121 #define T0(i) _T0[i]
    122 #define T1(i) _T1[i]
    123 #define T2(i) _T2[i]
    124 #define T3(i) _T3[i]
    125 #define TInv0(i) _TInv0[i]
    126 #define TInv1(i) _TInv1[i]
    127 #define TInv2(i) _TInv2[i]
    128 #define TInv3(i) _TInv3[i]
    129 #define IMXC0(b) _IMXC0[b]
    130 #define IMXC1(b) _IMXC1[b]
    131 #define IMXC2(b) _IMXC2[b]
    132 #define IMXC3(b) _IMXC3[b]
    133 /* The S-box can be recovered from the T-tables */
    134 #ifdef IS_LITTLE_ENDIAN
    135 #define SBOX(b) ((PRUint8)_T3[b])
    136 #else
    137 #define SBOX(b) ((PRUint8)_T1[b])
    138 #endif
    139 #define SINV(b) (_SInv[b])
    140 
    141 #else /* not RIJNDAEL_INCLUDE_TABLES */
    142 
    143 /*
    144 * Code for generating T-table values.
    145 */
    146 
    147 #ifdef IS_LITTLE_ENDIAN
    148 #define WORD4(b0, b1, b2, b3) \
    149    ((((PRUint32)b3) << 24) | \
    150     (((PRUint32)b2) << 16) | \
    151     (((PRUint32)b1) << 8) |  \
    152     ((PRUint32)b0))
    153 #else
    154 #define WORD4(b0, b1, b2, b3) \
    155    ((((PRUint32)b0) << 24) | \
    156     (((PRUint32)b1) << 16) | \
    157     (((PRUint32)b2) << 8) |  \
    158     ((PRUint32)b3))
    159 #endif
    160 
    161 /*
    162 * Define the S and S**-1 tables (both have been stored)
    163 */
    164 #define SBOX(b) (_S[b])
    165 #define SINV(b) (_SInv[b])
    166 
    167 /*
    168 * The function xtime, used for Galois field multiplication
    169 */
    170 #define XTIME(a) \
    171    ((a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1))
    172 
    173 /* Choose GFM method (macros or function) */
    174 #if defined(RIJNDAEL_GENERATE_VALUES_MACRO)
    175 
    176 /*
    177 * Galois field GF(2**8) multipliers, in macro form
    178 */
    179 #define GFM01(a) \
    180    (a) /* a * 01 = a, the identity */
    181 #define GFM02(a) \
    182    (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
    183 #define GFM04(a) \
    184    (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */
    185 #define GFM08(a) \
    186    (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */
    187 #define GFM03(a) \
    188    (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */
    189 #define GFM09(a) \
    190    (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */
    191 #define GFM0B(a) \
    192    (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */
    193 #define GFM0D(a) \
    194    (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */
    195 #define GFM0E(a) \
    196    (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */
    197 
    198 #else /* RIJNDAEL_GENERATE_VALUES */
    199 
    200 /* GF_MULTIPLY
    201 *
    202 * multiply two bytes represented in GF(2**8), mod (x**4 + 1)
    203 */
    204 PRUint8
    205 gfm(PRUint8 a, PRUint8 b)
    206 {
    207    PRUint8 res = 0;
    208    while (b > 0) {
    209        res = (b & 0x01) ? res ^ a : res;
    210        a = XTIME(a);
    211        b >>= 1;
    212    }
    213    return res;
    214 }
    215 
    216 #define GFM01(a) \
    217    (a) /* a * 01 = a, the identity */
    218 #define GFM02(a) \
    219    (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
    220 #define GFM03(a) \
    221    (gfm(a, 0x03)) /* a * 03 */
    222 #define GFM09(a) \
    223    (gfm(a, 0x09)) /* a * 09 */
    224 #define GFM0B(a) \
    225    (gfm(a, 0x0B)) /* a * 0B */
    226 #define GFM0D(a) \
    227    (gfm(a, 0x0D)) /* a * 0D */
    228 #define GFM0E(a) \
    229    (gfm(a, 0x0E)) /* a * 0E */
    230 
    231 #endif /* choosing GFM function */
    232 
    233 /*
    234 * The T-tables
    235 */
    236 #define G_T0(i) \
    237    (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i))))
    238 #define G_T1(i) \
    239    (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i))))
    240 #define G_T2(i) \
    241    (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i))))
    242 #define G_T3(i) \
    243    (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i))))
    244 
    245 /*
    246 * The inverse T-tables
    247 */
    248 #define G_TInv0(i) \
    249    (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i))))
    250 #define G_TInv1(i) \
    251    (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i))))
    252 #define G_TInv2(i) \
    253    (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i))))
    254 #define G_TInv3(i) \
    255    (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i))))
    256 
    257 /*
    258 * The inverse mix column tables
    259 */
    260 #define G_IMXC0(i) \
    261    (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i)))
    262 #define G_IMXC1(i) \
    263    (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i)))
    264 #define G_IMXC2(i) \
    265    (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i)))
    266 #define G_IMXC3(i) \
    267    (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i)))
    268 
    269 /* Now choose the T-table indexing method */
    270 #if defined(RIJNDAEL_GENERATE_VALUES)
    271 /* generate values for the tables with a function*/
    272 static PRUint32
    273 gen_TInvXi(PRUint8 tx, PRUint8 i)
    274 {
    275    PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
    276    si01 = SINV(i);
    277    si02 = XTIME(si01);
    278    si04 = XTIME(si02);
    279    si08 = XTIME(si04);
    280    si03 = si02 ^ si01;
    281    si09 = si08 ^ si01;
    282    si0B = si08 ^ si03;
    283    si0D = si09 ^ si04;
    284    si0E = si08 ^ si04 ^ si02;
    285    switch (tx) {
    286        case 0:
    287            return WORD4(si0E, si09, si0D, si0B);
    288        case 1:
    289            return WORD4(si0B, si0E, si09, si0D);
    290        case 2:
    291            return WORD4(si0D, si0B, si0E, si09);
    292        case 3:
    293            return WORD4(si09, si0D, si0B, si0E);
    294    }
    295    return -1;
    296 }
    297 #define T0(i) G_T0(i)
    298 #define T1(i) G_T1(i)
    299 #define T2(i) G_T2(i)
    300 #define T3(i) G_T3(i)
    301 #define TInv0(i) gen_TInvXi(0, i)
    302 #define TInv1(i) gen_TInvXi(1, i)
    303 #define TInv2(i) gen_TInvXi(2, i)
    304 #define TInv3(i) gen_TInvXi(3, i)
    305 #define IMXC0(b) G_IMXC0(b)
    306 #define IMXC1(b) G_IMXC1(b)
    307 #define IMXC2(b) G_IMXC2(b)
    308 #define IMXC3(b) G_IMXC3(b)
    309 #else /* RIJNDAEL_GENERATE_VALUES_MACRO */
    310 /* generate values for the tables with macros */
    311 #define T0(i) G_T0(i)
    312 #define T1(i) G_T1(i)
    313 #define T2(i) G_T2(i)
    314 #define T3(i) G_T3(i)
    315 #define TInv0(i) G_TInv0(i)
    316 #define TInv1(i) G_TInv1(i)
    317 #define TInv2(i) G_TInv2(i)
    318 #define TInv3(i) G_TInv3(i)
    319 #define IMXC0(b) G_IMXC0(b)
    320 #define IMXC1(b) G_IMXC1(b)
    321 #define IMXC2(b) G_IMXC2(b)
    322 #define IMXC3(b) G_IMXC3(b)
    323 #endif /* choose T-table indexing method */
    324 
    325 #endif /* not RIJNDAEL_INCLUDE_TABLES */
    326 
    327 /**************************************************************************
    328 *
    329 * Stuff related to the Rijndael key schedule
    330 *
    331 *************************************************************************/
    332 
    333 #define SUBBYTE(w)                                \
    334    ((((PRUint32)SBOX((w >> 24) & 0xff)) << 24) | \
    335     (((PRUint32)SBOX((w >> 16) & 0xff)) << 16) | \
    336     (((PRUint32)SBOX((w >> 8) & 0xff)) << 8) |   \
    337     (((PRUint32)SBOX((w)&0xff))))
    338 
    339 #ifdef IS_LITTLE_ENDIAN
    340 #define ROTBYTE(b) \
    341    ((b >> 8) | (b << 24))
    342 #else
    343 #define ROTBYTE(b) \
    344    ((b << 8) | (b >> 24))
    345 #endif
    346 
    347 /* rijndael_key_expansion7
    348 *
    349 * Generate the expanded key from the key input by the user.
    350 * XXX
    351 * Nk == 7 (224 key bits) is a weird case.  Since Nk > 6, an added SubByte
    352 * transformation is done periodically.  The period is every 4 bytes, and
    353 * since 7%4 != 0 this happens at different times for each key word (unlike
    354 * Nk == 8 where it happens twice in every key word, in the same positions).
    355 * For now, I'm implementing this case "dumbly", w/o any unrolling.
    356 */
    357 static void
    358 rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)
    359 {
    360    unsigned int i;
    361    PRUint32 *W;
    362    PRUint32 *pW;
    363    PRUint32 tmp;
    364    W = cx->k.expandedKey;
    365    /* 1.  the first Nk words contain the cipher key */
    366    memcpy(W, key, Nk * 4);
    367    i = Nk;
    368    /* 2.  loop until full expanded key is obtained */
    369    pW = W + i - 1;
    370    for (; i < cx->Nb * (cx->Nr + 1); ++i) {
    371        tmp = *pW++;
    372        if (i % Nk == 0)
    373            tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
    374        else if (i % Nk == 4)
    375            tmp = SUBBYTE(tmp);
    376        *pW = W[i - Nk] ^ tmp;
    377    }
    378 }
    379 
    380 /* rijndael_key_expansion
    381 *
    382 * Generate the expanded key from the key input by the user.
    383 */
    384 static void
    385 rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
    386 {
    387    unsigned int i;
    388    PRUint32 *W;
    389    PRUint32 *pW;
    390    PRUint32 tmp;
    391    unsigned int round_key_words = cx->Nb * (cx->Nr + 1);
    392    if (Nk == 7) {
    393        rijndael_key_expansion7(cx, key, Nk);
    394        return;
    395    }
    396    W = cx->k.expandedKey;
    397    /* The first Nk words contain the input cipher key */
    398    memcpy(W, key, Nk * 4);
    399    i = Nk;
    400    pW = W + i - 1;
    401    /* Loop over all sets of Nk words, except the last */
    402    while (i < round_key_words - Nk) {
    403        tmp = *pW++;
    404        tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
    405        *pW = W[i++ - Nk] ^ tmp;
    406        tmp = *pW++;
    407        *pW = W[i++ - Nk] ^ tmp;
    408        tmp = *pW++;
    409        *pW = W[i++ - Nk] ^ tmp;
    410        tmp = *pW++;
    411        *pW = W[i++ - Nk] ^ tmp;
    412        if (Nk == 4)
    413            continue;
    414        switch (Nk) {
    415            case 8:
    416                tmp = *pW++;
    417                tmp = SUBBYTE(tmp);
    418                *pW = W[i++ - Nk] ^ tmp;
    419            case 7:
    420                tmp = *pW++;
    421                *pW = W[i++ - Nk] ^ tmp;
    422            case 6:
    423                tmp = *pW++;
    424                *pW = W[i++ - Nk] ^ tmp;
    425            case 5:
    426                tmp = *pW++;
    427                *pW = W[i++ - Nk] ^ tmp;
    428        }
    429    }
    430    /* Generate the last word */
    431    tmp = *pW++;
    432    tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
    433    *pW = W[i++ - Nk] ^ tmp;
    434    /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0.  However,
    435     * since the above loop generated all but the last Nk key words, there
    436     * is no more need for the SubByte transformation.
    437     */
    438    if (Nk < 8) {
    439        for (; i < round_key_words; ++i) {
    440            tmp = *pW++;
    441            *pW = W[i - Nk] ^ tmp;
    442        }
    443    } else {
    444        /* except in the case when Nk == 8.  Then one more SubByte may have
    445         * to be performed, at i % Nk == 4.
    446         */
    447        for (; i < round_key_words; ++i) {
    448            tmp = *pW++;
    449            if (i % Nk == 4)
    450                tmp = SUBBYTE(tmp);
    451            *pW = W[i - Nk] ^ tmp;
    452        }
    453    }
    454 }
    455 
    456 /* rijndael_invkey_expansion
    457 *
    458 * Generate the expanded key for the inverse cipher from the key input by
    459 * the user.
    460 */
    461 static void
    462 rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
    463 {
    464    unsigned int r;
    465    PRUint32 *roundkeyw;
    466    PRUint8 *b;
    467    int Nb = cx->Nb;
    468    /* begins like usual key expansion ... */
    469    rijndael_key_expansion(cx, key, Nk);
    470    /* ... but has the additional step of InvMixColumn,
    471     * excepting the first and last round keys.
    472     */
    473    roundkeyw = cx->k.expandedKey + cx->Nb;
    474    for (r = 1; r < cx->Nr; ++r) {
    475        /* each key word, roundkeyw, represents a column in the key
    476         * matrix.  Each column is multiplied by the InvMixColumn matrix.
    477         *   [ 0E 0B 0D 09 ]   [ b0 ]
    478         *   [ 09 0E 0B 0D ] * [ b1 ]
    479         *   [ 0D 09 0E 0B ]   [ b2 ]
    480         *   [ 0B 0D 09 0E ]   [ b3 ]
    481         */
    482        b = (PRUint8 *)roundkeyw;
    483        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
    484        b = (PRUint8 *)roundkeyw;
    485        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
    486        b = (PRUint8 *)roundkeyw;
    487        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
    488        b = (PRUint8 *)roundkeyw;
    489        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
    490        if (Nb <= 4)
    491            continue;
    492        switch (Nb) {
    493            case 8:
    494                b = (PRUint8 *)roundkeyw;
    495                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
    496                               IMXC2(b[2]) ^ IMXC3(b[3]);
    497            case 7:
    498                b = (PRUint8 *)roundkeyw;
    499                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
    500                               IMXC2(b[2]) ^ IMXC3(b[3]);
    501            case 6:
    502                b = (PRUint8 *)roundkeyw;
    503                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
    504                               IMXC2(b[2]) ^ IMXC3(b[3]);
    505            case 5:
    506                b = (PRUint8 *)roundkeyw;
    507                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
    508                               IMXC2(b[2]) ^ IMXC3(b[3]);
    509        }
    510    }
    511 }
    512 
    513 /**************************************************************************
    514 *
    515 * Stuff related to Rijndael encryption/decryption.
    516 *
    517 *************************************************************************/
    518 
    519 #ifdef IS_LITTLE_ENDIAN
    520 #define BYTE0WORD(w) ((w)&0x000000ff)
    521 #define BYTE1WORD(w) ((w)&0x0000ff00)
    522 #define BYTE2WORD(w) ((w)&0x00ff0000)
    523 #define BYTE3WORD(w) ((w)&0xff000000)
    524 #else
    525 #define BYTE0WORD(w) ((w)&0xff000000)
    526 #define BYTE1WORD(w) ((w)&0x00ff0000)
    527 #define BYTE2WORD(w) ((w)&0x0000ff00)
    528 #define BYTE3WORD(w) ((w)&0x000000ff)
    529 #endif
    530 
    531 typedef union {
    532    PRUint32 w[4];
    533    PRUint8 b[16];
    534 } rijndael_state;
    535 
    536 #define COLUMN_0(state) state.w[0]
    537 #define COLUMN_1(state) state.w[1]
    538 #define COLUMN_2(state) state.w[2]
    539 #define COLUMN_3(state) state.w[3]
    540 
    541 #define STATE_BYTE(i) state.b[i]
    542 
    543 // out = a ^ b
    544 inline static void
    545 xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b)
    546 {
    547    for (unsigned int j = 0; j < AES_BLOCK_SIZE; ++j) {
    548        (out)[j] = (a)[j] ^ (b)[j];
    549    }
    550 }
    551 
    552 static void NO_SANITIZE_ALIGNMENT
    553 rijndael_encryptBlock128(AESContext *cx,
    554                         unsigned char *output,
    555                         const unsigned char *input)
    556 {
    557    unsigned int r;
    558    PRUint32 *roundkeyw;
    559    rijndael_state state;
    560    PRUint32 C0, C1, C2, C3;
    561 #if defined(NSS_X86_OR_X64)
    562 #define pIn input
    563 #define pOut output
    564 #else
    565    unsigned char *pIn, *pOut;
    566    PRUint32 inBuf[4], outBuf[4];
    567 
    568    if ((ptrdiff_t)input & 0x3) {
    569        memcpy(inBuf, input, sizeof inBuf);
    570        pIn = (unsigned char *)inBuf;
    571    } else {
    572        pIn = (unsigned char *)input;
    573    }
    574    if ((ptrdiff_t)output & 0x3) {
    575        pOut = (unsigned char *)outBuf;
    576    } else {
    577        pOut = (unsigned char *)output;
    578    }
    579 #endif
    580    roundkeyw = cx->k.expandedKey;
    581    /* Step 1: Add Round Key 0 to initial state */
    582    COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++;
    583    COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++;
    584    COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++;
    585    COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;
    586    /* Step 2: Loop over rounds [1..NR-1] */
    587    for (r = 1; r < cx->Nr; ++r) {
    588        /* Do ShiftRow, ByteSub, and MixColumn all at once */
    589        C0 = T0(STATE_BYTE(0)) ^
    590             T1(STATE_BYTE(5)) ^
    591             T2(STATE_BYTE(10)) ^
    592             T3(STATE_BYTE(15));
    593        C1 = T0(STATE_BYTE(4)) ^
    594             T1(STATE_BYTE(9)) ^
    595             T2(STATE_BYTE(14)) ^
    596             T3(STATE_BYTE(3));
    597        C2 = T0(STATE_BYTE(8)) ^
    598             T1(STATE_BYTE(13)) ^
    599             T2(STATE_BYTE(2)) ^
    600             T3(STATE_BYTE(7));
    601        C3 = T0(STATE_BYTE(12)) ^
    602             T1(STATE_BYTE(1)) ^
    603             T2(STATE_BYTE(6)) ^
    604             T3(STATE_BYTE(11));
    605        /* Round key addition */
    606        COLUMN_0(state) = C0 ^ *roundkeyw++;
    607        COLUMN_1(state) = C1 ^ *roundkeyw++;
    608        COLUMN_2(state) = C2 ^ *roundkeyw++;
    609        COLUMN_3(state) = C3 ^ *roundkeyw++;
    610    }
    611    /* Step 3: Do the last round */
    612    /* Final round does not employ MixColumn */
    613    C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) |
    614          (BYTE1WORD(T3(STATE_BYTE(5)))) |
    615          (BYTE2WORD(T0(STATE_BYTE(10)))) |
    616          (BYTE3WORD(T1(STATE_BYTE(15))))) ^
    617         *roundkeyw++;
    618    C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) |
    619          (BYTE1WORD(T3(STATE_BYTE(9)))) |
    620          (BYTE2WORD(T0(STATE_BYTE(14)))) |
    621          (BYTE3WORD(T1(STATE_BYTE(3))))) ^
    622         *roundkeyw++;
    623    C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) |
    624          (BYTE1WORD(T3(STATE_BYTE(13)))) |
    625          (BYTE2WORD(T0(STATE_BYTE(2)))) |
    626          (BYTE3WORD(T1(STATE_BYTE(7))))) ^
    627         *roundkeyw++;
    628    C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) |
    629          (BYTE1WORD(T3(STATE_BYTE(1)))) |
    630          (BYTE2WORD(T0(STATE_BYTE(6)))) |
    631          (BYTE3WORD(T1(STATE_BYTE(11))))) ^
    632         *roundkeyw++;
    633    *((PRUint32 *)pOut) = C0;
    634    *((PRUint32 *)(pOut + 4)) = C1;
    635    *((PRUint32 *)(pOut + 8)) = C2;
    636    *((PRUint32 *)(pOut + 12)) = C3;
    637 #if defined(NSS_X86_OR_X64)
    638 #undef pIn
    639 #undef pOut
    640 #else
    641    if ((ptrdiff_t)output & 0x3) {
    642        memcpy(output, outBuf, sizeof outBuf);
    643    }
    644 #endif
    645 }
    646 
    647 static void NO_SANITIZE_ALIGNMENT
    648 rijndael_decryptBlock128(AESContext *cx,
    649                         unsigned char *output,
    650                         const unsigned char *input)
    651 {
    652    int r;
    653    PRUint32 *roundkeyw;
    654    rijndael_state state;
    655    PRUint32 C0, C1, C2, C3;
    656 #if defined(NSS_X86_OR_X64)
    657 #define pIn input
    658 #define pOut output
    659 #else
    660    unsigned char *pIn, *pOut;
    661    PRUint32 inBuf[4], outBuf[4];
    662 
    663    if ((ptrdiff_t)input & 0x3) {
    664        memcpy(inBuf, input, sizeof inBuf);
    665        pIn = (unsigned char *)inBuf;
    666    } else {
    667        pIn = (unsigned char *)input;
    668    }
    669    if ((ptrdiff_t)output & 0x3) {
    670        pOut = (unsigned char *)outBuf;
    671    } else {
    672        pOut = (unsigned char *)output;
    673    }
    674 #endif
    675    roundkeyw = cx->k.expandedKey + cx->Nb * cx->Nr + 3;
    676    /* reverse the final key addition */
    677    COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;
    678    COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--;
    679    COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--;
    680    COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--;
    681    /* Loop over rounds in reverse [NR..1] */
    682    for (r = cx->Nr; r > 1; --r) {
    683        /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
    684        C0 = TInv0(STATE_BYTE(0)) ^
    685             TInv1(STATE_BYTE(13)) ^
    686             TInv2(STATE_BYTE(10)) ^
    687             TInv3(STATE_BYTE(7));
    688        C1 = TInv0(STATE_BYTE(4)) ^
    689             TInv1(STATE_BYTE(1)) ^
    690             TInv2(STATE_BYTE(14)) ^
    691             TInv3(STATE_BYTE(11));
    692        C2 = TInv0(STATE_BYTE(8)) ^
    693             TInv1(STATE_BYTE(5)) ^
    694             TInv2(STATE_BYTE(2)) ^
    695             TInv3(STATE_BYTE(15));
    696        C3 = TInv0(STATE_BYTE(12)) ^
    697             TInv1(STATE_BYTE(9)) ^
    698             TInv2(STATE_BYTE(6)) ^
    699             TInv3(STATE_BYTE(3));
    700        /* Invert the key addition step */
    701        COLUMN_3(state) = C3 ^ *roundkeyw--;
    702        COLUMN_2(state) = C2 ^ *roundkeyw--;
    703        COLUMN_1(state) = C1 ^ *roundkeyw--;
    704        COLUMN_0(state) = C0 ^ *roundkeyw--;
    705    }
    706    /* inverse sub */
    707    pOut[0] = SINV(STATE_BYTE(0));
    708    pOut[1] = SINV(STATE_BYTE(13));
    709    pOut[2] = SINV(STATE_BYTE(10));
    710    pOut[3] = SINV(STATE_BYTE(7));
    711    pOut[4] = SINV(STATE_BYTE(4));
    712    pOut[5] = SINV(STATE_BYTE(1));
    713    pOut[6] = SINV(STATE_BYTE(14));
    714    pOut[7] = SINV(STATE_BYTE(11));
    715    pOut[8] = SINV(STATE_BYTE(8));
    716    pOut[9] = SINV(STATE_BYTE(5));
    717    pOut[10] = SINV(STATE_BYTE(2));
    718    pOut[11] = SINV(STATE_BYTE(15));
    719    pOut[12] = SINV(STATE_BYTE(12));
    720    pOut[13] = SINV(STATE_BYTE(9));
    721    pOut[14] = SINV(STATE_BYTE(6));
    722    pOut[15] = SINV(STATE_BYTE(3));
    723    /* final key addition */
    724    *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;
    725    *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--;
    726    *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--;
    727    *((PRUint32 *)pOut) ^= *roundkeyw--;
    728 #if defined(NSS_X86_OR_X64)
    729 #undef pIn
    730 #undef pOut
    731 #else
    732    if ((ptrdiff_t)output & 0x3) {
    733        memcpy(output, outBuf, sizeof outBuf);
    734    }
    735 #endif
    736 }
    737 
    738 /**************************************************************************
    739 *
    740 *  Rijndael modes of operation (ECB and CBC)
    741 *
    742 *************************************************************************/
    743 
    744 static SECStatus
    745 rijndael_encryptECB(AESContext *cx, unsigned char *output,
    746                    unsigned int *outputLen, unsigned int maxOutputLen,
    747                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)
    748 {
    749    PORT_Assert(blocksize == AES_BLOCK_SIZE);
    750    PRBool aesni = aesni_support();
    751    while (inputLen > 0) {
    752        if (aesni) {
    753            rijndael_native_encryptBlock(cx, output, input);
    754        } else {
    755            rijndael_encryptBlock128(cx, output, input);
    756        }
    757        output += AES_BLOCK_SIZE;
    758        input += AES_BLOCK_SIZE;
    759        inputLen -= AES_BLOCK_SIZE;
    760    }
    761    return SECSuccess;
    762 }
    763 
    764 static SECStatus
    765 rijndael_encryptCBC(AESContext *cx, unsigned char *output,
    766                    unsigned int *outputLen, unsigned int maxOutputLen,
    767                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)
    768 {
    769    PORT_Assert(blocksize == AES_BLOCK_SIZE);
    770    unsigned char *lastblock = cx->iv;
    771    unsigned char inblock[AES_BLOCK_SIZE * 8];
    772    PRBool aesni = aesni_support();
    773 
    774    if (!inputLen)
    775        return SECSuccess;
    776    while (inputLen > 0) {
    777        if (aesni) {
    778            /* XOR with the last block (IV if first block) */
    779            native_xorBlock(inblock, input, lastblock);
    780            /* encrypt */
    781            rijndael_native_encryptBlock(cx, output, inblock);
    782        } else {
    783            xorBlock(inblock, input, lastblock);
    784            rijndael_encryptBlock128(cx, output, inblock);
    785        }
    786 
    787        /* move to the next block */
    788        lastblock = output;
    789        output += AES_BLOCK_SIZE;
    790        input += AES_BLOCK_SIZE;
    791        inputLen -= AES_BLOCK_SIZE;
    792    }
    793    memcpy(cx->iv, lastblock, AES_BLOCK_SIZE);
    794    return SECSuccess;
    795 }
    796 
    797 static SECStatus
    798 rijndael_decryptECB(AESContext *cx, unsigned char *output,
    799                    unsigned int *outputLen, unsigned int maxOutputLen,
    800                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)
    801 {
    802    PORT_Assert(blocksize == AES_BLOCK_SIZE);
    803    PRBool aesni = aesni_support();
    804    while (inputLen > 0) {
    805        if (aesni) {
    806            rijndael_native_decryptBlock(cx, output, input);
    807        } else {
    808            rijndael_decryptBlock128(cx, output, input);
    809        }
    810        output += AES_BLOCK_SIZE;
    811        input += AES_BLOCK_SIZE;
    812        inputLen -= AES_BLOCK_SIZE;
    813    }
    814    return SECSuccess;
    815 }
    816 
    817 static SECStatus
    818 rijndael_decryptCBC(AESContext *cx, unsigned char *output,
    819                    unsigned int *outputLen, unsigned int maxOutputLen,
    820                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)
    821 {
    822    PORT_Assert(blocksize == AES_BLOCK_SIZE);
    823    const unsigned char *in;
    824    unsigned char *out;
    825    unsigned char newIV[AES_BLOCK_SIZE];
    826    PRBool aesni = aesni_support();
    827 
    828    if (!inputLen)
    829        return SECSuccess;
    830    PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);
    831    in = input + (inputLen - AES_BLOCK_SIZE);
    832    memcpy(newIV, in, AES_BLOCK_SIZE);
    833    out = output + (inputLen - AES_BLOCK_SIZE);
    834    while (inputLen > AES_BLOCK_SIZE) {
    835        if (aesni) {
    836            // Use hardware acceleration for normal AES parameters.
    837            rijndael_native_decryptBlock(cx, out, in);
    838            native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
    839        } else {
    840            rijndael_decryptBlock128(cx, out, in);
    841            xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
    842        }
    843        out -= AES_BLOCK_SIZE;
    844        in -= AES_BLOCK_SIZE;
    845        inputLen -= AES_BLOCK_SIZE;
    846    }
    847    if (in == input) {
    848        if (aesni) {
    849            rijndael_native_decryptBlock(cx, out, in);
    850            native_xorBlock(out, out, cx->iv);
    851        } else {
    852            rijndael_decryptBlock128(cx, out, in);
    853            xorBlock(out, out, cx->iv);
    854        }
    855    }
    856    memcpy(cx->iv, newIV, AES_BLOCK_SIZE);
    857    return SECSuccess;
    858 }
    859 
    860 #define FREEBL_CIPHER_WRAP(ctxtype, mmm)                                                    \
    861    static SECStatus freeblCipher_##mmm(void *vctx, unsigned char *output,                  \
    862                                        unsigned int *outputLen, unsigned int maxOutputLen, \
    863                                        const unsigned char *input, unsigned int inputLen,  \
    864                                        unsigned int blocksize)                             \
    865    {                                                                                       \
    866        ctxtype *ctx = vctx;                                                                \
    867        return mmm(ctx, output, outputLen, maxOutputLen, input, inputLen, blocksize);       \
    868    }
    869 
    870 FREEBL_CIPHER_WRAP(CTRContext, CTR_Update);
    871 FREEBL_CIPHER_WRAP(CTSContext, CTS_DecryptUpdate);
    872 FREEBL_CIPHER_WRAP(CTSContext, CTS_EncryptUpdate);
    873 FREEBL_CIPHER_WRAP(GCMContext, GCM_DecryptUpdate);
    874 FREEBL_CIPHER_WRAP(GCMContext, GCM_EncryptUpdate);
    875 FREEBL_CIPHER_WRAP(AESContext, rijndael_decryptCBC);
    876 FREEBL_CIPHER_WRAP(AESContext, rijndael_decryptECB);
    877 FREEBL_CIPHER_WRAP(AESContext, rijndael_encryptCBC);
    878 FREEBL_CIPHER_WRAP(AESContext, rijndael_encryptECB);
    879 
    880 #if defined(INTEL_GCM) && defined(USE_HW_AES)
    881 FREEBL_CIPHER_WRAP(intel_AES_GCMContext, intel_AES_GCM_DecryptUpdate);
    882 FREEBL_CIPHER_WRAP(intel_AES_GCMContext, intel_AES_GCM_EncryptUpdate);
    883 #elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
    884 FREEBL_CIPHER_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_DecryptUpdate);
    885 FREEBL_CIPHER_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_EncryptUpdate);
    886 #endif
    887 
    888 #if defined(USE_HW_AES)
    889 #if defined(NSS_X86_OR_X64)
    890 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_ecb_128);
    891 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_ecb_128);
    892 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_cbc_128);
    893 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_cbc_128);
    894 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_ecb_192);
    895 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_ecb_192);
    896 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_cbc_192);
    897 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_cbc_192);
    898 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_ecb_256);
    899 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_ecb_256);
    900 FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_cbc_256);
    901 FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_cbc_256);
    902 
    903 #define freeblCipher_native_aes_ecb_worker(encrypt, keysize)            \
    904    ((encrypt)                                                          \
    905         ? ((keysize) == 16   ? freeblCipher_intel_aes_encrypt_ecb_128  \
    906            : (keysize) == 24 ? freeblCipher_intel_aes_encrypt_ecb_192  \
    907                              : freeblCipher_intel_aes_encrypt_ecb_256) \
    908         : ((keysize) == 16   ? freeblCipher_intel_aes_decrypt_ecb_128  \
    909            : (keysize) == 24 ? freeblCipher_intel_aes_decrypt_ecb_192  \
    910                              : freeblCipher_intel_aes_decrypt_ecb_256))
    911 
    912 #define freeblCipher_native_aes_cbc_worker(encrypt, keysize)            \
    913    ((encrypt)                                                          \
    914         ? ((keysize) == 16   ? freeblCipher_intel_aes_encrypt_cbc_128  \
    915            : (keysize) == 24 ? freeblCipher_intel_aes_encrypt_cbc_192  \
    916                              : freeblCipher_intel_aes_encrypt_cbc_256) \
    917         : ((keysize) == 16   ? freeblCipher_intel_aes_decrypt_cbc_128  \
    918            : (keysize) == 24 ? freeblCipher_intel_aes_decrypt_cbc_192  \
    919                              : freeblCipher_intel_aes_decrypt_cbc_256))
    920 #else
    921 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_ecb_128);
    922 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_ecb_128);
    923 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_cbc_128);
    924 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_cbc_128);
    925 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_ecb_192);
    926 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_ecb_192);
    927 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_cbc_192);
    928 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_cbc_192);
    929 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_ecb_256);
    930 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_ecb_256);
    931 FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_cbc_256);
    932 FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_cbc_256);
    933 
    934 #define freeblCipher_native_aes_ecb_worker(encrypt, keysize)          \
    935    ((encrypt)                                                        \
    936         ? ((keysize) == 16   ? freeblCipher_arm_aes_encrypt_ecb_128  \
    937            : (keysize) == 24 ? freeblCipher_arm_aes_encrypt_ecb_192  \
    938                              : freeblCipher_arm_aes_encrypt_ecb_256) \
    939         : ((keysize) == 16   ? freeblCipher_arm_aes_decrypt_ecb_128  \
    940            : (keysize) == 24 ? freeblCipher_arm_aes_decrypt_ecb_192  \
    941                              : freeblCipher_arm_aes_decrypt_ecb_256))
    942 
    943 #define freeblCipher_native_aes_cbc_worker(encrypt, keysize)          \
    944    ((encrypt)                                                        \
    945         ? ((keysize) == 16   ? freeblCipher_arm_aes_encrypt_cbc_128  \
    946            : (keysize) == 24 ? freeblCipher_arm_aes_encrypt_cbc_192  \
    947                              : freeblCipher_arm_aes_encrypt_cbc_256) \
    948         : ((keysize) == 16   ? freeblCipher_arm_aes_decrypt_cbc_128  \
    949            : (keysize) == 24 ? freeblCipher_arm_aes_decrypt_cbc_192  \
    950                              : freeblCipher_arm_aes_decrypt_cbc_256))
    951 #endif
    952 #endif
    953 
    954 #if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64)
    955 FREEBL_CIPHER_WRAP(CTRContext, CTR_Update_HW_AES);
    956 #endif
    957 
    958 #define FREEBL_AEAD_WRAP(ctxtype, mmm)                                                                                \
    959    static SECStatus freeblAead_##mmm(void *vctx, unsigned char *output,                                              \
    960                                      unsigned int *outputLen, unsigned int maxOutputLen,                             \
    961                                      const unsigned char *input, unsigned int inputLen,                              \
    962                                      void *params, unsigned int paramsLen,                                           \
    963                                      const unsigned char *aad, unsigned int aadLen,                                  \
    964                                      unsigned int blocksize)                                                         \
    965    {                                                                                                                 \
    966        ctxtype *ctx = vctx;                                                                                          \
    967        return mmm(ctx, output, outputLen, maxOutputLen, input, inputLen, params, paramsLen, aad, aadLen, blocksize); \
    968    }
    969 
    970 FREEBL_AEAD_WRAP(GCMContext, GCM_EncryptAEAD);
    971 FREEBL_AEAD_WRAP(GCMContext, GCM_DecryptAEAD);
    972 
    973 #if defined(INTEL_GCM) && defined(USE_HW_AES)
    974 FREEBL_AEAD_WRAP(intel_AES_GCMContext, intel_AES_GCM_EncryptAEAD);
    975 FREEBL_AEAD_WRAP(intel_AES_GCMContext, intel_AES_GCM_DecryptAEAD);
    976 #elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
    977 FREEBL_AEAD_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_EncryptAEAD);
    978 FREEBL_AEAD_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_DecryptAEAD);
    979 #endif
    980 
    981 #define FREEBL_DESTROY_WRAP(ctxtype, mmm)                      \
    982    static void freeblDestroy_##mmm(void *vctx, PRBool freeit) \
    983    {                                                          \
    984        ctxtype *ctx = vctx;                                   \
    985        mmm(ctx, freeit);                                      \
    986    }
    987 
    988 FREEBL_DESTROY_WRAP(CTRContext, CTR_DestroyContext);
    989 FREEBL_DESTROY_WRAP(CTSContext, CTS_DestroyContext);
    990 FREEBL_DESTROY_WRAP(GCMContext, GCM_DestroyContext);
    991 
    992 #if defined(INTEL_GCM) && defined(USE_HW_AES)
    993 FREEBL_DESTROY_WRAP(intel_AES_GCMContext, intel_AES_GCM_DestroyContext);
    994 #elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
    995 FREEBL_DESTROY_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_DestroyContext);
    996 #endif
    997 
    998 /************************************************************************
    999 *
   1000 * BLAPI Interface functions
   1001 *
   1002 * The following functions implement the encryption routines defined in
   1003 * BLAPI for the AES cipher, Rijndael.
   1004 *
   1005 ***********************************************************************/
   1006 
   1007 AESContext *
   1008 AES_AllocateContext(void)
   1009 {
   1010    return PORT_ZNewAligned(AESContext, 16, mem);
   1011 }
   1012 
   1013 /*
   1014 ** Initialize a new AES context suitable for AES encryption/decryption in
   1015 ** the ECB or CBC mode.
   1016 **  "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC
   1017 */
   1018 static SECStatus
   1019 aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
   1020                const unsigned char *iv, int mode, unsigned int encrypt)
   1021 {
   1022    unsigned int Nk;
   1023    PRBool use_hw_aes;
   1024    /* According to AES, block lengths are 128 and key lengths are 128, 192, or
   1025     * 256 bits. We support other key sizes as well [128, 256] as long as the
   1026     * length in bytes is divisible by 4.
   1027     */
   1028 
   1029    if (key == NULL ||
   1030        keysize < AES_BLOCK_SIZE ||
   1031        keysize > 32 ||
   1032        keysize % 4 != 0) {
   1033        PORT_SetError(SEC_ERROR_INVALID_ARGS);
   1034        return SECFailure;
   1035    }
   1036    if (mode != NSS_AES && mode != NSS_AES_CBC) {
   1037        PORT_SetError(SEC_ERROR_INVALID_ARGS);
   1038        return SECFailure;
   1039    }
   1040    if (mode == NSS_AES_CBC && iv == NULL) {
   1041        PORT_SetError(SEC_ERROR_INVALID_ARGS);
   1042        return SECFailure;
   1043    }
   1044    if (!cx) {
   1045        PORT_SetError(SEC_ERROR_INVALID_ARGS);
   1046        return SECFailure;
   1047    }
   1048 #if defined(NSS_X86_OR_X64) || defined(USE_HW_AES)
   1049    use_hw_aes = (aesni_support() || arm_aes_support()) && (keysize % 8) == 0;
   1050 #else
   1051    use_hw_aes = PR_FALSE;
   1052 #endif
   1053    /* Nb = (block size in bits) / 32 */
   1054    cx->Nb = AES_BLOCK_SIZE / 4;
   1055    /* Nk = (key size in bits) / 32 */
   1056    Nk = keysize / 4;
   1057    /* Obtain number of rounds from "table" */
   1058    cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
   1059    /* copy in the iv, if neccessary */
   1060    if (mode == NSS_AES_CBC) {
   1061        memcpy(cx->iv, iv, AES_BLOCK_SIZE);
   1062 #ifdef USE_HW_AES
   1063        if (use_hw_aes) {
   1064            cx->worker = freeblCipher_native_aes_cbc_worker(encrypt, keysize);
   1065        } else
   1066 #endif
   1067        {
   1068            cx->worker = encrypt ? freeblCipher_rijndael_encryptCBC : freeblCipher_rijndael_decryptCBC;
   1069        }
   1070    } else {
   1071 #ifdef USE_HW_AES
   1072        if (use_hw_aes) {
   1073            cx->worker = freeblCipher_native_aes_ecb_worker(encrypt, keysize);
   1074        } else
   1075 #endif
   1076        {
   1077            cx->worker = encrypt ? freeblCipher_rijndael_encryptECB : freeblCipher_rijndael_decryptECB;
   1078        }
   1079    }
   1080    PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
   1081    if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
   1082        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
   1083        return SECFailure;
   1084    }
   1085 #ifdef USE_HW_AES
   1086    if (use_hw_aes) {
   1087        native_aes_init(encrypt, keysize);
   1088    } else
   1089 #endif
   1090    {
   1091        /* Generate expanded key */
   1092        if (encrypt) {
   1093            if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES ||
   1094                               cx->mode == NSS_AES_CTR)) {
   1095                PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32);
   1096                /* Prepare hardware key for normal AES parameters. */
   1097                rijndael_native_key_expansion(cx, key, Nk);
   1098            } else {
   1099                rijndael_key_expansion(cx, key, Nk);
   1100            }
   1101        } else {
   1102            rijndael_invkey_expansion(cx, key, Nk);
   1103        }
   1104        BLAPI_CLEAR_STACK(256)
   1105    }
   1106    cx->worker_cx = cx;
   1107    cx->destroy = NULL;
   1108    cx->isBlock = PR_TRUE;
   1109    return SECSuccess;
   1110 }
   1111 
   1112 SECStatus
   1113 AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
   1114                const unsigned char *iv, int mode, unsigned int encrypt,
   1115                unsigned int blocksize)
   1116 {
   1117    int basemode = mode;
   1118    PRBool baseencrypt = encrypt;
   1119    SECStatus rv;
   1120 
   1121    if (blocksize != AES_BLOCK_SIZE) {
   1122        PORT_SetError(SEC_ERROR_INVALID_ARGS);
   1123        return SECFailure;
   1124    }
   1125 
   1126    switch (mode) {
   1127        case NSS_AES_CTS:
   1128            basemode = NSS_AES_CBC;
   1129            break;
   1130        case NSS_AES_GCM:
   1131        case NSS_AES_CTR:
   1132            basemode = NSS_AES;
   1133            baseencrypt = PR_TRUE;
   1134            break;
   1135    }
   1136    /* Make sure enough is initialized so we can safely call Destroy. */
   1137    cx->worker_cx = NULL;
   1138    cx->destroy = NULL;
   1139    cx->mode = mode;
   1140    rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt);
   1141    if (rv != SECSuccess) {
   1142        AES_DestroyContext(cx, PR_FALSE);
   1143        return rv;
   1144    }
   1145 
   1146    /* finally, set up any mode specific contexts */
   1147    cx->worker_aead = 0;
   1148    switch (mode) {
   1149        case NSS_AES_CTS:
   1150            cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv);
   1151            cx->worker = encrypt ? freeblCipher_CTS_EncryptUpdate : freeblCipher_CTS_DecryptUpdate;
   1152            cx->destroy = freeblDestroy_CTS_DestroyContext;
   1153            cx->isBlock = PR_FALSE;
   1154            break;
   1155        case NSS_AES_GCM:
   1156 #if defined(INTEL_GCM) && defined(USE_HW_AES)
   1157            if (aesni_support() && (keysize % 8) == 0 && avx_support() &&
   1158                clmul_support()) {
   1159                cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv);
   1160                cx->worker = encrypt ? freeblCipher_intel_AES_GCM_EncryptUpdate
   1161                                     : freeblCipher_intel_AES_GCM_DecryptUpdate;
   1162                cx->worker_aead = encrypt ? freeblAead_intel_AES_GCM_EncryptAEAD
   1163                                          : freeblAead_intel_AES_GCM_DecryptAEAD;
   1164                cx->destroy = freeblDestroy_intel_AES_GCM_DestroyContext;
   1165                cx->isBlock = PR_FALSE;
   1166            } else
   1167 #elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
   1168            if (ppc_crypto_support() && (keysize % 8) == 0) {
   1169                cx->worker_cx = ppc_AES_GCM_CreateContext(cx, cx->worker, iv);
   1170                cx->worker = encrypt ? freeblCipher_ppc_AES_GCM_EncryptUpdate
   1171                                     : freeblCipher_ppc_AES_GCM_DecryptUpdate;
   1172                cx->worker_aead = encrypt ? freeblAead_ppc_AES_GCM_EncryptAEAD
   1173                                          : freeblAead_ppc_AES_GCM_DecryptAEAD;
   1174                cx->destroy = freeblDestroy_ppc_AES_GCM_DestroyContext;
   1175                cx->isBlock = PR_FALSE;
   1176            } else
   1177 #endif
   1178            {
   1179                cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv);
   1180                cx->worker = encrypt ? freeblCipher_GCM_EncryptUpdate
   1181                                     : freeblCipher_GCM_DecryptUpdate;
   1182                cx->worker_aead = encrypt ? freeblAead_GCM_EncryptAEAD
   1183                                          : freeblAead_GCM_DecryptAEAD;
   1184 
   1185                cx->destroy = freeblDestroy_GCM_DestroyContext;
   1186                cx->isBlock = PR_FALSE;
   1187            }
   1188            break;
   1189        case NSS_AES_CTR:
   1190            cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv);
   1191 #if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64)
   1192            if (aesni_support() && (keysize % 8) == 0) {
   1193                cx->worker = freeblCipher_CTR_Update_HW_AES;
   1194            } else
   1195 #endif
   1196            {
   1197                cx->worker = freeblCipher_CTR_Update;
   1198            }
   1199            cx->destroy = freeblDestroy_CTR_DestroyContext;
   1200            cx->isBlock = PR_FALSE;
   1201            break;
   1202        default:
   1203            /* everything has already been set up by aes_InitContext, just
   1204             * return */
   1205            return SECSuccess;
   1206    }
   1207    /* check to see if we succeeded in getting the worker context */
   1208    if (cx->worker_cx == NULL) {
   1209        /* no, just destroy the existing context */
   1210        cx->destroy = NULL; /* paranoia, though you can see a dozen lines */
   1211                            /* below that this isn't necessary */
   1212        AES_DestroyContext(cx, PR_FALSE);
   1213        return SECFailure;
   1214    }
   1215    return SECSuccess;
   1216 }
   1217 
   1218 /* AES_CreateContext
   1219 *
   1220 * create a new context for Rijndael operations
   1221 */
   1222 AESContext *
   1223 AES_CreateContext(const unsigned char *key, const unsigned char *iv,
   1224                  int mode, int encrypt,
   1225                  unsigned int keysize, unsigned int blocksize)
   1226 {
   1227    AESContext *cx = AES_AllocateContext();
   1228    if (cx) {
   1229        SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt,
   1230                                       blocksize);
   1231        if (rv != SECSuccess) {
   1232            AES_DestroyContext(cx, PR_TRUE);
   1233            cx = NULL;
   1234        }
   1235    }
   1236    return cx;
   1237 }
   1238 
   1239 /*
   1240 * AES_DestroyContext
   1241 *
   1242 * Zero an AES cipher context.  If freeit is true, also free the pointer
   1243 * to the context.
   1244 */
   1245 void
   1246 AES_DestroyContext(AESContext *cx, PRBool freeit)
   1247 {
   1248    void *mem = cx->mem;
   1249    if (cx->worker_cx && cx->destroy) {
   1250        (*cx->destroy)(cx->worker_cx, PR_TRUE);
   1251        cx->worker_cx = NULL;
   1252        cx->destroy = NULL;
   1253    }
   1254    PORT_SafeZero(cx, sizeof(AESContext));
   1255    if (freeit) {
   1256        PORT_Free(mem);
   1257    } else {
   1258        /* if we are not freeing the context, restore mem, We may get called
   1259         * again to actually free the context */
   1260        cx->mem = mem;
   1261    }
   1262 }
   1263 
   1264 /*
   1265 * AES_Encrypt
   1266 *
   1267 * Encrypt an arbitrary-length buffer.  The output buffer must already be
   1268 * allocated to at least inputLen.
   1269 */
   1270 SECStatus
   1271 AES_Encrypt(AESContext *cx, unsigned char *output,
   1272            unsigned int *outputLen, unsigned int maxOutputLen,
   1273            const unsigned char *input, unsigned int inputLen)
   1274 {
   1275    /* Check args */
   1276    SECStatus rv;
   1277    if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
   1278        PORT_SetError(SEC_ERROR_INVALID_ARGS);
   1279        return SECFailure;
   1280    }
   1281    if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) {
   1282        PORT_SetError(SEC_ERROR_INPUT_LEN);
   1283        return SECFailure;
   1284    }
   1285    if (maxOutputLen < inputLen) {
   1286        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1287        return SECFailure;
   1288    }
   1289    *outputLen = inputLen;
   1290 #if UINT_MAX > MP_32BIT_MAX
   1291    /*
   1292     * we can guarentee that GSM won't overlfow if we limit the input to
   1293     * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.
   1294     *
   1295     * We do it here to cover both hardware and software GCM operations.
   1296     */
   1297    {
   1298        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
   1299    }
   1300    if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) {
   1301        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1302        return SECFailure;
   1303    }
   1304 #else
   1305    /* if we can't pass in a 32_bit number, then no such check needed */
   1306    {
   1307        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
   1308    }
   1309 #endif
   1310 
   1311    rv = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
   1312                       input, inputLen, AES_BLOCK_SIZE);
   1313    BLAPI_CLEAR_STACK(256)
   1314    return rv;
   1315 }
   1316 
   1317 /*
   1318 * AES_Decrypt
   1319 *
   1320 * Decrypt and arbitrary-length buffer.  The output buffer must already be
   1321 * allocated to at least inputLen.
   1322 */
   1323 SECStatus
   1324 AES_Decrypt(AESContext *cx, unsigned char *output,
   1325            unsigned int *outputLen, unsigned int maxOutputLen,
   1326            const unsigned char *input, unsigned int inputLen)
   1327 {
   1328    SECStatus rv;
   1329    /* Check args */
   1330    if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
   1331        PORT_SetError(SEC_ERROR_INVALID_ARGS);
   1332        return SECFailure;
   1333    }
   1334    if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) {
   1335        PORT_SetError(SEC_ERROR_INPUT_LEN);
   1336        return SECFailure;
   1337    }
   1338    if ((cx->mode != NSS_AES_GCM) && (maxOutputLen < inputLen)) {
   1339        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1340        return SECFailure;
   1341    }
   1342    *outputLen = inputLen;
   1343    rv = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
   1344                       input, inputLen, AES_BLOCK_SIZE);
   1345    BLAPI_CLEAR_STACK(256)
   1346    return rv;
   1347 }
   1348 
   1349 /*
   1350 * AES_Encrypt_AEAD
   1351 *
   1352 * Encrypt using GCM or CCM. include the nonce, extra data, and the tag
   1353 */
   1354 SECStatus
   1355 AES_AEAD(AESContext *cx, unsigned char *output,
   1356         unsigned int *outputLen, unsigned int maxOutputLen,
   1357         const unsigned char *input, unsigned int inputLen,
   1358         void *params, unsigned int paramsLen,
   1359         const unsigned char *aad, unsigned int aadLen)
   1360 {
   1361    SECStatus rv;
   1362    /* Check args */
   1363    if (cx == NULL || output == NULL || (input == NULL && inputLen != 0) || (aad == NULL && aadLen != 0) || params == NULL) {
   1364        PORT_SetError(SEC_ERROR_INVALID_ARGS);
   1365        return SECFailure;
   1366    }
   1367    if (cx->worker_aead == NULL) {
   1368        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
   1369        return SECFailure;
   1370    }
   1371    if (maxOutputLen < inputLen) {
   1372        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1373        return SECFailure;
   1374    }
   1375    *outputLen = inputLen;
   1376 #if UINT_MAX > MP_32BIT_MAX
   1377    /*
   1378     * we can guarentee that GSM won't overlfow if we limit the input to
   1379     * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.
   1380     *
   1381     * We do it here to cover both hardware and software GCM operations.
   1382     */
   1383    {
   1384        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
   1385    }
   1386    if (inputLen > MP_32BIT_MAX) {
   1387        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
   1388        return SECFailure;
   1389    }
   1390 #else
   1391    /* if we can't pass in a 32_bit number, then no such check needed */
   1392    {
   1393        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
   1394    }
   1395 #endif
   1396 
   1397    rv = (*cx->worker_aead)(cx->worker_cx, output, outputLen, maxOutputLen,
   1398                            input, inputLen, params, paramsLen, aad, aadLen,
   1399                            AES_BLOCK_SIZE);
   1400    BLAPI_CLEAR_STACK(256)
   1401    return rv;
   1402 }