tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

sha256-armv8.c (6623B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #ifdef USE_HW_SHA2
      6 
      7 #ifndef __ARM_FEATURE_CRYPTO
      8 #error "Compiler option is invalid"
      9 #endif
     10 
     11 #ifdef FREEBL_NO_DEPEND
     12 #include "stubs.h"
     13 #endif
     14 
     15 #include "prcpucfg.h"
     16 #include "prtypes.h" /* for PRUintXX */
     17 #include "prlong.h"
     18 #include "blapi.h"
     19 #include "sha256.h"
     20 
     21 #include <arm_neon.h>
     22 
     23 /* SHA-256 constants, K256. */
     24 static const PRUint32 __attribute__((aligned(16))) K256[64] = {
     25    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
     26    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
     27    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
     28    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
     29    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
     30    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
     31    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
     32    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
     33    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
     34    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
     35    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
     36    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
     37    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
     38    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
     39    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
     40    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
     41 };
     42 
     43 #define ROUND(n, a, b, c, d)               \
     44    {                                      \
     45        uint32x4_t t = vaddq_u32(a, k##n); \
     46        uint32x4_t wt = w0;                \
     47        w0 = vsha256hq_u32(w0, w1, t);     \
     48        w1 = vsha256h2q_u32(w1, wt, t);    \
     49        if (n < 12) {                      \
     50            a = vsha256su0q_u32(a, b);     \
     51            a = vsha256su1q_u32(a, c, d);  \
     52        }                                  \
     53    }
     54 
     55 void
     56 SHA256_Compress_Native(SHA256Context *ctx)
     57 {
     58    const uint32x4_t k0 = vld1q_u32(K256);
     59    const uint32x4_t k1 = vld1q_u32(K256 + 4);
     60    const uint32x4_t k2 = vld1q_u32(K256 + 8);
     61    const uint32x4_t k3 = vld1q_u32(K256 + 12);
     62    const uint32x4_t k4 = vld1q_u32(K256 + 16);
     63    const uint32x4_t k5 = vld1q_u32(K256 + 20);
     64    const uint32x4_t k6 = vld1q_u32(K256 + 24);
     65    const uint32x4_t k7 = vld1q_u32(K256 + 28);
     66    const uint32x4_t k8 = vld1q_u32(K256 + 32);
     67    const uint32x4_t k9 = vld1q_u32(K256 + 36);
     68    const uint32x4_t k10 = vld1q_u32(K256 + 40);
     69    const uint32x4_t k11 = vld1q_u32(K256 + 44);
     70    const uint32x4_t k12 = vld1q_u32(K256 + 48);
     71    const uint32x4_t k13 = vld1q_u32(K256 + 52);
     72    const uint32x4_t k14 = vld1q_u32(K256 + 56);
     73    const uint32x4_t k15 = vld1q_u32(K256 + 60);
     74 
     75    uint32x4_t h0 = vld1q_u32(ctx->h);
     76    uint32x4_t h1 = vld1q_u32(ctx->h + 4);
     77 
     78    unsigned char *input = ctx->u.b;
     79 
     80    uint32x4_t a = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input)));
     81    uint32x4_t b = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 16)));
     82    uint32x4_t c = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 32)));
     83    uint32x4_t d = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 48)));
     84 
     85    uint32x4_t w0 = h0;
     86    uint32x4_t w1 = h1;
     87 
     88    ROUND(0, a, b, c, d)
     89    ROUND(1, b, c, d, a)
     90    ROUND(2, c, d, a, b)
     91    ROUND(3, d, a, b, c)
     92    ROUND(4, a, b, c, d)
     93    ROUND(5, b, c, d, a)
     94    ROUND(6, c, d, a, b)
     95    ROUND(7, d, a, b, c)
     96    ROUND(8, a, b, c, d)
     97    ROUND(9, b, c, d, a)
     98    ROUND(10, c, d, a, b)
     99    ROUND(11, d, a, b, c)
    100    ROUND(12, a, b, c, d)
    101    ROUND(13, b, c, d, a)
    102    ROUND(14, c, d, a, b)
    103    ROUND(15, d, a, b, c)
    104 
    105    h0 = vaddq_u32(h0, w0);
    106    h1 = vaddq_u32(h1, w1);
    107 
    108    vst1q_u32(ctx->h, h0);
    109    vst1q_u32(ctx->h + 4, h1);
    110 }
    111 
    112 void
    113 SHA256_Update_Native(SHA256Context *ctx, const unsigned char *input,
    114                     unsigned int inputLen)
    115 {
    116    const uint32x4_t k0 = vld1q_u32(K256);
    117    const uint32x4_t k1 = vld1q_u32(K256 + 4);
    118    const uint32x4_t k2 = vld1q_u32(K256 + 8);
    119    const uint32x4_t k3 = vld1q_u32(K256 + 12);
    120    const uint32x4_t k4 = vld1q_u32(K256 + 16);
    121    const uint32x4_t k5 = vld1q_u32(K256 + 20);
    122    const uint32x4_t k6 = vld1q_u32(K256 + 24);
    123    const uint32x4_t k7 = vld1q_u32(K256 + 28);
    124    const uint32x4_t k8 = vld1q_u32(K256 + 32);
    125    const uint32x4_t k9 = vld1q_u32(K256 + 36);
    126    const uint32x4_t k10 = vld1q_u32(K256 + 40);
    127    const uint32x4_t k11 = vld1q_u32(K256 + 44);
    128    const uint32x4_t k12 = vld1q_u32(K256 + 48);
    129    const uint32x4_t k13 = vld1q_u32(K256 + 52);
    130    const uint32x4_t k14 = vld1q_u32(K256 + 56);
    131    const uint32x4_t k15 = vld1q_u32(K256 + 60);
    132 
    133    unsigned int inBuf = ctx->sizeLo & 0x3f;
    134    if (!inputLen) {
    135        return;
    136    }
    137 
    138    /* Add inputLen into the count of bytes processed, before processing */
    139    if ((ctx->sizeLo += inputLen) < inputLen) {
    140        ctx->sizeHi++;
    141    }
    142 
    143    /* if data already in buffer, attemp to fill rest of buffer */
    144    if (inBuf) {
    145        unsigned int todo = SHA256_BLOCK_LENGTH - inBuf;
    146        if (inputLen < todo) {
    147            todo = inputLen;
    148        }
    149        memcpy(ctx->u.b + inBuf, input, todo);
    150        input += todo;
    151        inputLen -= todo;
    152        if (inBuf + todo == SHA256_BLOCK_LENGTH) {
    153            SHA256_Compress_Native(ctx);
    154        }
    155    }
    156 
    157    uint32x4_t h0 = vld1q_u32(ctx->h);
    158    uint32x4_t h1 = vld1q_u32(ctx->h + 4);
    159 
    160    /* if enough data to fill one or more whole buffers, process them. */
    161    while (inputLen >= SHA256_BLOCK_LENGTH) {
    162        uint32x4_t a, b, c, d;
    163        a = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input)));
    164        b = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 16)));
    165        c = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 32)));
    166        d = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 48)));
    167        input += SHA256_BLOCK_LENGTH;
    168        inputLen -= SHA256_BLOCK_LENGTH;
    169 
    170        uint32x4_t w0 = h0;
    171        uint32x4_t w1 = h1;
    172 
    173        ROUND(0, a, b, c, d)
    174        ROUND(1, b, c, d, a)
    175        ROUND(2, c, d, a, b)
    176        ROUND(3, d, a, b, c)
    177        ROUND(4, a, b, c, d)
    178        ROUND(5, b, c, d, a)
    179        ROUND(6, c, d, a, b)
    180        ROUND(7, d, a, b, c)
    181        ROUND(8, a, b, c, d)
    182        ROUND(9, b, c, d, a)
    183        ROUND(10, c, d, a, b)
    184        ROUND(11, d, a, b, c)
    185        ROUND(12, a, b, c, d)
    186        ROUND(13, b, c, d, a)
    187        ROUND(14, c, d, a, b)
    188        ROUND(15, d, a, b, c)
    189 
    190        h0 = vaddq_u32(h0, w0);
    191        h1 = vaddq_u32(h1, w1);
    192    }
    193 
    194    vst1q_u32(ctx->h, h0);
    195    vst1q_u32(ctx->h + 4, h1);
    196 
    197    /* if data left over, fill it into buffer */
    198    if (inputLen) {
    199        memcpy(ctx->u.b, input, inputLen);
    200    }
    201 }
    202 
    203 #endif /* USE_HW_SHA2 */