tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mpi_sparc.c (5733B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /* Multiplication performance enhancements for sparc v8+vis CPUs. */
      6 
      7 #include "mpi-priv.h"
      8 #include <stddef.h>
      9 #include <sys/systeminfo.h>
     10 #include <strings.h>
     11 
/*
 * Contract stated for the two routines declared below (their definitions
 * are not part of this file's visible source):
 *   - vector y must be 8-byte aligned, and n must be even;
 *   - each returns the carry out of the high-order word of the result;
 *   - the maximum n is 256.
 */

/* vector x += vector y * scalar a; where y is of length n words. */
extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);

/* vector z = vector x + vector y * scalar a; where y is of length n words. */
extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
                        int n, mp_digit a);
     23 
     24 /* v8 versions of these functions run on any Sparc v8 CPU. */
     25 
     26 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
     27 #define MP_MUL_DxD(a, b, Phi, Plo)                              \
     28    {                                                           \
     29        unsigned long long product = (unsigned long long)a * b; \
     30        Plo = (mp_digit)product;                                \
     31        Phi = (mp_digit)(product >> MP_DIGIT_BIT);              \
     32    }
     33 
     34 /* c = a * b */
     35 static void
     36 v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
     37 {
     38 #if !defined(MP_NO_MP_WORD)
     39    mp_digit d = 0;
     40 
     41    /* Inner product:  Digits of a */
     42    while (a_len--) {
     43        mp_word w = ((mp_word)b * *a++) + d;
     44        *c++ = ACCUM(w);
     45        d = CARRYOUT(w);
     46    }
     47    *c = d;
     48 #else
     49    mp_digit carry = 0;
     50    while (a_len--) {
     51        mp_digit a_i = *a++;
     52        mp_digit a0b0, a1b1;
     53 
     54        MP_MUL_DxD(a_i, b, a1b1, a0b0);
     55 
     56        a0b0 += carry;
     57        if (a0b0 < carry)
     58            ++a1b1;
     59        *c++ = a0b0;
     60        carry = a1b1;
     61    }
     62    *c = carry;
     63 #endif
     64 }
     65 
     66 /* c += a * b */
     67 static void
     68 v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
     69 {
     70 #if !defined(MP_NO_MP_WORD)
     71    mp_digit d = 0;
     72 
     73    /* Inner product:  Digits of a */
     74    while (a_len--) {
     75        mp_word w = ((mp_word)b * *a++) + *c + d;
     76        *c++ = ACCUM(w);
     77        d = CARRYOUT(w);
     78    }
     79    *c = d;
     80 #else
     81    mp_digit carry = 0;
     82    while (a_len--) {
     83        mp_digit a_i = *a++;
     84        mp_digit a0b0, a1b1;
     85 
     86        MP_MUL_DxD(a_i, b, a1b1, a0b0);
     87 
     88        a0b0 += carry;
     89        if (a0b0 < carry)
     90            ++a1b1;
     91        a0b0 += a_i = *c;
     92        if (a0b0 < a_i)
     93            ++a1b1;
     94        *c++ = a0b0;
     95        carry = a1b1;
     96    }
     97    *c = carry;
     98 #endif
     99 }
    100 
    101 /* Presently, this is only used by the Montgomery arithmetic code. */
    102 /* c += a * b */
    103 static void
    104 v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
    105 {
    106 #if !defined(MP_NO_MP_WORD)
    107    mp_digit d = 0;
    108 
    109    /* Inner product:  Digits of a */
    110    while (a_len--) {
    111        mp_word w = ((mp_word)b * *a++) + *c + d;
    112        *c++ = ACCUM(w);
    113        d = CARRYOUT(w);
    114    }
    115 
    116    while (d) {
    117        mp_word w = (mp_word)*c + d;
    118        *c++ = ACCUM(w);
    119        d = CARRYOUT(w);
    120    }
    121 #else
    122    mp_digit carry = 0;
    123    while (a_len--) {
    124        mp_digit a_i = *a++;
    125        mp_digit a0b0, a1b1;
    126 
    127        MP_MUL_DxD(a_i, b, a1b1, a0b0);
    128 
    129        a0b0 += carry;
    130        if (a0b0 < carry)
    131            ++a1b1;
    132 
    133        a0b0 += a_i = *c;
    134        if (a0b0 < a_i)
    135            ++a1b1;
    136 
    137        *c++ = a0b0;
    138        carry = a1b1;
    139    }
    140    while (carry) {
    141        mp_digit c_i = *c;
    142        carry += c_i;
    143        *c++ = carry;
    144        carry = carry < c_i;
    145    }
    146 #endif
    147 }
    148 
    149 /* These functions run only on v8plus+vis or v9+vis CPUs. */
    150 
    151 /* c = a * b */
    152 void
    153 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
    154 {
    155    mp_digit d;
    156    mp_digit x[258];
    157    if (a_len <= 256) {
    158        if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
    159            mp_digit *px;
    160            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
    161            memcpy(px, a, a_len * sizeof(*a));
    162            a = px;
    163            if (a_len & 1) {
    164                px[a_len] = 0;
    165            }
    166        }
    167        s_mp_setz(c, a_len + 1);
    168        d = mul_add_inp(c, a, a_len, b);
    169        c[a_len] = d;
    170    } else {
    171        v8_mpv_mul_d(a, a_len, b, c);
    172    }
    173 }
    174 
    175 /* c += a * b, where a is a_len words long. */
    176 void
    177 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
    178 {
    179    mp_digit d;
    180    mp_digit x[258];
    181    if (a_len <= 256) {
    182        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
    183            mp_digit *px;
    184            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
    185            memcpy(px, a, a_len * sizeof(*a));
    186            a = px;
    187            if (a_len & 1) {
    188                px[a_len] = 0;
    189            }
    190        }
    191        d = mul_add_inp(c, a, a_len, b);
    192        c[a_len] = d;
    193    } else {
    194        v8_mpv_mul_d_add(a, a_len, b, c);
    195    }
    196 }
    197 
    198 /* c += a * b, where a is y words long. */
    199 void
    200 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
    201 {
    202    mp_digit d;
    203    mp_digit x[258];
    204    if (a_len <= 256) {
    205        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
    206            mp_digit *px;
    207            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
    208            memcpy(px, a, a_len * sizeof(*a));
    209            a = px;
    210            if (a_len & 1) {
    211                px[a_len] = 0;
    212            }
    213        }
    214        d = mul_add_inp(c, a, a_len, b);
    215        if (d) {
    216            c += a_len;
    217            do {
    218                mp_digit sum = d + *c;
    219                *c++ = sum;
    220                d = sum < d;
    221            } while (d);
    222        }
    223    } else {
    224        v8_mpv_mul_d_add_prop(a, a_len, b, c);
    225    }
    226 }