tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mpmontg.c (35044B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /* This file implements moduluar exponentiation using Montgomery's
      6 * method for modular reduction.  This file implements the method
      7 * described as "Improvement 2" in the paper "A Cryptogrpahic Library for
      8 * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr.
      9 * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90"
     10 * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244,
     11 * published by Springer Verlag.
     12 */
     13 
     14 #define MP_USING_CACHE_SAFE_MOD_EXP 1
     15 #include <string.h>
     16 #include "mpi-priv.h"
     17 #include "mplogic.h"
     18 #include "mpprime.h"
     19 #ifdef MP_USING_MONT_MULF
     20 #include "montmulf.h"
     21 #endif
     22 #include <stddef.h> /* ptrdiff_t */
     23 #include <assert.h>
     24 
     25 #define STATIC
     26 
     27 #define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */
     28 
     29 /*! computes T = REDC(T), 2^b == R
     30    \param T < RN
     31 */
     32 mp_err
     33 s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
     34 {
     35    mp_err res;
     36    mp_size i;
     37 
     38    i = (MP_USED(&mmm->N) << 1) + 1;
     39    MP_CHECKOK(s_mp_pad(T, i));
     40    for (i = 0; i < MP_USED(&mmm->N); ++i) {
     41        mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime;
     42        /* T += N * m_i * (MP_RADIX ** i); */
     43        s_mp_mul_d_add_offset(&mmm->N, m_i, T, i);
     44    }
     45    s_mp_clamp(T);
     46 
     47    /* T /= R */
     48    s_mp_rshd(T, MP_USED(&mmm->N));
     49 
     50    if (s_mp_cmp(T, &mmm->N) >= 0) {
     51        /* T = T - N */
     52        MP_CHECKOK(s_mp_sub(T, &mmm->N));
     53 #ifdef DEBUG
     54        if (mp_cmp(T, &mmm->N) >= 0) {
     55            res = MP_UNDEF;
     56            goto CLEANUP;
     57        }
     58 #endif
     59    }
     60    res = MP_OKAY;
     61 CLEANUP:
     62    return res;
     63 }
     64 
     65 #if !defined(MP_MONT_USE_MP_MUL)
     66 
     67 /*! c <- REDC( a * b ) mod N
     68    \param a < N  i.e. "reduced"
     69    \param b < N  i.e. "reduced"
     70    \param mmm modulus N and n0' of N
     71 */
     72 mp_err
     73 s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
     74              mp_mont_modulus *mmm)
     75 {
     76    mp_digit *pb;
     77    mp_digit m_i;
     78    mp_err res;
     79    mp_size ib; /* "index b": index of current digit of B */
     80    mp_size useda, usedb;
     81 
     82    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
     83 
     84    if (MP_USED(a) < MP_USED(b)) {
     85        const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
     86        b = a;
     87        a = xch;
     88    }
     89 
     90    MP_USED(c) = 1;
     91    MP_DIGIT(c, 0) = 0;
     92    ib = (MP_USED(&mmm->N) << 1) + 1;
     93    if ((res = s_mp_pad(c, ib)) != MP_OKAY)
     94        goto CLEANUP;
     95 
     96    useda = MP_USED(a);
     97    pb = MP_DIGITS(b);
     98    s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
     99    s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
    100    m_i = MP_DIGIT(c, 0) * mmm->n0prime;
    101    s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);
    102 
    103    /* Outer loop:  Digits of b */
    104    usedb = MP_USED(b);
    105    for (ib = 1; ib < usedb; ib++) {
    106        mp_digit b_i = *pb++;
    107 
    108        /* Inner product:  Digits of a */
    109        if (b_i)
    110            s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
    111        m_i = MP_DIGIT(c, ib) * mmm->n0prime;
    112        s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
    113    }
    114    if (usedb < MP_USED(&mmm->N)) {
    115        for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) {
    116            m_i = MP_DIGIT(c, ib) * mmm->n0prime;
    117            s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
    118        }
    119    }
    120    s_mp_clamp(c);
    121    s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */
    122    if (s_mp_cmp(c, &mmm->N) >= 0) {
    123        MP_CHECKOK(s_mp_sub(c, &mmm->N));
    124    }
    125    res = MP_OKAY;
    126 
    127 CLEANUP:
    128    return res;
    129 }
    130 #endif
    131 
    132 mp_err
    133 mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont)
    134 {
    135    mp_err res;
    136 
    137    /* xMont = x * R mod N   where  N is modulus */
    138    if (x != xMont) {
    139        MP_CHECKOK(mp_copy(x, xMont));
    140    }
    141    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */
    142    MP_CHECKOK(mp_div(xMont, N, 0, xMont));   /*         mod N */
    143 CLEANUP:
    144    return res;
    145 }
    146 
    147 mp_digit
    148 mp_calculate_mont_n0i(const mp_int *N)
    149 {
    150    return 0 - s_mp_invmod_radix(MP_DIGIT(N, 0));
    151 }
    152 
    153 #ifdef MP_USING_MONT_MULF
    154 
    155 /* the floating point multiply is already cache safe,
    156 * don't turn on cache safe unless we specifically
    157 * force it */
    158 #ifndef MP_FORCE_CACHE_SAFE
    159 #undef MP_USING_CACHE_SAFE_MOD_EXP
    160 #endif
    161 
    162 unsigned int mp_using_mont_mulf = 1;
    163 
    164 /* computes montgomery square of the integer in mResult */
    165 #define SQR                                              \
    166    conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \
    167    mont_mulf_noconv(mResult, dm1, d16Tmp,               \
    168                     dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
    169 
    170 /* computes montgomery product of x and the integer in mResult */
    171 #define MUL(x)                                   \
    172    conv_i32_to_d32(dm1, mResult, nLen);         \
    173    mont_mulf_noconv(mResult, dm1, oddPowers[x], \
    174                     dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
    175 
    176 /* Do modular exponentiation using floating point multiply code. */
    177 mp_err
    178 mp_exptmod_f(const mp_int *montBase,
    179             const mp_int *exponent,
    180             const mp_int *modulus,
    181             mp_int *result,
    182             mp_mont_modulus *mmm,
    183             int nLen,
    184             mp_size bits_in_exponent,
    185             mp_size window_bits,
    186             mp_size odd_ints)
    187 {
    188    mp_digit *mResult;
    189    double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp;
    190    double dn0;
    191    mp_size i;
    192    mp_err res;
    193    int expOff;
    194    int dSize = 0, oddPowSize, dTmpSize;
    195    mp_int accum1;
    196    double *oddPowers[MAX_ODD_INTS];
    197 
    198    /* function for computing n0prime only works if n0 is odd */
    199 
    200    MP_DIGITS(&accum1) = 0;
    201 
    202    for (i = 0; i < MAX_ODD_INTS; ++i)
    203        oddPowers[i] = 0;
    204 
    205    MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
    206 
    207    mp_set(&accum1, 1);
    208    MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
    209    MP_CHECKOK(s_mp_pad(&accum1, nLen));
    210 
    211    oddPowSize = 2 * nLen + 1;
    212    dTmpSize = 2 * oddPowSize;
    213    dSize = sizeof(double) * (nLen * 4 + 1 +
    214                              ((odd_ints + 1) * oddPowSize) + dTmpSize);
    215    dBuf = malloc(dSize);
    216    if (!dBuf) {
    217        res = MP_MEM;
    218        goto CLEANUP;
    219    }
    220    dm1 = dBuf;           /* array of d32 */
    221    dn = dBuf + nLen;     /* array of d32 */
    222    dSqr = dn + nLen;     /* array of d32 */
    223    d16Tmp = dSqr + nLen; /* array of d16 */
    224    dTmp = d16Tmp + oddPowSize;
    225 
    226    for (i = 0; i < odd_ints; ++i) {
    227        oddPowers[i] = dTmp;
    228        dTmp += oddPowSize;
    229    }
    230    mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */
    231 
    232    /* Make dn and dn0 */
    233    conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen);
    234    dn0 = (double)(mmm->n0prime & 0xffff);
    235 
    236    /* Make dSqr */
    237    conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen);
    238    mont_mulf_noconv(mResult, dm1, oddPowers[0],
    239                     dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
    240    conv_i32_to_d32(dSqr, mResult, nLen);
    241 
    242    for (i = 1; i < odd_ints; ++i) {
    243        mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1],
    244                         dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
    245        conv_i32_to_d16(oddPowers[i], mResult, nLen);
    246    }
    247 
    248    s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */
    249 
    250    for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
    251        mp_size smallExp;
    252        MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
    253        smallExp = (mp_size)res;
    254 
    255        if (window_bits == 1) {
    256            if (!smallExp) {
    257                SQR;
    258            } else if (smallExp & 1) {
    259                SQR;
    260                MUL(0);
    261            } else {
    262                abort();
    263            }
    264        } else if (window_bits == 4) {
    265            if (!smallExp) {
    266                SQR;
    267                SQR;
    268                SQR;
    269                SQR;
    270            } else if (smallExp & 1) {
    271                SQR;
    272                SQR;
    273                SQR;
    274                SQR;
    275                MUL(smallExp / 2);
    276            } else if (smallExp & 2) {
    277                SQR;
    278                SQR;
    279                SQR;
    280                MUL(smallExp / 4);
    281                SQR;
    282            } else if (smallExp & 4) {
    283                SQR;
    284                SQR;
    285                MUL(smallExp / 8);
    286                SQR;
    287                SQR;
    288            } else if (smallExp & 8) {
    289                SQR;
    290                MUL(smallExp / 16);
    291                SQR;
    292                SQR;
    293                SQR;
    294            } else {
    295                abort();
    296            }
    297        } else if (window_bits == 5) {
    298            if (!smallExp) {
    299                SQR;
    300                SQR;
    301                SQR;
    302                SQR;
    303                SQR;
    304            } else if (smallExp & 1) {
    305                SQR;
    306                SQR;
    307                SQR;
    308                SQR;
    309                SQR;
    310                MUL(smallExp / 2);
    311            } else if (smallExp & 2) {
    312                SQR;
    313                SQR;
    314                SQR;
    315                SQR;
    316                MUL(smallExp / 4);
    317                SQR;
    318            } else if (smallExp & 4) {
    319                SQR;
    320                SQR;
    321                SQR;
    322                MUL(smallExp / 8);
    323                SQR;
    324                SQR;
    325            } else if (smallExp & 8) {
    326                SQR;
    327                SQR;
    328                MUL(smallExp / 16);
    329                SQR;
    330                SQR;
    331                SQR;
    332            } else if (smallExp & 0x10) {
    333                SQR;
    334                MUL(smallExp / 32);
    335                SQR;
    336                SQR;
    337                SQR;
    338                SQR;
    339            } else {
    340                abort();
    341            }
    342        } else if (window_bits == 6) {
    343            if (!smallExp) {
    344                SQR;
    345                SQR;
    346                SQR;
    347                SQR;
    348                SQR;
    349                SQR;
    350            } else if (smallExp & 1) {
    351                SQR;
    352                SQR;
    353                SQR;
    354                SQR;
    355                SQR;
    356                SQR;
    357                MUL(smallExp / 2);
    358            } else if (smallExp & 2) {
    359                SQR;
    360                SQR;
    361                SQR;
    362                SQR;
    363                SQR;
    364                MUL(smallExp / 4);
    365                SQR;
    366            } else if (smallExp & 4) {
    367                SQR;
    368                SQR;
    369                SQR;
    370                SQR;
    371                MUL(smallExp / 8);
    372                SQR;
    373                SQR;
    374            } else if (smallExp & 8) {
    375                SQR;
    376                SQR;
    377                SQR;
    378                MUL(smallExp / 16);
    379                SQR;
    380                SQR;
    381                SQR;
    382            } else if (smallExp & 0x10) {
    383                SQR;
    384                SQR;
    385                MUL(smallExp / 32);
    386                SQR;
    387                SQR;
    388                SQR;
    389                SQR;
    390            } else if (smallExp & 0x20) {
    391                SQR;
    392                MUL(smallExp / 64);
    393                SQR;
    394                SQR;
    395                SQR;
    396                SQR;
    397                SQR;
    398            } else {
    399                abort();
    400            }
    401        } else {
    402            abort();
    403        }
    404    }
    405 
    406    s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */
    407 
    408    res = s_mp_redc(&accum1, mmm);
    409    mp_exch(&accum1, result);
    410 
    411 CLEANUP:
    412    mp_clear(&accum1);
    413    if (dBuf) {
    414        if (dSize)
    415            memset(dBuf, 0, dSize);
    416        free(dBuf);
    417    }
    418 
    419    return res;
    420 }
    421 #undef SQR
    422 #undef MUL
    423 #endif
    424 
    425 #define SQR(a, b)             \
    426    MP_CHECKOK(mp_sqr(a, b)); \
    427    MP_CHECKOK(s_mp_redc(b, mmm))
    428 
    429 #if defined(MP_MONT_USE_MP_MUL)
    430 #define MUL(x, a, b)                           \
    431    MP_CHECKOK(mp_mul(a, oddPowers + (x), b)); \
    432    MP_CHECKOK(s_mp_redc(b, mmm))
    433 #else
    434 #define MUL(x, a, b) \
    435    MP_CHECKOK(s_mp_mul_mont(a, oddPowers + (x), b, mmm))
    436 #endif
    437 
    438 #define SWAPPA  \
    439    ptmp = pa1; \
    440    pa1 = pa2;  \
    441    pa2 = ptmp
    442 
    443 /* Do modular exponentiation using integer multiply code. */
    444 mp_err
    445 mp_exptmod_i(const mp_int *montBase,
    446             const mp_int *exponent,
    447             const mp_int *modulus,
    448             mp_int *result,
    449             mp_mont_modulus *mmm,
    450             int nLen,
    451             mp_size bits_in_exponent,
    452             mp_size window_bits,
    453             mp_size odd_ints)
    454 {
    455    mp_int *pa1, *pa2, *ptmp;
    456    mp_size i;
    457    mp_err res;
    458    int expOff;
    459    mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS];
    460 
    461    /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
    462    /* oddPowers[i] = base ** (2*i + 1); */
    463 
    464    MP_DIGITS(&accum1) = 0;
    465    MP_DIGITS(&accum2) = 0;
    466    MP_DIGITS(&power2) = 0;
    467    for (i = 0; i < MAX_ODD_INTS; ++i) {
    468        MP_DIGITS(oddPowers + i) = 0;
    469    }
    470 
    471    MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
    472    MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
    473 
    474    MP_CHECKOK(mp_init_copy(&oddPowers[0], montBase));
    475 
    476    MP_CHECKOK(mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2));
    477    MP_CHECKOK(mp_sqr(montBase, &power2)); /* power2 = montBase ** 2 */
    478    MP_CHECKOK(s_mp_redc(&power2, mmm));
    479 
    480    for (i = 1; i < odd_ints; ++i) {
    481        MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2));
    482        MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i));
    483        MP_CHECKOK(s_mp_redc(oddPowers + i, mmm));
    484    }
    485 
    486    /* set accumulator to montgomery residue of 1 */
    487    mp_set(&accum1, 1);
    488    MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
    489    pa1 = &accum1;
    490    pa2 = &accum2;
    491 
    492    for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
    493        mp_size smallExp;
    494        MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
    495        smallExp = (mp_size)res;
    496 
    497        if (window_bits == 1) {
    498            if (!smallExp) {
    499                SQR(pa1, pa2);
    500                SWAPPA;
    501            } else if (smallExp & 1) {
    502                SQR(pa1, pa2);
    503                MUL(0, pa2, pa1);
    504            } else {
    505                abort();
    506            }
    507        } else if (window_bits == 4) {
    508            if (!smallExp) {
    509                SQR(pa1, pa2);
    510                SQR(pa2, pa1);
    511                SQR(pa1, pa2);
    512                SQR(pa2, pa1);
    513            } else if (smallExp & 1) {
    514                SQR(pa1, pa2);
    515                SQR(pa2, pa1);
    516                SQR(pa1, pa2);
    517                SQR(pa2, pa1);
    518                MUL(smallExp / 2, pa1, pa2);
    519                SWAPPA;
    520            } else if (smallExp & 2) {
    521                SQR(pa1, pa2);
    522                SQR(pa2, pa1);
    523                SQR(pa1, pa2);
    524                MUL(smallExp / 4, pa2, pa1);
    525                SQR(pa1, pa2);
    526                SWAPPA;
    527            } else if (smallExp & 4) {
    528                SQR(pa1, pa2);
    529                SQR(pa2, pa1);
    530                MUL(smallExp / 8, pa1, pa2);
    531                SQR(pa2, pa1);
    532                SQR(pa1, pa2);
    533                SWAPPA;
    534            } else if (smallExp & 8) {
    535                SQR(pa1, pa2);
    536                MUL(smallExp / 16, pa2, pa1);
    537                SQR(pa1, pa2);
    538                SQR(pa2, pa1);
    539                SQR(pa1, pa2);
    540                SWAPPA;
    541            } else {
    542                abort();
    543            }
    544        } else if (window_bits == 5) {
    545            if (!smallExp) {
    546                SQR(pa1, pa2);
    547                SQR(pa2, pa1);
    548                SQR(pa1, pa2);
    549                SQR(pa2, pa1);
    550                SQR(pa1, pa2);
    551                SWAPPA;
    552            } else if (smallExp & 1) {
    553                SQR(pa1, pa2);
    554                SQR(pa2, pa1);
    555                SQR(pa1, pa2);
    556                SQR(pa2, pa1);
    557                SQR(pa1, pa2);
    558                MUL(smallExp / 2, pa2, pa1);
    559            } else if (smallExp & 2) {
    560                SQR(pa1, pa2);
    561                SQR(pa2, pa1);
    562                SQR(pa1, pa2);
    563                SQR(pa2, pa1);
    564                MUL(smallExp / 4, pa1, pa2);
    565                SQR(pa2, pa1);
    566            } else if (smallExp & 4) {
    567                SQR(pa1, pa2);
    568                SQR(pa2, pa1);
    569                SQR(pa1, pa2);
    570                MUL(smallExp / 8, pa2, pa1);
    571                SQR(pa1, pa2);
    572                SQR(pa2, pa1);
    573            } else if (smallExp & 8) {
    574                SQR(pa1, pa2);
    575                SQR(pa2, pa1);
    576                MUL(smallExp / 16, pa1, pa2);
    577                SQR(pa2, pa1);
    578                SQR(pa1, pa2);
    579                SQR(pa2, pa1);
    580            } else if (smallExp & 0x10) {
    581                SQR(pa1, pa2);
    582                MUL(smallExp / 32, pa2, pa1);
    583                SQR(pa1, pa2);
    584                SQR(pa2, pa1);
    585                SQR(pa1, pa2);
    586                SQR(pa2, pa1);
    587            } else {
    588                abort();
    589            }
    590        } else if (window_bits == 6) {
    591            if (!smallExp) {
    592                SQR(pa1, pa2);
    593                SQR(pa2, pa1);
    594                SQR(pa1, pa2);
    595                SQR(pa2, pa1);
    596                SQR(pa1, pa2);
    597                SQR(pa2, pa1);
    598            } else if (smallExp & 1) {
    599                SQR(pa1, pa2);
    600                SQR(pa2, pa1);
    601                SQR(pa1, pa2);
    602                SQR(pa2, pa1);
    603                SQR(pa1, pa2);
    604                SQR(pa2, pa1);
    605                MUL(smallExp / 2, pa1, pa2);
    606                SWAPPA;
    607            } else if (smallExp & 2) {
    608                SQR(pa1, pa2);
    609                SQR(pa2, pa1);
    610                SQR(pa1, pa2);
    611                SQR(pa2, pa1);
    612                SQR(pa1, pa2);
    613                MUL(smallExp / 4, pa2, pa1);
    614                SQR(pa1, pa2);
    615                SWAPPA;
    616            } else if (smallExp & 4) {
    617                SQR(pa1, pa2);
    618                SQR(pa2, pa1);
    619                SQR(pa1, pa2);
    620                SQR(pa2, pa1);
    621                MUL(smallExp / 8, pa1, pa2);
    622                SQR(pa2, pa1);
    623                SQR(pa1, pa2);
    624                SWAPPA;
    625            } else if (smallExp & 8) {
    626                SQR(pa1, pa2);
    627                SQR(pa2, pa1);
    628                SQR(pa1, pa2);
    629                MUL(smallExp / 16, pa2, pa1);
    630                SQR(pa1, pa2);
    631                SQR(pa2, pa1);
    632                SQR(pa1, pa2);
    633                SWAPPA;
    634            } else if (smallExp & 0x10) {
    635                SQR(pa1, pa2);
    636                SQR(pa2, pa1);
    637                MUL(smallExp / 32, pa1, pa2);
    638                SQR(pa2, pa1);
    639                SQR(pa1, pa2);
    640                SQR(pa2, pa1);
    641                SQR(pa1, pa2);
    642                SWAPPA;
    643            } else if (smallExp & 0x20) {
    644                SQR(pa1, pa2);
    645                MUL(smallExp / 64, pa2, pa1);
    646                SQR(pa1, pa2);
    647                SQR(pa2, pa1);
    648                SQR(pa1, pa2);
    649                SQR(pa2, pa1);
    650                SQR(pa1, pa2);
    651                SWAPPA;
    652            } else {
    653                abort();
    654            }
    655        } else {
    656            abort();
    657        }
    658    }
    659 
    660    res = s_mp_redc(pa1, mmm);
    661    mp_exch(pa1, result);
    662 
    663 CLEANUP:
    664    mp_clear(&accum1);
    665    mp_clear(&accum2);
    666    mp_clear(&power2);
    667    for (i = 0; i < odd_ints; ++i) {
    668        mp_clear(oddPowers + i);
    669    }
    670    return res;
    671 }
    672 #undef SQR
    673 #undef MUL
    674 
    675 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
    676 unsigned int mp_using_cache_safe_exp = 1;
    677 #endif
    678 
    679 mp_err
    680 mp_set_safe_modexp(int value)
    681 {
    682 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
    683    mp_using_cache_safe_exp = value;
    684    return MP_OKAY;
    685 #else
    686    if (value == 0) {
    687        return MP_OKAY;
    688    }
    689    return MP_BADARG;
    690 #endif
    691 }
    692 
    693 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
    694 #define WEAVE_WORD_SIZE 4
    695 
    696 /*
    697 * mpi_to_weave takes an array of bignums, a matrix in which each bignum
    698 * occupies all the columns of a row, and transposes it into a matrix in
    699 * which each bignum occupies a column of every row.  The first row of the
    700 * input matrix becomes the first column of the output matrix.  The n'th
    701 * row of input becomes the n'th column of output.  The input data is said
    702 * to be "interleaved" or "woven" into the output matrix.
    703 *
    704 * The array of bignums is left in this woven form.  Each time a single
    705 * bignum value is needed, it is recreated by fetching the n'th column,
    706 * forming a single row which is the new bignum.
    707 *
    708 * The purpose of this interleaving is make it impossible to determine which
    709 * of the bignums is being used in any one operation by examining the pattern
    710 * of cache misses.
    711 *
    712 * The weaving function does not transpose the entire input matrix in one call.
    713 * It transposes 4 rows of mp_ints into their respective columns of output.
    714 *
    715 * This implementation treats each mp_int bignum as an array of mp_digits,
    716 * It stores those bytes as a column of mp_digits in the output matrix.  It
    717 * doesn't care if the machine uses big-endian or little-endian byte ordering
    718 * within mp_digits.
    719 *
    720 * "bignums" is an array of mp_ints.
    721 * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.
    722 *
    723 * "weaved" is the weaved output matrix.
    724 * The first byte of bignums[0] is stored in weaved[0].
    725 *
    726 * "nBignums" is the total number of bignums in the array of which "bignums"
    727 * is a part.
    728 *
    729 * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array.
    730 * mp_ints that use less than nDigits digits are logically padded with zeros
    731 * while being stored in the weaved array.
    732 */
    733 mp_err
    734 mpi_to_weave(const mp_int *bignums,
    735             mp_digit *weaved,
    736             mp_size nDigits,  /* in each mp_int of input */
    737             mp_size nBignums) /* in the entire source array */
    738 {
    739    mp_size i;
    740    mp_digit *endDest = weaved + (nDigits * nBignums);
    741 
    742    for (i = 0; i < WEAVE_WORD_SIZE; i++) {
    743        mp_size used = MP_USED(&bignums[i]);
    744        mp_digit *pSrc = MP_DIGITS(&bignums[i]);
    745        mp_digit *endSrc = pSrc + used;
    746        mp_digit *pDest = weaved + i;
    747 
    748        ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG);
    749        ARGCHK(used <= nDigits, MP_BADARG);
    750 
    751        for (; pSrc < endSrc; pSrc++) {
    752            *pDest = *pSrc;
    753            pDest += nBignums;
    754        }
    755        while (pDest < endDest) {
    756            *pDest = 0;
    757            pDest += nBignums;
    758        }
    759    }
    760 
    761    return MP_OKAY;
    762 }
    763 
    764 /*
    765 * These functions return 0xffffffff if the output is true, and 0 otherwise.
    766 */
    767 #define CONST_TIME_MSB(x) (0L - ((x) >> (8 * sizeof(x) - 1)))
    768 #define CONST_TIME_EQ_Z(x) CONST_TIME_MSB(~(x) & ((x)-1))
    769 #define CONST_TIME_EQ(a, b) CONST_TIME_EQ_Z((a) ^ (b))
    770 
    771 /* Reverse the operation above for one mp_int.
    772 * Reconstruct one mp_int from its column in the weaved array.
    773 * Every read accesses every element of the weaved array, in order to
    774 * avoid timing attacks based on patterns of memory accesses.
    775 */
    776 mp_err
    777 weave_to_mpi(mp_int *a,              /* out, result */
    778             const mp_digit *weaved, /* in, byte matrix */
    779             mp_size index,          /* which column to read */
    780             mp_size nDigits,        /* number of mp_digits in each bignum */
    781             mp_size nBignums)       /* width of the matrix */
    782 {
    783    /* these are indices, but need to be the same size as mp_digit
    784     * because of the CONST_TIME operations */
    785    mp_digit i, j;
    786    mp_digit d;
    787    mp_digit *pDest = MP_DIGITS(a);
    788 
    789    MP_SIGN(a) = MP_ZPOS;
    790    MP_USED(a) = nDigits;
    791 
    792    assert(weaved != NULL);
    793 
    794    /* Fetch the proper column in constant time, indexing over the whole array */
    795    for (i = 0; i < nDigits; ++i) {
    796        d = 0;
    797        for (j = 0; j < nBignums; ++j) {
    798            d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index);
    799        }
    800        pDest[i] = d;
    801    }
    802 
    803    s_mp_clamp(a);
    804    return MP_OKAY;
    805 }
    806 
    807 #define SQR(a, b)             \
    808    MP_CHECKOK(mp_sqr(a, b)); \
    809    MP_CHECKOK(s_mp_redc(b, mmm))
    810 
    811 #if defined(MP_MONT_USE_MP_MUL)
    812 #define MUL_NOWEAVE(x, a, b)     \
    813    MP_CHECKOK(mp_mul(a, x, b)); \
    814    MP_CHECKOK(s_mp_redc(b, mmm))
    815 #else
    816 #define MUL_NOWEAVE(x, a, b) \
    817    MP_CHECKOK(s_mp_mul_mont(a, x, b, mmm))
    818 #endif
    819 
    820 #define MUL(x, a, b)                                               \
    821    MP_CHECKOK(weave_to_mpi(&tmp, powers, (x), nLen, num_powers)); \
    822    MUL_NOWEAVE(&tmp, a, b)
    823 
    824 #define SWAPPA  \
    825    ptmp = pa1; \
    826    pa1 = pa2;  \
    827    pa2 = ptmp
    828 #define MP_ALIGN(x, y) ((((ptrdiff_t)(x)) + ((y)-1)) & (((ptrdiff_t)0) - (y)))
    829 
    830 /* Do modular exponentiation using integer multiply code. */
    831 mp_err
    832 mp_exptmod_safe_i(const mp_int *montBase,
    833                  const mp_int *exponent,
    834                  const mp_int *modulus,
    835                  mp_int *result,
    836                  mp_mont_modulus *mmm,
    837                  int nLen,
    838                  mp_size bits_in_exponent,
    839                  mp_size window_bits,
    840                  mp_size num_powers)
    841 {
    842    mp_int *pa1, *pa2, *ptmp;
    843    mp_size i;
    844    mp_size first_window;
    845    mp_err res;
    846    int expOff;
    847    mp_int accum1, accum2, accum[WEAVE_WORD_SIZE];
    848    mp_int tmp;
    849    mp_digit *powersArray = NULL;
    850    mp_digit *powers = NULL;
    851 
    852    MP_DIGITS(&accum1) = 0;
    853    MP_DIGITS(&accum2) = 0;
    854    MP_DIGITS(&accum[0]) = 0;
    855    MP_DIGITS(&accum[1]) = 0;
    856    MP_DIGITS(&accum[2]) = 0;
    857    MP_DIGITS(&accum[3]) = 0;
    858    MP_DIGITS(&tmp) = 0;
    859 
    860    /* grab the first window value. This allows us to preload accumulator1
    861     * and save a conversion, some squares and a multiple*/
    862    MP_CHECKOK(mpl_get_bits(exponent,
    863                            bits_in_exponent - window_bits, window_bits));
    864    first_window = (mp_size)res;
    865 
    866    MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
    867    MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
    868 
    869    /* build the first WEAVE_WORD powers inline */
    870    /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
    871    if (num_powers > 2) {
    872        MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2));
    873        MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2));
    874        MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
    875        MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
    876        mp_set(&accum[0], 1);
    877        MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
    878        MP_CHECKOK(mp_copy(montBase, &accum[1]));
    879        SQR(montBase, &accum[2]);
    880        MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
    881        powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1));
    882        if (!powersArray) {
    883            res = MP_MEM;
    884            goto CLEANUP;
    885        }
    886        /* powers[i] = base ** (i); */
    887        powers = (mp_digit *)MP_ALIGN(powersArray, num_powers);
    888        MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers));
    889        if (first_window < 4) {
    890            MP_CHECKOK(mp_copy(&accum[first_window], &accum1));
    891            first_window = num_powers;
    892        }
    893    } else {
    894        if (first_window == 0) {
    895            mp_set(&accum1, 1);
    896            MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
    897        } else {
    898            /* assert first_window == 1? */
    899            MP_CHECKOK(mp_copy(montBase, &accum1));
    900        }
    901    }
    902 
    903    /*
    904     * calculate all the powers in the powers array.
    905     * this adds 2**(k-1)-2 square operations over just calculating the
    906     * odd powers where k is the window size in the two other mp_modexpt
    907     * implementations in this file. We will get some of that
    908     * back by not needing the first 'k' squares and one multiply for the
    909     * first window.
    910     * Given the value of 4 for WEAVE_WORD_SIZE, this loop will only execute if
    911     * num_powers > 2, in which case powers will have been allocated.
    912     */
    913    for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
    914        int acc_index = i & (WEAVE_WORD_SIZE - 1); /* i % WEAVE_WORD_SIZE */
    915        if (i & 1) {
    916            MUL_NOWEAVE(montBase, &accum[acc_index - 1], &accum[acc_index]);
    917            /* we've filled the array do our 'per array' processing */
    918            if (acc_index == (WEAVE_WORD_SIZE - 1)) {
    919                MP_CHECKOK(mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE - 1),
    920                                        nLen, num_powers));
    921 
    922                if (first_window <= i) {
    923                    MP_CHECKOK(mp_copy(&accum[first_window & (WEAVE_WORD_SIZE - 1)],
    924                                       &accum1));
    925                    first_window = num_powers;
    926                }
    927            }
    928        } else {
    929            /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source
    930             * and target are the same so we need to copy.. After that, the
    931             * value is overwritten, so we need to fetch it from the stored
    932             * weave array */
    933            if (i > 2 * WEAVE_WORD_SIZE) {
    934                MP_CHECKOK(weave_to_mpi(&accum2, powers, i / 2, nLen, num_powers));
    935                SQR(&accum2, &accum[acc_index]);
    936            } else {
    937                int half_power_index = (i / 2) & (WEAVE_WORD_SIZE - 1);
    938                if (half_power_index == acc_index) {
    939                    /* copy is cheaper than weave_to_mpi */
    940                    MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
    941                    SQR(&accum2, &accum[acc_index]);
    942                } else {
    943                    SQR(&accum[half_power_index], &accum[acc_index]);
    944                }
    945            }
    946        }
    947    }
    948 /* if the accum1 isn't set, Then there is something wrong with our logic
    949 * above and is an internal programming error.
    950 */
    951 #if MP_ARGCHK == 2
    952    assert(MP_USED(&accum1) != 0);
    953 #endif
    954 
    955    /* set accumulator to montgomery residue of 1 */
    956    pa1 = &accum1;
    957    pa2 = &accum2;
    958 
    959    /* tmp is not used if window_bits == 1. */
    960    if (window_bits != 1) {
    961        MP_CHECKOK(mp_init_size(&tmp, 3 * nLen + 2));
    962    }
    963 
    964    for (expOff = bits_in_exponent - window_bits * 2; expOff >= 0; expOff -= window_bits) {
    965        mp_size smallExp;
    966        MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
    967        smallExp = (mp_size)res;
    968 
    969        /* handle unroll the loops */
    970        switch (window_bits) {
    971            case 1:
    972                if (!smallExp) {
    973                    SQR(pa1, pa2);
    974                    SWAPPA;
    975                } else if (smallExp & 1) {
    976                    SQR(pa1, pa2);
    977                    MUL_NOWEAVE(montBase, pa2, pa1);
    978                } else {
    979                    abort();
    980                }
    981                break;
    982            case 6:
    983                SQR(pa1, pa2);
    984                SQR(pa2, pa1);
    985            /* fall through */
    986            case 4:
    987                SQR(pa1, pa2);
    988                SQR(pa2, pa1);
    989                SQR(pa1, pa2);
    990                SQR(pa2, pa1);
    991                MUL(smallExp, pa1, pa2);
    992                SWAPPA;
    993                break;
    994            case 5:
    995                SQR(pa1, pa2);
    996                SQR(pa2, pa1);
    997                SQR(pa1, pa2);
    998                SQR(pa2, pa1);
    999                SQR(pa1, pa2);
   1000                MUL(smallExp, pa2, pa1);
   1001                break;
   1002            default:
   1003                abort(); /* could do a loop? */
   1004        }
   1005    }
   1006 
   1007    res = s_mp_redc(pa1, mmm);
   1008    mp_exch(pa1, result);
   1009 
   1010 CLEANUP:
   1011    mp_clear(&accum1);
   1012    mp_clear(&accum2);
   1013    mp_clear(&accum[0]);
   1014    mp_clear(&accum[1]);
   1015    mp_clear(&accum[2]);
   1016    mp_clear(&accum[3]);
   1017    mp_clear(&tmp);
   1018    /* zero required by FIPS here, can't use PORT_ZFree
   1019     * because mpi doesn't link with util */
   1020    if (powers) {
   1021        PORT_Memset(powers, 0, num_powers * sizeof(mp_digit));
   1022    }
   1023    free(powersArray);
   1024    return res;
   1025 }
   1026 #undef SQR
   1027 #undef MUL
   1028 #endif
   1029 
   1030 mp_err
   1031 mp_exptmod(const mp_int *inBase, const mp_int *exponent,
   1032           const mp_int *modulus, mp_int *result)
   1033 {
   1034    const mp_int *base;
   1035    mp_size bits_in_exponent, i, window_bits, odd_ints;
   1036    mp_err res;
   1037    int nLen;
   1038    mp_int montBase, goodBase;
   1039    mp_mont_modulus mmm;
   1040 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
   1041    static unsigned int max_window_bits;
   1042 #endif
   1043 
   1044    /* function for computing n0prime only works if n0 is odd */
   1045    if (!mp_isodd(modulus))
   1046        return s_mp_exptmod(inBase, exponent, modulus, result);
   1047 
   1048    if (mp_cmp_z(inBase) == MP_LT)
   1049        return MP_RANGE;
   1050    MP_DIGITS(&montBase) = 0;
   1051    MP_DIGITS(&goodBase) = 0;
   1052 
   1053    if (mp_cmp(inBase, modulus) < 0) {
   1054        base = inBase;
   1055    } else {
   1056        MP_CHECKOK(mp_init(&goodBase));
   1057        base = &goodBase;
   1058        MP_CHECKOK(mp_mod(inBase, modulus, &goodBase));
   1059    }
   1060 
   1061    nLen = MP_USED(modulus);
   1062    MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2));
   1063 
   1064    mmm.N = *modulus; /* a copy of the mp_int struct */
   1065 
   1066    /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
   1067    **        where n0 = least significant mp_digit of N, the modulus.
   1068    */
   1069    mmm.n0prime = mp_calculate_mont_n0i(modulus);
   1070 
   1071    MP_CHECKOK(mp_to_mont(base, modulus, &montBase));
   1072 
   1073    bits_in_exponent = mpl_significant_bits(exponent);
   1074 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
   1075    if (mp_using_cache_safe_exp) {
   1076        if (bits_in_exponent > 780)
   1077            window_bits = 6;
   1078        else if (bits_in_exponent > 256)
   1079            window_bits = 5;
   1080        else if (bits_in_exponent > 20)
   1081            window_bits = 4;
   1082        /* RSA public key exponents are typically under 20 bits (common values
   1083         * are: 3, 17, 65537) and a 4-bit window is inefficient
   1084         */
   1085        else
   1086            window_bits = 1;
   1087    } else
   1088 #endif
   1089        if (bits_in_exponent > 480)
   1090        window_bits = 6;
   1091    else if (bits_in_exponent > 160)
   1092        window_bits = 5;
   1093    else if (bits_in_exponent > 20)
   1094        window_bits = 4;
   1095    /* RSA public key exponents are typically under 20 bits (common values
   1096     * are: 3, 17, 65537) and a 4-bit window is inefficient
   1097     */
   1098    else
   1099        window_bits = 1;
   1100 
   1101 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
   1102    /*
   1103     * clamp the window size based on
   1104     * the cache line size.
   1105     */
   1106    if (!max_window_bits) {
   1107        unsigned long cache_size = s_mpi_getProcessorLineSize();
   1108        /* processor has no cache, use 'fast' code always */
   1109        if (cache_size == 0) {
   1110            mp_using_cache_safe_exp = 0;
   1111        }
   1112        if ((cache_size == 0) || (cache_size >= 64)) {
   1113            max_window_bits = 6;
   1114        } else if (cache_size >= 32) {
   1115            max_window_bits = 5;
   1116        } else if (cache_size >= 16) {
   1117            max_window_bits = 4;
   1118        } else
   1119            max_window_bits = 1; /* should this be an assert? */
   1120    }
   1121 
   1122    /* clamp the window size down before we caclulate bits_in_exponent */
   1123    if (mp_using_cache_safe_exp) {
   1124        if (window_bits > max_window_bits) {
   1125            window_bits = max_window_bits;
   1126        }
   1127    }
   1128 #endif
   1129 
   1130    odd_ints = 1 << (window_bits - 1);
   1131    i = bits_in_exponent % window_bits;
   1132    if (i != 0) {
   1133        bits_in_exponent += window_bits - i;
   1134    }
   1135 
   1136 #ifdef MP_USING_MONT_MULF
   1137    if (mp_using_mont_mulf) {
   1138        MP_CHECKOK(s_mp_pad(&montBase, nLen));
   1139        res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
   1140                           bits_in_exponent, window_bits, odd_ints);
   1141    } else
   1142 #endif
   1143 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
   1144        if (mp_using_cache_safe_exp) {
   1145        res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen,
   1146                                bits_in_exponent, window_bits, 1 << window_bits);
   1147    } else
   1148 #endif
   1149        res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
   1150                           bits_in_exponent, window_bits, odd_ints);
   1151 
   1152 CLEANUP:
   1153    mp_clear(&montBase);
   1154    mp_clear(&goodBase);
   1155    /* Don't mp_clear mmm.N because it is merely a copy of modulus.
   1156    ** Just zap it.
   1157    */
   1158    memset(&mmm, 0, sizeof mmm);
   1159    return res;
   1160 }