tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mpi.c (121599B)


      1 /*
      2 *  mpi.c
      3 *
      4 *  Arbitrary precision integer arithmetic library
      5 *
      6 * This Source Code Form is subject to the terms of the Mozilla Public
      7 * License, v. 2.0. If a copy of the MPL was not distributed with this
      8 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      9 
     10 #include "mpi-priv.h"
     11 #include "mplogic.h"
     12 
     13 #include <assert.h>
     14 
     15 #if defined(__arm__) && \
     16    ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
     17 /* 16-bit thumb or ARM v3 doesn't work inlined assember version */
     18 #undef MP_ASSEMBLY_MULTIPLY
     19 #undef MP_ASSEMBLY_SQUARE
     20 #endif
     21 
     22 #if MP_LOGTAB
     23 /*
     24  A table of the logs of 2 for various bases (the 0 and 1 entries of
     25  this table are meaningless and should not be referenced).
     26 
     27  This table is used to compute output lengths for the mp_toradix()
     28  function.  Since a number n in radix r takes up about log_r(n)
     29  digits, we estimate the output size by taking the least integer
     30  greater than log_r(n), where:
     31 
     32  log_r(n) = log_2(n) * log_r(2)
     33 
     34  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
     35  which are the output bases supported.
     36 */
     37 #include "logtab.h"
     38 #endif
     39 
     40 #ifdef CT_VERIF
     41 #include <valgrind/memcheck.h>
     42 #endif
     43 
     44 /* {{{ Constant strings */
     45 
     46 /* Constant strings returned by mp_strerror() */
     47 static const char *mp_err_string[] = {
     48    "unknown result code",     /* say what?            */
     49    "boolean true",            /* MP_OKAY, MP_YES      */
     50    "boolean false",           /* MP_NO                */
     51    "out of memory",           /* MP_MEM               */
     52    "argument out of range",   /* MP_RANGE             */
     53    "invalid input parameter", /* MP_BADARG            */
     54    "result is undefined"      /* MP_UNDEF             */
     55 };
     56 
     57 /* Value to digit maps for radix conversion   */
     58 
     59 /* s_dmap_1 - standard digits and letters */
     60 static const char *s_dmap_1 =
     61    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
     62 
     63 /* }}} */
     64 
     65 /* {{{ Default precision manipulation */
     66 
     67 /* Default precision for newly created mp_int's      */
     68 static mp_size s_mp_defprec = MP_DEFPREC;
     69 
     70 mp_size
     71 mp_get_prec(void)
     72 {
     73    return s_mp_defprec;
     74 
     75 } /* end mp_get_prec() */
     76 
     77 void
     78 mp_set_prec(mp_size prec)
     79 {
     80    if (prec == 0)
     81        s_mp_defprec = MP_DEFPREC;
     82    else
     83        s_mp_defprec = prec;
     84 
     85 } /* end mp_set_prec() */
     86 
     87 /* }}} */
     88 
     89 #ifdef CT_VERIF
     90 void
     91 mp_taint(mp_int *mp)
     92 {
     93    size_t i;
     94    for (i = 0; i < mp->used; ++i) {
     95        VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit));
     96    }
     97 }
     98 
     99 void
    100 mp_untaint(mp_int *mp)
    101 {
    102    size_t i;
    103    for (i = 0; i < mp->used; ++i) {
    104        VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[i]), sizeof(mp_digit));
    105    }
    106 }
    107 #endif
    108 
    109 /*------------------------------------------------------------------------*/
    110 /* {{{ mp_init(mp) */
    111 
    112 /*
    113  mp_init(mp)
    114 
    115  Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
    116  MP_MEM if memory could not be allocated for the structure.
    117 */
    118 
    119 mp_err
    120 mp_init(mp_int *mp)
    121 {
    122    return mp_init_size(mp, s_mp_defprec);
    123 
    124 } /* end mp_init() */
    125 
    126 /* }}} */
    127 
    128 /* {{{ mp_init_size(mp, prec) */
    129 
    130 /*
    131  mp_init_size(mp, prec)
    132 
    133  Initialize a new zero-valued mp_int with at least the given
    134  precision; returns MP_OKAY if successful, or MP_MEM if memory could
    135  not be allocated for the structure.
    136 */
    137 
    138 mp_err
    139 mp_init_size(mp_int *mp, mp_size prec)
    140 {
    141    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
    142 
    143    prec = MP_ROUNDUP(prec, s_mp_defprec);
    144    if ((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
    145        return MP_MEM;
    146 
    147    SIGN(mp) = ZPOS;
    148    USED(mp) = 1;
    149    ALLOC(mp) = prec;
    150 
    151    return MP_OKAY;
    152 
    153 } /* end mp_init_size() */
    154 
    155 /* }}} */
    156 
    157 /* {{{ mp_init_copy(mp, from) */
    158 
    159 /*
    160  mp_init_copy(mp, from)
    161 
    162  Initialize mp as an exact copy of from.  Returns MP_OKAY if
    163  successful, MP_MEM if memory could not be allocated for the new
    164  structure.
    165 */
    166 
    167 mp_err
    168 mp_init_copy(mp_int *mp, const mp_int *from)
    169 {
    170    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
    171 
    172    if (mp == from)
    173        return MP_OKAY;
    174 
    175    if ((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
    176        return MP_MEM;
    177 
    178    s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
    179    USED(mp) = USED(from);
    180    ALLOC(mp) = ALLOC(from);
    181    SIGN(mp) = SIGN(from);
    182 
    183    return MP_OKAY;
    184 
    185 } /* end mp_init_copy() */
    186 
    187 /* }}} */
    188 
    189 /* {{{ mp_copy(from, to) */
    190 
    191 /*
    192  mp_copy(from, to)
    193 
    194  Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
    195  'to' has already been initialized (if not, use mp_init_copy()
    196  instead). If 'from' and 'to' are identical, nothing happens.
    197 */
    198 
    199 mp_err
    200 mp_copy(const mp_int *from, mp_int *to)
    201 {
    202    ARGCHK(from != NULL && to != NULL, MP_BADARG);
    203 
    204    if (from == to)
    205        return MP_OKAY;
    206 
    207    { /* copy */
    208        mp_digit *tmp;
    209 
    210        /*
    211          If the allocated buffer in 'to' already has enough space to hold
    212          all the used digits of 'from', we'll re-use it to avoid hitting
    213          the memory allocater more than necessary; otherwise, we'd have
    214          to grow anyway, so we just allocate a hunk and make the copy as
    215          usual
    216         */
    217        if (ALLOC(to) >= USED(from)) {
    218            s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
    219            s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
    220 
    221        } else {
    222            if ((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
    223                return MP_MEM;
    224 
    225            s_mp_copy(DIGITS(from), tmp, USED(from));
    226 
    227            if (DIGITS(to) != NULL) {
    228                s_mp_setz(DIGITS(to), ALLOC(to));
    229                s_mp_free(DIGITS(to));
    230            }
    231 
    232            DIGITS(to) = tmp;
    233            ALLOC(to) = ALLOC(from);
    234        }
    235 
    236        /* Copy the precision and sign from the original */
    237        USED(to) = USED(from);
    238        SIGN(to) = SIGN(from);
    239    } /* end copy */
    240 
    241    return MP_OKAY;
    242 
    243 } /* end mp_copy() */
    244 
    245 /* }}} */
    246 
    247 /* {{{ mp_exch(mp1, mp2) */
    248 
    249 /*
    250  mp_exch(mp1, mp2)
    251 
    252  Exchange mp1 and mp2 without allocating any intermediate memory
    253  (well, unless you count the stack space needed for this call and the
    254  locals it creates...).  This cannot fail.
    255 */
    256 
    257 void
    258 mp_exch(mp_int *mp1, mp_int *mp2)
    259 {
    260 #if MP_ARGCHK == 2
    261    assert(mp1 != NULL && mp2 != NULL);
    262 #else
    263    if (mp1 == NULL || mp2 == NULL)
    264        return;
    265 #endif
    266 
    267    s_mp_exch(mp1, mp2);
    268 
    269 } /* end mp_exch() */
    270 
    271 /* }}} */
    272 
    273 /* {{{ mp_clear(mp) */
    274 
    275 /*
    276  mp_clear(mp)
    277 
    278  Release the storage used by an mp_int, and void its fields so that
    279  if someone calls mp_clear() again for the same int later, we won't
    280  get tollchocked.
    281 */
    282 
    283 void
    284 mp_clear(mp_int *mp)
    285 {
    286    if (mp == NULL)
    287        return;
    288 
    289    if (DIGITS(mp) != NULL) {
    290        s_mp_setz(DIGITS(mp), ALLOC(mp));
    291        s_mp_free(DIGITS(mp));
    292        DIGITS(mp) = NULL;
    293    }
    294 
    295    USED(mp) = 0;
    296    ALLOC(mp) = 0;
    297 
    298 } /* end mp_clear() */
    299 
    300 /* }}} */
    301 
    302 /* {{{ mp_zero(mp) */
    303 
    304 /*
    305  mp_zero(mp)
    306 
    307  Set mp to zero.  Does not change the allocated size of the structure,
    308  and therefore cannot fail (except on a bad argument, which we ignore)
    309 */
    310 void
    311 mp_zero(mp_int *mp)
    312 {
    313    if (mp == NULL)
    314        return;
    315 
    316    s_mp_setz(DIGITS(mp), ALLOC(mp));
    317    USED(mp) = 1;
    318    SIGN(mp) = ZPOS;
    319 
    320 } /* end mp_zero() */
    321 
    322 /* }}} */
    323 
    324 /* {{{ mp_set(mp, d) */
    325 
    326 void
    327 mp_set(mp_int *mp, mp_digit d)
    328 {
    329    if (mp == NULL)
    330        return;
    331 
    332    mp_zero(mp);
    333    DIGIT(mp, 0) = d;
    334 
    335 } /* end mp_set() */
    336 
    337 /* }}} */
    338 
    339 /* {{{ mp_set_int(mp, z) */
    340 
    341 mp_err
    342 mp_set_int(mp_int *mp, long z)
    343 {
    344    unsigned long v = labs(z);
    345    mp_err res;
    346 
    347    ARGCHK(mp != NULL, MP_BADARG);
    348 
    349    /* https://bugzilla.mozilla.org/show_bug.cgi?id=1509432 */
    350    if ((res = mp_set_ulong(mp, v)) != MP_OKAY) { /* avoids duplicated code */
    351        return res;
    352    }
    353 
    354    if (z < 0) {
    355        SIGN(mp) = NEG;
    356    }
    357 
    358    return MP_OKAY;
    359 } /* end mp_set_int() */
    360 
    361 /* }}} */
    362 
    363 /* {{{ mp_set_ulong(mp, z) */
    364 
    365 mp_err
    366 mp_set_ulong(mp_int *mp, unsigned long z)
    367 {
    368    int ix;
    369    mp_err res;
    370 
    371    ARGCHK(mp != NULL, MP_BADARG);
    372 
    373    mp_zero(mp);
    374    if (z == 0)
    375        return MP_OKAY; /* shortcut for zero */
    376 
    377    if (sizeof z <= sizeof(mp_digit)) {
    378        DIGIT(mp, 0) = z;
    379    } else {
    380        for (ix = sizeof(long) - 1; ix >= 0; ix--) {
    381            if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
    382                return res;
    383 
    384            res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
    385            if (res != MP_OKAY)
    386                return res;
    387        }
    388    }
    389    return MP_OKAY;
    390 } /* end mp_set_ulong() */
    391 
    392 /* }}} */
    393 
    394 /*------------------------------------------------------------------------*/
    395 /* {{{ Digit arithmetic */
    396 
    397 /* {{{ mp_add_d(a, d, b) */
    398 
    399 /*
    400  mp_add_d(a, d, b)
    401 
    402  Compute the sum b = a + d, for a single digit d.  Respects the sign of
    403  its primary addend (single digits are unsigned anyway).
    404 */
    405 
    406 mp_err
    407 mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
    408 {
    409    mp_int tmp;
    410    mp_err res;
    411 
    412    ARGCHK(a != NULL && b != NULL, MP_BADARG);
    413 
    414    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
    415        return res;
    416 
    417    if (SIGN(&tmp) == ZPOS) {
    418        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
    419            goto CLEANUP;
    420    } else if (s_mp_cmp_d(&tmp, d) >= 0) {
    421        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
    422            goto CLEANUP;
    423    } else {
    424        mp_neg(&tmp, &tmp);
    425 
    426        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
    427    }
    428 
    429    if (s_mp_cmp_d(&tmp, 0) == 0)
    430        SIGN(&tmp) = ZPOS;
    431 
    432    s_mp_exch(&tmp, b);
    433 
    434 CLEANUP:
    435    mp_clear(&tmp);
    436    return res;
    437 
    438 } /* end mp_add_d() */
    439 
    440 /* }}} */
    441 
    442 /* {{{ mp_sub_d(a, d, b) */
    443 
    444 /*
    445  mp_sub_d(a, d, b)
    446 
    447  Compute the difference b = a - d, for a single digit d.  Respects the
    448  sign of its subtrahend (single digits are unsigned anyway).
    449 */
    450 
    451 mp_err
    452 mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
    453 {
    454    mp_int tmp;
    455    mp_err res;
    456 
    457    ARGCHK(a != NULL && b != NULL, MP_BADARG);
    458 
    459    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
    460        return res;
    461 
    462    if (SIGN(&tmp) == NEG) {
    463        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
    464            goto CLEANUP;
    465    } else if (s_mp_cmp_d(&tmp, d) >= 0) {
    466        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
    467            goto CLEANUP;
    468    } else {
    469        mp_neg(&tmp, &tmp);
    470 
    471        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
    472        SIGN(&tmp) = NEG;
    473    }
    474 
    475    if (s_mp_cmp_d(&tmp, 0) == 0)
    476        SIGN(&tmp) = ZPOS;
    477 
    478    s_mp_exch(&tmp, b);
    479 
    480 CLEANUP:
    481    mp_clear(&tmp);
    482    return res;
    483 
    484 } /* end mp_sub_d() */
    485 
    486 /* }}} */
    487 
    488 /* {{{ mp_mul_d(a, d, b) */
    489 
    490 /*
    491  mp_mul_d(a, d, b)
    492 
    493  Compute the product b = a * d, for a single digit d.  Respects the sign
    494  of its multiplicand (single digits are unsigned anyway)
    495 */
    496 
    497 mp_err
    498 mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
    499 {
    500    mp_err res;
    501 
    502    ARGCHK(a != NULL && b != NULL, MP_BADARG);
    503 
    504    if (d == 0) {
    505        mp_zero(b);
    506        return MP_OKAY;
    507    }
    508 
    509    if ((res = mp_copy(a, b)) != MP_OKAY)
    510        return res;
    511 
    512    res = s_mp_mul_d(b, d);
    513 
    514    return res;
    515 
    516 } /* end mp_mul_d() */
    517 
    518 /* }}} */
    519 
    520 /* {{{ mp_mul_2(a, c) */
    521 
    522 mp_err
    523 mp_mul_2(const mp_int *a, mp_int *c)
    524 {
    525    mp_err res;
    526 
    527    ARGCHK(a != NULL && c != NULL, MP_BADARG);
    528 
    529    if ((res = mp_copy(a, c)) != MP_OKAY)
    530        return res;
    531 
    532    return s_mp_mul_2(c);
    533 
    534 } /* end mp_mul_2() */
    535 
    536 /* }}} */
    537 
    538 /* {{{ mp_div_d(a, d, q, r) */
    539 
    540 /*
    541  mp_div_d(a, d, q, r)
    542 
    543  Compute the quotient q = a / d and remainder r = a mod d, for a
    544  single digit d.  Respects the sign of its divisor (single digits are
    545  unsigned anyway).
    546 */
    547 
    548 mp_err
    549 mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
    550 {
    551    mp_err res;
    552    mp_int qp;
    553    mp_digit rem = 0;
    554    int pow;
    555 
    556    ARGCHK(a != NULL, MP_BADARG);
    557 
    558    if (d == 0)
    559        return MP_RANGE;
    560 
    561    /* Shortcut for powers of two ... */
    562    if ((pow = s_mp_ispow2d(d)) >= 0) {
    563        mp_digit mask;
    564 
    565        mask = ((mp_digit)1 << pow) - 1;
    566        rem = DIGIT(a, 0) & mask;
    567 
    568        if (q) {
    569            if ((res = mp_copy(a, q)) != MP_OKAY) {
    570                return res;
    571            }
    572            s_mp_div_2d(q, pow);
    573        }
    574 
    575        if (r)
    576            *r = rem;
    577 
    578        return MP_OKAY;
    579    }
    580 
    581    if ((res = mp_init_copy(&qp, a)) != MP_OKAY)
    582        return res;
    583 
    584    res = s_mp_div_d(&qp, d, &rem);
    585 
    586    if (s_mp_cmp_d(&qp, 0) == 0)
    587        SIGN(q) = ZPOS;
    588 
    589    if (r) {
    590        *r = rem;
    591    }
    592 
    593    if (q)
    594        s_mp_exch(&qp, q);
    595 
    596    mp_clear(&qp);
    597    return res;
    598 
    599 } /* end mp_div_d() */
    600 
    601 /* }}} */
    602 
    603 /* {{{ mp_div_2(a, c) */
    604 
    605 /*
    606  mp_div_2(a, c)
    607 
    608  Compute c = a / 2, disregarding the remainder.
    609 */
    610 
    611 mp_err
    612 mp_div_2(const mp_int *a, mp_int *c)
    613 {
    614    mp_err res;
    615 
    616    ARGCHK(a != NULL && c != NULL, MP_BADARG);
    617 
    618    if ((res = mp_copy(a, c)) != MP_OKAY)
    619        return res;
    620 
    621    s_mp_div_2(c);
    622 
    623    return MP_OKAY;
    624 
    625 } /* end mp_div_2() */
    626 
    627 /* }}} */
    628 
    629 /* {{{ mp_expt_d(a, d, b) */
    630 
    631 mp_err
    632 mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
    633 {
    634    mp_int s, x;
    635    mp_err res;
    636 
    637    ARGCHK(a != NULL && c != NULL, MP_BADARG);
    638 
    639    if ((res = mp_init(&s)) != MP_OKAY)
    640        return res;
    641    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
    642        goto X;
    643 
    644    DIGIT(&s, 0) = 1;
    645 
    646    while (d != 0) {
    647        if (d & 1) {
    648            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
    649                goto CLEANUP;
    650        }
    651 
    652        d /= 2;
    653 
    654        if ((res = s_mp_sqr(&x)) != MP_OKAY)
    655            goto CLEANUP;
    656    }
    657 
    658    s_mp_exch(&s, c);
    659 
    660 CLEANUP:
    661    mp_clear(&x);
    662 X:
    663    mp_clear(&s);
    664 
    665    return res;
    666 
    667 } /* end mp_expt_d() */
    668 
    669 /* }}} */
    670 
    671 /* }}} */
    672 
    673 /*------------------------------------------------------------------------*/
    674 /* {{{ Full arithmetic */
    675 
    676 /* {{{ mp_abs(a, b) */
    677 
    678 /*
    679  mp_abs(a, b)
    680 
    681  Compute b = |a|.  'a' and 'b' may be identical.
    682 */
    683 
    684 mp_err
    685 mp_abs(const mp_int *a, mp_int *b)
    686 {
    687    mp_err res;
    688 
    689    ARGCHK(a != NULL && b != NULL, MP_BADARG);
    690 
    691    if ((res = mp_copy(a, b)) != MP_OKAY)
    692        return res;
    693 
    694    SIGN(b) = ZPOS;
    695 
    696    return MP_OKAY;
    697 
    698 } /* end mp_abs() */
    699 
    700 /* }}} */
    701 
    702 /* {{{ mp_neg(a, b) */
    703 
    704 /*
    705  mp_neg(a, b)
    706 
    707  Compute b = -a.  'a' and 'b' may be identical.
    708 */
    709 
    710 mp_err
    711 mp_neg(const mp_int *a, mp_int *b)
    712 {
    713    mp_err res;
    714 
    715    ARGCHK(a != NULL && b != NULL, MP_BADARG);
    716 
    717    if ((res = mp_copy(a, b)) != MP_OKAY)
    718        return res;
    719 
    720    if (s_mp_cmp_d(b, 0) == MP_EQ)
    721        SIGN(b) = ZPOS;
    722    else
    723        SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
    724 
    725    return MP_OKAY;
    726 
    727 } /* end mp_neg() */
    728 
    729 /* }}} */
    730 
    731 /* {{{ mp_add(a, b, c) */
    732 
    733 /*
    734  mp_add(a, b, c)
    735 
    736  Compute c = a + b.  All parameters may be identical.
    737 */
    738 
    739 mp_err
    740 mp_add(const mp_int *a, const mp_int *b, mp_int *c)
    741 {
    742    mp_err res;
    743 
    744    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
    745 
    746    if (SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
    747        MP_CHECKOK(s_mp_add_3arg(a, b, c));
    748    } else if (s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b|   */
    749        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
    750    } else { /* different sign: |a|  < |b|   */
    751        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
    752    }
    753 
    754    if (s_mp_cmp_d(c, 0) == MP_EQ)
    755        SIGN(c) = ZPOS;
    756 
    757 CLEANUP:
    758    return res;
    759 
    760 } /* end mp_add() */
    761 
    762 /* }}} */
    763 
    764 /* {{{ mp_sub(a, b, c) */
    765 
    766 /*
    767  mp_sub(a, b, c)
    768 
    769  Compute c = a - b.  All parameters may be identical.
    770 */
    771 
    772 mp_err
    773 mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
    774 {
    775    mp_err res;
    776    int magDiff;
    777 
    778    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
    779 
    780    if (a == b) {
    781        mp_zero(c);
    782        return MP_OKAY;
    783    }
    784 
    785    if (MP_SIGN(a) != MP_SIGN(b)) {
    786        MP_CHECKOK(s_mp_add_3arg(a, b, c));
    787    } else if (!(magDiff = s_mp_cmp(a, b))) {
    788        mp_zero(c);
    789        res = MP_OKAY;
    790    } else if (magDiff > 0) {
    791        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
    792    } else {
    793        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
    794        MP_SIGN(c) = !MP_SIGN(a);
    795    }
    796 
    797    if (s_mp_cmp_d(c, 0) == MP_EQ)
    798        MP_SIGN(c) = MP_ZPOS;
    799 
    800 CLEANUP:
    801    return res;
    802 
    803 } /* end mp_sub() */
    804 
    805 /* }}} */
    806 
    807 /* {{{ s_mp_mulg(a, b, c) */
    808 
    809 /*
    810  s_mp_mulg(a, b, c)
    811 
    812  Compute c = a * b.  All parameters may be identical. if constantTime is set,
    813  then the operations are done in constant time. The original is mostly
    814  constant time as long as s_mpv_mul_d_add() is constant time. This is true
    815  of the x86 assembler, as well as the current c code.
    816 */
    817 mp_err
    818 s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
    819 {
    820    mp_digit *pb;
    821    mp_int tmp;
    822    mp_err res;
    823    mp_size ib;
    824    mp_size useda, usedb;
    825 
    826    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
    827 
    828    if (a == c) {
    829        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
    830            return res;
    831        if (a == b)
    832            b = &tmp;
    833        a = &tmp;
    834    } else if (b == c) {
    835        if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
    836            return res;
    837        b = &tmp;
    838    } else {
    839        MP_DIGITS(&tmp) = 0;
    840    }
    841 
    842    if (MP_USED(a) < MP_USED(b)) {
    843        const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
    844        b = a;
    845        a = xch;
    846    }
    847 
    848    MP_USED(c) = 1;
    849    MP_DIGIT(c, 0) = 0;
    850    if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
    851        goto CLEANUP;
    852 
    853 #ifdef NSS_USE_COMBA
    854    /* comba isn't constant time because it clamps! If we cared
    855     * (we needed a constant time version of multiply that was 'faster'
    856     * we could easily pass constantTime down to the comba code and
    857     * get it to skip the clamp... but here are assembler versions
    858     * which add comba to platforms that can't compile the normal
    859     * comba's imbedded assembler which would also need to change, so
    860     * for now we just skip comba when we are running constant time. */
    861    if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
    862        if (MP_USED(a) == 4) {
    863            s_mp_mul_comba_4(a, b, c);
    864            goto CLEANUP;
    865        }
    866        if (MP_USED(a) == 8) {
    867            s_mp_mul_comba_8(a, b, c);
    868            goto CLEANUP;
    869        }
    870        if (MP_USED(a) == 16) {
    871            s_mp_mul_comba_16(a, b, c);
    872            goto CLEANUP;
    873        }
    874        if (MP_USED(a) == 32) {
    875            s_mp_mul_comba_32(a, b, c);
    876            goto CLEANUP;
    877        }
    878    }
    879 #endif
    880 
    881    pb = MP_DIGITS(b);
    882    s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
    883 
    884    /* Outer loop:  Digits of b */
    885    useda = MP_USED(a);
    886    usedb = MP_USED(b);
    887    for (ib = 1; ib < usedb; ib++) {
    888        mp_digit b_i = *pb++;
    889 
    890        /* Inner product:  Digits of a */
    891        if (constantTime || b_i)
    892            s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
    893        else
    894            MP_DIGIT(c, ib + useda) = b_i;
    895    }
    896 
    897    if (!constantTime) {
    898        s_mp_clamp(c);
    899    }
    900 
    901    if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
    902        SIGN(c) = ZPOS;
    903    else
    904        SIGN(c) = NEG;
    905 
    906 CLEANUP:
    907    mp_clear(&tmp);
    908    return res;
    909 } /* end smp_mulg() */
    910 
    911 /* }}} */
    912 
    913 /* {{{ mp_mul(a, b, c) */
    914 
    915 /*
    916  mp_mul(a, b, c)
    917 
    918  Compute c = a * b.  All parameters may be identical.
    919 */
    920 
    921 mp_err
    922 mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
    923 {
    924    return s_mp_mulg(a, b, c, 0);
    925 } /* end mp_mul() */
    926 
    927 /* }}} */
    928 
    929 /* {{{ mp_mulCT(a, b, c) */
    930 
    931 /*
    932  mp_mulCT(a, b, c)
    933 
    934  Compute c = a * b. In constant time. Parameters may not be identical.
    935  NOTE: a and b may be modified.
    936 */
    937 
    938 mp_err
    939 mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
    940 {
    941    mp_err res;
    942 
    943    /* make the multiply values fixed length so multiply
    944     * doesn't leak the length. at this point all the
    945     * values are blinded, but once we finish we want the
    946     * output size to be hidden (so no clamping the out put) */
    947    MP_CHECKOK(s_mp_pad(a, setSize));
    948    MP_CHECKOK(s_mp_pad(b, setSize));
    949    MP_CHECKOK(s_mp_pad(c, 2 * setSize));
    950    MP_CHECKOK(s_mp_mulg(a, b, c, 1));
    951 CLEANUP:
    952    return res;
    953 } /* end mp_mulCT() */
    954 
    955 /* }}} */
    956 
    957 /* {{{ mp_sqr(a, sqr) */
    958 
    959 #if MP_SQUARE
    960 /*
    961  Computes the square of a.  This can be done more
    962  efficiently than a general multiplication, because many of the
    963  computation steps are redundant when squaring.  The inner product
    964  step is a bit more complicated, but we save a fair number of
    965  iterations of the multiplication loop.
    966 */
    967 
    968 /* sqr = a^2;   Caller provides both a and tmp; */
    969 mp_err
    970 mp_sqr(const mp_int *a, mp_int *sqr)
    971 {
    972    mp_digit *pa;
    973    mp_digit d;
    974    mp_err res;
    975    mp_size ix;
    976    mp_int tmp;
    977    int count;
    978 
    979    ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
    980 
    981    if (a == sqr) {
    982        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
    983            return res;
    984        a = &tmp;
    985    } else {
    986        DIGITS(&tmp) = 0;
    987        res = MP_OKAY;
    988    }
    989 
    990    ix = 2 * MP_USED(a);
    991    if (ix > MP_ALLOC(sqr)) {
    992        MP_USED(sqr) = 1;
    993        MP_CHECKOK(s_mp_grow(sqr, ix));
    994    }
    995    MP_USED(sqr) = ix;
    996    MP_DIGIT(sqr, 0) = 0;
    997 
    998 #ifdef NSS_USE_COMBA
    999    if (IS_POWER_OF_2(MP_USED(a))) {
   1000        if (MP_USED(a) == 4) {
   1001            s_mp_sqr_comba_4(a, sqr);
   1002            goto CLEANUP;
   1003        }
   1004        if (MP_USED(a) == 8) {
   1005            s_mp_sqr_comba_8(a, sqr);
   1006            goto CLEANUP;
   1007        }
   1008        if (MP_USED(a) == 16) {
   1009            s_mp_sqr_comba_16(a, sqr);
   1010            goto CLEANUP;
   1011        }
   1012        if (MP_USED(a) == 32) {
   1013            s_mp_sqr_comba_32(a, sqr);
   1014            goto CLEANUP;
   1015        }
   1016    }
   1017 #endif
   1018 
   1019    pa = MP_DIGITS(a);
   1020    count = MP_USED(a) - 1;
   1021    if (count > 0) {
   1022        d = *pa++;
   1023        s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
   1024        for (ix = 3; --count > 0; ix += 2) {
   1025            d = *pa++;
   1026            s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
   1027        }                                    /* for(ix ...) */
   1028        MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */
   1029 
   1030        /* now sqr *= 2 */
   1031        s_mp_mul_2(sqr);
   1032    } else {
   1033        MP_DIGIT(sqr, 1) = 0;
   1034    }
   1035 
   1036    /* now add the squares of the digits of a to sqr. */
   1037    s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
   1038 
   1039    SIGN(sqr) = ZPOS;
   1040    s_mp_clamp(sqr);
   1041 
   1042 CLEANUP:
   1043    mp_clear(&tmp);
   1044    return res;
   1045 
   1046 } /* end mp_sqr() */
   1047 #endif
   1048 
   1049 /* }}} */
   1050 
   1051 /* {{{ mp_div(a, b, q, r) */
   1052 
   1053 /*
   1054  mp_div(a, b, q, r)
   1055 
   1056  Compute q = a / b and r = a mod b.  Input parameters may be re-used
   1057  as output parameters.  If q or r is NULL, that portion of the
   1058  computation will be discarded (although it will still be computed)
   1059 */
   1060 mp_err
   1061 mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
   1062 {
   1063    mp_err res;
   1064    mp_int *pQ, *pR;
   1065    mp_int qtmp, rtmp, btmp;
   1066    int cmp;
   1067    mp_sign signA;
   1068    mp_sign signB;
   1069 
   1070    ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1071 
   1072    signA = MP_SIGN(a);
   1073    signB = MP_SIGN(b);
   1074 
   1075    if (mp_cmp_z(b) == MP_EQ)
   1076        return MP_RANGE;
   1077 
   1078    DIGITS(&qtmp) = 0;
   1079    DIGITS(&rtmp) = 0;
   1080    DIGITS(&btmp) = 0;
   1081 
   1082    /* Set up some temporaries... */
   1083    if (!r || r == a || r == b) {
   1084        MP_CHECKOK(mp_init_copy(&rtmp, a));
   1085        pR = &rtmp;
   1086    } else {
   1087        MP_CHECKOK(mp_copy(a, r));
   1088        pR = r;
   1089    }
   1090 
   1091    if (!q || q == a || q == b) {
   1092        MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a)));
   1093        pQ = &qtmp;
   1094    } else {
   1095        MP_CHECKOK(s_mp_pad(q, MP_USED(a)));
   1096        pQ = q;
   1097        mp_zero(pQ);
   1098    }
   1099 
   1100    /*
   1101      If |a| <= |b|, we can compute the solution without division;
   1102      otherwise, we actually do the work required.
   1103     */
   1104    if ((cmp = s_mp_cmp(a, b)) <= 0) {
   1105        if (cmp) {
   1106            /* r was set to a above. */
   1107            mp_zero(pQ);
   1108        } else {
   1109            mp_set(pQ, 1);
   1110            mp_zero(pR);
   1111        }
   1112    } else {
   1113        MP_CHECKOK(mp_init_copy(&btmp, b));
   1114        MP_CHECKOK(s_mp_div(pR, &btmp, pQ));
   1115    }
   1116 
   1117    /* Compute the signs for the output  */
   1118    MP_SIGN(pR) = signA;        /* Sr = Sa              */
   1119    /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
   1120    MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
   1121 
   1122    if (s_mp_cmp_d(pQ, 0) == MP_EQ)
   1123        SIGN(pQ) = ZPOS;
   1124    if (s_mp_cmp_d(pR, 0) == MP_EQ)
   1125        SIGN(pR) = ZPOS;
   1126 
   1127    /* Copy output, if it is needed      */
   1128    if (q && q != pQ)
   1129        s_mp_exch(pQ, q);
   1130 
   1131    if (r && r != pR)
   1132        s_mp_exch(pR, r);
   1133 
   1134 CLEANUP:
   1135    mp_clear(&btmp);
   1136    mp_clear(&rtmp);
   1137    mp_clear(&qtmp);
   1138 
   1139    return res;
   1140 
   1141 } /* end mp_div() */
   1142 
   1143 /* }}} */
   1144 
   1145 /* {{{ mp_div_2d(a, d, q, r) */
   1146 
   1147 mp_err
   1148 mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
   1149 {
   1150    mp_err res;
   1151 
   1152    ARGCHK(a != NULL, MP_BADARG);
   1153 
   1154    if (q) {
   1155        if ((res = mp_copy(a, q)) != MP_OKAY)
   1156            return res;
   1157    }
   1158    if (r) {
   1159        if ((res = mp_copy(a, r)) != MP_OKAY)
   1160            return res;
   1161    }
   1162    if (q) {
   1163        s_mp_div_2d(q, d);
   1164    }
   1165    if (r) {
   1166        s_mp_mod_2d(r, d);
   1167    }
   1168 
   1169    return MP_OKAY;
   1170 
   1171 } /* end mp_div_2d() */
   1172 
   1173 /* }}} */
   1174 
   1175 /* {{{ mp_expt(a, b, c) */
   1176 
   1177 /*
   1178  mp_expt(a, b, c)
   1179 
   1180  Compute c = a ** b, that is, raise a to the b power.  Uses a
   1181  standard iterative square-and-multiply technique.
   1182 */
   1183 
   1184 mp_err
   1185 mp_expt(mp_int *a, mp_int *b, mp_int *c)
   1186 {
   1187    mp_int s, x;
   1188    mp_err res;
   1189    mp_digit d;
   1190    unsigned int dig, bit;
   1191 
   1192    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   1193 
   1194    if (mp_cmp_z(b) < 0)
   1195        return MP_RANGE;
   1196 
   1197    if ((res = mp_init(&s)) != MP_OKAY)
   1198        return res;
   1199 
   1200    mp_set(&s, 1);
   1201 
   1202    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
   1203        goto X;
   1204 
   1205    /* Loop over low-order digits in ascending order */
   1206    for (dig = 0; dig < (USED(b) - 1); dig++) {
   1207        d = DIGIT(b, dig);
   1208 
   1209        /* Loop over bits of each non-maximal digit */
   1210        for (bit = 0; bit < DIGIT_BIT; bit++) {
   1211            if (d & 1) {
   1212                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
   1213                    goto CLEANUP;
   1214            }
   1215 
   1216            d >>= 1;
   1217 
   1218            if ((res = s_mp_sqr(&x)) != MP_OKAY)
   1219                goto CLEANUP;
   1220        }
   1221    }
   1222 
   1223    /* Consider now the last digit... */
   1224    d = DIGIT(b, dig);
   1225 
   1226    while (d) {
   1227        if (d & 1) {
   1228            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
   1229                goto CLEANUP;
   1230        }
   1231 
   1232        d >>= 1;
   1233 
   1234        if ((res = s_mp_sqr(&x)) != MP_OKAY)
   1235            goto CLEANUP;
   1236    }
   1237 
   1238    if (mp_iseven(b))
   1239        SIGN(&s) = SIGN(a);
   1240 
   1241    res = mp_copy(&s, c);
   1242 
   1243 CLEANUP:
   1244    mp_clear(&x);
   1245 X:
   1246    mp_clear(&s);
   1247 
   1248    return res;
   1249 
   1250 } /* end mp_expt() */
   1251 
   1252 /* }}} */
   1253 
   1254 /* {{{ mp_2expt(a, k) */
   1255 
   1256 /* Compute a = 2^k */
   1257 
   1258 mp_err
   1259 mp_2expt(mp_int *a, mp_digit k)
   1260 {
   1261    ARGCHK(a != NULL, MP_BADARG);
   1262 
   1263    return s_mp_2expt(a, k);
   1264 
   1265 } /* end mp_2expt() */
   1266 
   1267 /* }}} */
   1268 
   1269 /* {{{ mp_mod(a, m, c) */
   1270 
   1271 /*
   1272  mp_mod(a, m, c)
   1273 
   1274  Compute c = a (mod m).  Result will always be 0 <= c < m.
   1275 */
   1276 
   1277 mp_err
   1278 mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
   1279 {
   1280    mp_err res;
   1281    int mag;
   1282 
   1283    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
   1284 
   1285    if (SIGN(m) == NEG)
   1286        return MP_RANGE;
   1287 
   1288    /*
   1289     If |a| > m, we need to divide to get the remainder and take the
   1290     absolute value.
   1291 
   1292     If |a| < m, we don't need to do any division, just copy and adjust
   1293     the sign (if a is negative).
   1294 
   1295     If |a| == m, we can simply set the result to zero.
   1296 
   1297     This order is intended to minimize the average path length of the
   1298     comparison chain on common workloads -- the most frequent cases are
   1299     that |a| != m, so we do those first.
   1300     */
   1301    if ((mag = s_mp_cmp(a, m)) > 0) {
   1302        if ((res = mp_div(a, m, NULL, c)) != MP_OKAY)
   1303            return res;
   1304 
   1305        if (SIGN(c) == NEG) {
   1306            if ((res = mp_add(c, m, c)) != MP_OKAY)
   1307                return res;
   1308        }
   1309 
   1310    } else if (mag < 0) {
   1311        if ((res = mp_copy(a, c)) != MP_OKAY)
   1312            return res;
   1313 
   1314        if (mp_cmp_z(a) < 0) {
   1315            if ((res = mp_add(c, m, c)) != MP_OKAY)
   1316                return res;
   1317        }
   1318 
   1319    } else {
   1320        mp_zero(c);
   1321    }
   1322 
   1323    return MP_OKAY;
   1324 
   1325 } /* end mp_mod() */
   1326 
   1327 /* }}} */
   1328 
   1329 /* {{{ s_mp_subCT_d(a, b, borrow, c) */
   1330 
   1331 /*
   1332  s_mp_subCT_d(a, b, borrow, c)
   1333 
   1334  Compute c = (a -b) - subtract in constant time. returns borrow
   1335 */
   1336 mp_digit
   1337 s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret)
   1338 {
   1339    *ret = a - b - borrow;
   1340    return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow);
   1341 } /*  s_mp_subCT_d() */
   1342 
   1343 /* }}} */
   1344 
   1345 /* {{{ mp_subCT(a, b, ret, borrow) */
   1346 
   1347 /* return ret= a - b and borrow in borrow. done in constant time.
   1348 * b could be modified.
   1349 */
   1350 mp_err
   1351 mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
   1352 {
   1353    mp_size used_a = MP_USED(a);
   1354    mp_size i;
   1355    mp_err res;
   1356 
   1357    MP_CHECKOK(s_mp_pad(b, used_a));
   1358    MP_CHECKOK(s_mp_pad(ret, used_a));
   1359    *borrow = 0;
   1360    for (i = 0; i < used_a; i++) {
   1361        *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow,
   1362                               &MP_DIGIT(ret, i));
   1363    }
   1364 
   1365    res = MP_OKAY;
   1366 CLEANUP:
   1367    return res;
   1368 } /*  end mp_subCT() */
   1369 
   1370 /* }}} */
   1371 
   1372 /* {{{ mp_selectCT(cond, a, b, ret) */
   1373 
   1374 /*
   1375 * return ret= cond ? a : b; cond should be either 0 or 1
   1376 */
   1377 mp_err
   1378 mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
   1379 {
   1380    mp_size used_a = MP_USED(a);
   1381    mp_err res;
   1382    mp_size i;
   1383 
   1384    cond *= MP_DIGIT_MAX;
   1385 
   1386    /* we currently require these to be equal on input,
   1387     * we could use pad to extend one of them, but that might
   1388     * leak data as it wouldn't be constant time */
   1389    if (used_a != MP_USED(b)) {
   1390        return MP_BADARG;
   1391    }
   1392 
   1393    MP_CHECKOK(s_mp_pad(ret, used_a));
   1394    for (i = 0; i < used_a; i++) {
   1395        MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
   1396    }
   1397    res = MP_OKAY;
   1398 CLEANUP:
   1399    return res;
   1400 } /* end mp_selectCT() */
   1401 
   1402 /* {{{ mp_reduceCT(a, m, c) */
   1403 
   1404 /*
   1405  mp_reduceCT(a, m, c)
   1406 
   1407  Compute c = aR^-1 (mod m) in constant time.
   1408   input should be in montgomery form. If input is the
   1409   result of a montgomery multiply then out put will be
   1410   in mongomery form.
   1411   Result will be reduced to MP_USED(m), but not be
   1412   clamped.
   1413 */
   1414 
   1415 mp_err
   1416 mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
   1417 {
   1418    mp_size used_m = MP_USED(m);
   1419    mp_size used_c = used_m * 2 + 1;
   1420    mp_digit *m_digits, *c_digits;
   1421    mp_size i;
   1422    mp_digit borrow, carry;
   1423    mp_err res;
   1424    mp_int sub;
   1425 
   1426    MP_DIGITS(&sub) = 0;
   1427    MP_CHECKOK(mp_init_size(&sub, used_m));
   1428 
   1429    if (a != c) {
   1430        MP_CHECKOK(mp_copy(a, c));
   1431    }
   1432    MP_CHECKOK(s_mp_pad(c, used_c));
   1433    m_digits = MP_DIGITS(m);
   1434    c_digits = MP_DIGITS(c);
   1435    for (i = 0; i < used_m; i++) {
   1436        mp_digit m_i = MP_DIGIT(c, i) * n0i;
   1437        s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
   1438    }
   1439    s_mp_rshd(c, used_m);
   1440    /* MP_USED(c) should be used_m+1 with the high word being any carry
   1441     * from the previous multiply, save that carry and drop the high
   1442     * word for the substraction below */
   1443    carry = MP_DIGIT(c, used_m);
   1444    MP_DIGIT(c, used_m) = 0;
   1445    MP_USED(c) = used_m;
   1446    /* mp_subCT wants c and m to be the same size, we've already
   1447     * guarrenteed that in the previous statement, so mp_subCT won't actually
   1448     * modify m, so it's safe to recast */
   1449    MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
   1450 
   1451    /* we return c-m if c >= m no borrow or there was a borrow and a carry */
   1452    MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
   1453    res = MP_OKAY;
   1454 CLEANUP:
   1455    mp_clear(&sub);
   1456    return res;
   1457 } /* end mp_reduceCT() */
   1458 
   1459 /* }}} */
   1460 
   1461 /* {{{ mp_mod_d(a, d, c) */
   1462 
   1463 /*
   1464  mp_mod_d(a, d, c)
   1465 
   1466  Compute c = a (mod d).  Result will always be 0 <= c < d
   1467 */
   1468 mp_err
   1469 mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
   1470 {
   1471    mp_err res;
   1472    mp_digit rem;
   1473 
   1474    ARGCHK(a != NULL && c != NULL, MP_BADARG);
   1475 
   1476    if (s_mp_cmp_d(a, d) > 0) {
   1477        if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
   1478            return res;
   1479 
   1480    } else {
   1481        if (SIGN(a) == NEG)
   1482            rem = d - DIGIT(a, 0);
   1483        else
   1484            rem = DIGIT(a, 0);
   1485    }
   1486 
   1487    if (c)
   1488        *c = rem;
   1489 
   1490    return MP_OKAY;
   1491 
   1492 } /* end mp_mod_d() */
   1493 
   1494 /* }}} */
   1495 
   1496 /* }}} */
   1497 
   1498 /*------------------------------------------------------------------------*/
   1499 /* {{{ Modular arithmetic */
   1500 
   1501 #if MP_MODARITH
   1502 /* {{{ mp_addmod(a, b, m, c) */
   1503 
   1504 /*
   1505  mp_addmod(a, b, m, c)
   1506 
   1507  Compute c = (a + b) mod m
   1508 */
   1509 
   1510 mp_err
   1511 mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
   1512 {
   1513    mp_err res;
   1514 
   1515    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
   1516 
   1517    if ((res = mp_add(a, b, c)) != MP_OKAY)
   1518        return res;
   1519    if ((res = mp_mod(c, m, c)) != MP_OKAY)
   1520        return res;
   1521 
   1522    return MP_OKAY;
   1523 }
   1524 
   1525 /* }}} */
   1526 
   1527 /* {{{ mp_submod(a, b, m, c) */
   1528 
   1529 /*
   1530  mp_submod(a, b, m, c)
   1531 
   1532  Compute c = (a - b) mod m
   1533 */
   1534 
   1535 mp_err
   1536 mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
   1537 {
   1538    mp_err res;
   1539 
   1540    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
   1541 
   1542    if ((res = mp_sub(a, b, c)) != MP_OKAY)
   1543        return res;
   1544    if ((res = mp_mod(c, m, c)) != MP_OKAY)
   1545        return res;
   1546 
   1547    return MP_OKAY;
   1548 }
   1549 
   1550 /* }}} */
   1551 
   1552 /* {{{ mp_mulmod(a, b, m, c) */
   1553 
   1554 /*
   1555  mp_mulmod(a, b, m, c)
   1556 
   1557  Compute c = (a * b) mod m
   1558 */
   1559 
   1560 mp_err
   1561 mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
   1562 {
   1563    mp_err res;
   1564 
   1565    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
   1566 
   1567    if ((res = mp_mul(a, b, c)) != MP_OKAY)
   1568        return res;
   1569    if ((res = mp_mod(c, m, c)) != MP_OKAY)
   1570        return res;
   1571 
   1572    return MP_OKAY;
   1573 }
   1574 
   1575 /* }}} */
   1576 
   1577 /* {{{ mp_mulmontmodCT(a, b, m, c) */
   1578 
   1579 /*
   1580  mp_mulmontmodCT(a, b, m, c)
   1581 
   1582  Compute c = (a * b) mod m in constant time wrt a and b. either a or b
   1583  should be in montgomery form and the output is native. If both a and b
   1584  are in montgomery form, then the output will also be in montgomery form
   1585  and can be recovered with an mp_reduceCT call.
   1586  NOTE: a and b may be modified.
   1587 */
   1588 
   1589 mp_err
   1590 mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
   1591                mp_int *c)
   1592 {
   1593    mp_err res;
   1594 
   1595    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
   1596 
   1597    if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
   1598        return res;
   1599 
   1600    if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
   1601        return res;
   1602 
   1603    return MP_OKAY;
   1604 }
   1605 
   1606 /* }}} */
   1607 
   1608 /* {{{ mp_sqrmod(a, m, c) */
   1609 
   1610 #if MP_SQUARE
   1611 mp_err
   1612 mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
   1613 {
   1614    mp_err res;
   1615 
   1616    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
   1617 
   1618    if ((res = mp_sqr(a, c)) != MP_OKAY)
   1619        return res;
   1620    if ((res = mp_mod(c, m, c)) != MP_OKAY)
   1621        return res;
   1622 
   1623    return MP_OKAY;
   1624 
   1625 } /* end mp_sqrmod() */
   1626 #endif
   1627 
   1628 /* }}} */
   1629 
   1630 /* {{{ s_mp_exptmod(a, b, m, c) */
   1631 
   1632 /*
   1633  s_mp_exptmod(a, b, m, c)
   1634 
   1635  Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
   1636  method with modular reductions at each step. (This is basically the
   1637  same code as mp_expt(), except for the addition of the reductions)
   1638 
   1639  The modular reductions are done using Barrett's algorithm (see
   1640  s_mp_reduce() below for details)
   1641 */
   1642 
   1643 mp_err
   1644 s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
   1645 {
   1646    mp_int s, x, mu;
   1647    mp_err res;
   1648    mp_digit d;
   1649    unsigned int dig, bit;
   1650 
   1651    ARGCHK(a != NULL && b != NULL && c != NULL && m != NULL, MP_BADARG);
   1652 
   1653    if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
   1654        return MP_RANGE;
   1655 
   1656    if ((res = mp_init(&s)) != MP_OKAY)
   1657        return res;
   1658    if ((res = mp_init_copy(&x, a)) != MP_OKAY ||
   1659        (res = mp_mod(&x, m, &x)) != MP_OKAY)
   1660        goto X;
   1661    if ((res = mp_init(&mu)) != MP_OKAY)
   1662        goto MU;
   1663 
   1664    mp_set(&s, 1);
   1665 
   1666    /* mu = b^2k / m */
   1667    if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY)
   1668        goto CLEANUP;
   1669    if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY)
   1670        goto CLEANUP;
   1671    if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
   1672        goto CLEANUP;
   1673 
   1674    /* Loop over digits of b in ascending order, except highest order */
   1675    for (dig = 0; dig < (USED(b) - 1); dig++) {
   1676        d = DIGIT(b, dig);
   1677 
   1678        /* Loop over the bits of the lower-order digits */
   1679        for (bit = 0; bit < DIGIT_BIT; bit++) {
   1680            if (d & 1) {
   1681                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
   1682                    goto CLEANUP;
   1683                if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
   1684                    goto CLEANUP;
   1685            }
   1686 
   1687            d >>= 1;
   1688 
   1689            if ((res = s_mp_sqr(&x)) != MP_OKAY)
   1690                goto CLEANUP;
   1691            if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
   1692                goto CLEANUP;
   1693        }
   1694    }
   1695 
   1696    /* Now do the last digit... */
   1697    d = DIGIT(b, dig);
   1698 
   1699    while (d) {
   1700        if (d & 1) {
   1701            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
   1702                goto CLEANUP;
   1703            if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
   1704                goto CLEANUP;
   1705        }
   1706 
   1707        d >>= 1;
   1708 
   1709        if ((res = s_mp_sqr(&x)) != MP_OKAY)
   1710            goto CLEANUP;
   1711        if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
   1712            goto CLEANUP;
   1713    }
   1714 
   1715    s_mp_exch(&s, c);
   1716 
   1717 CLEANUP:
   1718    mp_clear(&mu);
   1719 MU:
   1720    mp_clear(&x);
   1721 X:
   1722    mp_clear(&s);
   1723 
   1724    return res;
   1725 
   1726 } /* end s_mp_exptmod() */
   1727 
   1728 /* }}} */
   1729 
   1730 /* {{{ mp_exptmod_d(a, d, m, c) */
   1731 
   1732 mp_err
   1733 mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
   1734 {
   1735    mp_int s, x;
   1736    mp_err res;
   1737 
   1738    ARGCHK(a != NULL && c != NULL && m != NULL, MP_BADARG);
   1739 
   1740    if ((res = mp_init(&s)) != MP_OKAY)
   1741        return res;
   1742    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
   1743        goto X;
   1744 
   1745    mp_set(&s, 1);
   1746 
   1747    while (d != 0) {
   1748        if (d & 1) {
   1749            if ((res = s_mp_mul(&s, &x)) != MP_OKAY ||
   1750                (res = mp_mod(&s, m, &s)) != MP_OKAY)
   1751                goto CLEANUP;
   1752        }
   1753 
   1754        d /= 2;
   1755 
   1756        if ((res = s_mp_sqr(&x)) != MP_OKAY ||
   1757            (res = mp_mod(&x, m, &x)) != MP_OKAY)
   1758            goto CLEANUP;
   1759    }
   1760 
   1761    s_mp_exch(&s, c);
   1762 
   1763 CLEANUP:
   1764    mp_clear(&x);
   1765 X:
   1766    mp_clear(&s);
   1767 
   1768    return res;
   1769 
   1770 } /* end mp_exptmod_d() */
   1771 
   1772 /* }}} */
   1773 #endif /* if MP_MODARITH */
   1774 
   1775 /* }}} */
   1776 
   1777 /*------------------------------------------------------------------------*/
   1778 /* {{{ Comparison functions */
   1779 
   1780 /* {{{ mp_cmp_z(a) */
   1781 
   1782 /*
   1783  mp_cmp_z(a)
   1784 
   1785  Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
   1786 */
   1787 
   1788 int
   1789 mp_cmp_z(const mp_int *a)
   1790 {
   1791    ARGMPCHK(a != NULL);
   1792 
   1793    if (SIGN(a) == NEG)
   1794        return MP_LT;
   1795    else if (USED(a) == 1 && DIGIT(a, 0) == 0)
   1796        return MP_EQ;
   1797    else
   1798        return MP_GT;
   1799 
   1800 } /* end mp_cmp_z() */
   1801 
   1802 /* }}} */
   1803 
   1804 /* {{{ mp_cmp_d(a, d) */
   1805 
   1806 /*
   1807  mp_cmp_d(a, d)
   1808 
   1809  Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
   1810 */
   1811 
   1812 int
   1813 mp_cmp_d(const mp_int *a, mp_digit d)
   1814 {
   1815    ARGCHK(a != NULL, MP_EQ);
   1816 
   1817    if (SIGN(a) == NEG)
   1818        return MP_LT;
   1819 
   1820    return s_mp_cmp_d(a, d);
   1821 
   1822 } /* end mp_cmp_d() */
   1823 
   1824 /* }}} */
   1825 
   1826 /* {{{ mp_cmp(a, b) */
   1827 
   1828 int
   1829 mp_cmp(const mp_int *a, const mp_int *b)
   1830 {
   1831    ARGCHK(a != NULL && b != NULL, MP_EQ);
   1832 
   1833    if (SIGN(a) == SIGN(b)) {
   1834        int mag;
   1835 
   1836        if ((mag = s_mp_cmp(a, b)) == MP_EQ)
   1837            return MP_EQ;
   1838 
   1839        if (SIGN(a) == ZPOS)
   1840            return mag;
   1841        else
   1842            return -mag;
   1843 
   1844    } else if (SIGN(a) == ZPOS) {
   1845        return MP_GT;
   1846    } else {
   1847        return MP_LT;
   1848    }
   1849 
   1850 } /* end mp_cmp() */
   1851 
   1852 /* }}} */
   1853 
   1854 /* {{{ mp_cmp_mag(a, b) */
   1855 
   1856 /*
   1857  mp_cmp_mag(a, b)
   1858 
   1859  Compares |a| <=> |b|, and returns an appropriate comparison result
   1860 */
   1861 
   1862 int
   1863 mp_cmp_mag(const mp_int *a, const mp_int *b)
   1864 {
   1865    ARGCHK(a != NULL && b != NULL, MP_EQ);
   1866 
   1867    return s_mp_cmp(a, b);
   1868 
   1869 } /* end mp_cmp_mag() */
   1870 
   1871 /* }}} */
   1872 
   1873 /* {{{ mp_isodd(a) */
   1874 
   1875 /*
   1876  mp_isodd(a)
   1877 
   1878  Returns a true (non-zero) value if a is odd, false (zero) otherwise.
   1879 */
   1880 int
   1881 mp_isodd(const mp_int *a)
   1882 {
   1883    ARGMPCHK(a != NULL);
   1884 
   1885    return (int)(DIGIT(a, 0) & 1);
   1886 
   1887 } /* end mp_isodd() */
   1888 
   1889 /* }}} */
   1890 
   1891 /* {{{ mp_iseven(a) */
   1892 
   1893 int
   1894 mp_iseven(const mp_int *a)
   1895 {
   1896    return !mp_isodd(a);
   1897 
   1898 } /* end mp_iseven() */
   1899 
   1900 /* }}} */
   1901 
   1902 /* }}} */
   1903 
   1904 /*------------------------------------------------------------------------*/
   1905 /* {{{ Number theoretic functions */
   1906 
   1907 /* {{{ mp_gcd(a, b, c) */
   1908 
   1909 /*
   1910  Computes the GCD using the constant-time algorithm
   1911  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
   1912  "Fast constant-time gcd computation and modular inversion"
   1913 */
   1914 mp_err
   1915 mp_gcd(mp_int *a, mp_int *b, mp_int *c)
   1916 {
   1917    mp_err res;
   1918    mp_digit cond = 0, mask = 0;
   1919    mp_int g, temp, f;
   1920    int i, j, m, bit = 1, delta = 1, shifts = 0, last = -1;
   1921    mp_size top, flen, glen;
   1922    mp_int *clear[3];
   1923 
   1924    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   1925    /*
   1926    Early exit if either of the inputs is zero.
   1927    Caller is responsible for the proper handling of inputs.
   1928    */
   1929    if (mp_cmp_z(a) == MP_EQ) {
   1930        res = mp_copy(b, c);
   1931        SIGN(c) = ZPOS;
   1932        return res;
   1933    } else if (mp_cmp_z(b) == MP_EQ) {
   1934        res = mp_copy(a, c);
   1935        SIGN(c) = ZPOS;
   1936        return res;
   1937    }
   1938 
   1939    MP_CHECKOK(mp_init(&temp));
   1940    clear[++last] = &temp;
   1941    MP_CHECKOK(mp_init_copy(&g, a));
   1942    clear[++last] = &g;
   1943    MP_CHECKOK(mp_init_copy(&f, b));
   1944    clear[++last] = &f;
   1945 
   1946    /*
   1947    For even case compute the number of
   1948    shared powers of 2 in f and g.
   1949    */
   1950    for (i = 0; i < USED(&f) && i < USED(&g); i++) {
   1951        mask = ~(DIGIT(&f, i) | DIGIT(&g, i));
   1952        for (j = 0; j < MP_DIGIT_BIT; j++) {
   1953            bit &= mask;
   1954            shifts += bit;
   1955            mask >>= 1;
   1956        }
   1957    }
   1958    /* Reduce to the odd case by removing the powers of 2. */
   1959    s_mp_div_2d(&f, shifts);
   1960    s_mp_div_2d(&g, shifts);
   1961 
   1962    /* Allocate to the size of largest mp_int. */
   1963    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
   1964    MP_CHECKOK(s_mp_grow(&f, top));
   1965    MP_CHECKOK(s_mp_grow(&g, top));
   1966    MP_CHECKOK(s_mp_grow(&temp, top));
   1967 
   1968    /* Make sure f contains the odd value. */
   1969    MP_CHECKOK(mp_cswap((~DIGIT(&f, 0) & 1), &f, &g, top));
   1970 
   1971    /* Upper bound for the total iterations. */
   1972    flen = mpl_significant_bits(&f);
   1973    glen = mpl_significant_bits(&g);
   1974    m = 4 + 3 * ((flen >= glen) ? flen : glen);
   1975 
   1976 #if defined(_MSC_VER)
   1977 #pragma warning(push)
   1978 #pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
   1979 #endif
   1980 
   1981    for (i = 0; i < m; i++) {
   1982        /* Step 1: conditional swap. */
   1983        /* Set cond if delta > 0 and g is odd. */
   1984        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
   1985        /* If cond is set replace (delta,f) with (-delta,-f). */
   1986        delta = (-cond & -delta) | ((cond - 1) & delta);
   1987        SIGN(&f) ^= cond;
   1988        /* If cond is set swap f with g. */
   1989        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
   1990 
   1991        /* Step 2: elemination. */
   1992        /* Update delta. */
   1993        delta++;
   1994        /* If g is odd, right shift (g+f) else right shift g. */
   1995        MP_CHECKOK(mp_add(&g, &f, &temp));
   1996        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
   1997        s_mp_div_2(&g);
   1998    }
   1999 
   2000 #if defined(_MSC_VER)
   2001 #pragma warning(pop)
   2002 #endif
   2003 
   2004    /* GCD is in f, take the absolute value. */
   2005    SIGN(&f) = ZPOS;
   2006 
   2007    /* Add back the removed powers of 2. */
   2008    MP_CHECKOK(s_mp_mul_2d(&f, shifts));
   2009 
   2010    MP_CHECKOK(mp_copy(&f, c));
   2011 
   2012 CLEANUP:
   2013    while (last >= 0)
   2014        mp_clear(clear[last--]);
   2015    return res;
   2016 } /* end mp_gcd() */
   2017 
   2018 /* }}} */
   2019 
   2020 /* {{{ mp_lcm(a, b, c) */
   2021 
   2022 /* We compute the least common multiple using the rule:
   2023 
   2024   ab = [a, b](a, b)
   2025 
   2026   ... by computing the product, and dividing out the gcd.
   2027 */
   2028 
   2029 mp_err
   2030 mp_lcm(mp_int *a, mp_int *b, mp_int *c)
   2031 {
   2032    mp_int gcd, prod;
   2033    mp_err res;
   2034 
   2035    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   2036 
   2037    /* Set up temporaries */
   2038    if ((res = mp_init(&gcd)) != MP_OKAY)
   2039        return res;
   2040    if ((res = mp_init(&prod)) != MP_OKAY)
   2041        goto GCD;
   2042 
   2043    if ((res = mp_mul(a, b, &prod)) != MP_OKAY)
   2044        goto CLEANUP;
   2045    if ((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
   2046        goto CLEANUP;
   2047 
   2048    res = mp_div(&prod, &gcd, c, NULL);
   2049 
   2050 CLEANUP:
   2051    mp_clear(&prod);
   2052 GCD:
   2053    mp_clear(&gcd);
   2054 
   2055    return res;
   2056 
   2057 } /* end mp_lcm() */
   2058 
   2059 /* }}} */
   2060 
   2061 /* {{{ mp_xgcd(a, b, g, x, y) */
   2062 
   2063 /*
   2064  mp_xgcd(a, b, g, x, y)
   2065 
   2066  Compute g = (a, b) and values x and y satisfying Bezout's identity
   2067  (that is, ax + by = g).  This uses the binary extended GCD algorithm
   2068  based on the Stein algorithm used for mp_gcd()
   2069  See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
   2070 */
   2071 
   2072 mp_err
   2073 mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
   2074 {
   2075    mp_int gx, xc, yc, u, v, A, B, C, D;
   2076    mp_int *clean[9];
   2077    mp_err res;
   2078    int last = -1;
   2079 
   2080    if (mp_cmp_z(b) == 0)
   2081        return MP_RANGE;
   2082 
   2083    /* Initialize all these variables we need */
   2084    MP_CHECKOK(mp_init(&u));
   2085    clean[++last] = &u;
   2086    MP_CHECKOK(mp_init(&v));
   2087    clean[++last] = &v;
   2088    MP_CHECKOK(mp_init(&gx));
   2089    clean[++last] = &gx;
   2090    MP_CHECKOK(mp_init(&A));
   2091    clean[++last] = &A;
   2092    MP_CHECKOK(mp_init(&B));
   2093    clean[++last] = &B;
   2094    MP_CHECKOK(mp_init(&C));
   2095    clean[++last] = &C;
   2096    MP_CHECKOK(mp_init(&D));
   2097    clean[++last] = &D;
   2098    MP_CHECKOK(mp_init_copy(&xc, a));
   2099    clean[++last] = &xc;
   2100    mp_abs(&xc, &xc);
   2101    MP_CHECKOK(mp_init_copy(&yc, b));
   2102    clean[++last] = &yc;
   2103    mp_abs(&yc, &yc);
   2104 
   2105    mp_set(&gx, 1);
   2106 
   2107    /* Divide by two until at least one of them is odd */
   2108    while (mp_iseven(&xc) && mp_iseven(&yc)) {
   2109        mp_size nx = mp_trailing_zeros(&xc);
   2110        mp_size ny = mp_trailing_zeros(&yc);
   2111        mp_size n = MP_MIN(nx, ny);
   2112        s_mp_div_2d(&xc, n);
   2113        s_mp_div_2d(&yc, n);
   2114        MP_CHECKOK(s_mp_mul_2d(&gx, n));
   2115    }
   2116 
   2117    MP_CHECKOK(mp_copy(&xc, &u));
   2118    MP_CHECKOK(mp_copy(&yc, &v));
   2119    mp_set(&A, 1);
   2120    mp_set(&D, 1);
   2121 
   2122    /* Loop through binary GCD algorithm */
   2123    do {
   2124        while (mp_iseven(&u)) {
   2125            s_mp_div_2(&u);
   2126 
   2127            if (mp_iseven(&A) && mp_iseven(&B)) {
   2128                s_mp_div_2(&A);
   2129                s_mp_div_2(&B);
   2130            } else {
   2131                MP_CHECKOK(mp_add(&A, &yc, &A));
   2132                s_mp_div_2(&A);
   2133                MP_CHECKOK(mp_sub(&B, &xc, &B));
   2134                s_mp_div_2(&B);
   2135            }
   2136        }
   2137 
   2138        while (mp_iseven(&v)) {
   2139            s_mp_div_2(&v);
   2140 
   2141            if (mp_iseven(&C) && mp_iseven(&D)) {
   2142                s_mp_div_2(&C);
   2143                s_mp_div_2(&D);
   2144            } else {
   2145                MP_CHECKOK(mp_add(&C, &yc, &C));
   2146                s_mp_div_2(&C);
   2147                MP_CHECKOK(mp_sub(&D, &xc, &D));
   2148                s_mp_div_2(&D);
   2149            }
   2150        }
   2151 
   2152        if (mp_cmp(&u, &v) >= 0) {
   2153            MP_CHECKOK(mp_sub(&u, &v, &u));
   2154            MP_CHECKOK(mp_sub(&A, &C, &A));
   2155            MP_CHECKOK(mp_sub(&B, &D, &B));
   2156        } else {
   2157            MP_CHECKOK(mp_sub(&v, &u, &v));
   2158            MP_CHECKOK(mp_sub(&C, &A, &C));
   2159            MP_CHECKOK(mp_sub(&D, &B, &D));
   2160        }
   2161    } while (mp_cmp_z(&u) != 0);
   2162 
   2163    /* copy results to output */
   2164    if (x)
   2165        MP_CHECKOK(mp_copy(&C, x));
   2166 
   2167    if (y)
   2168        MP_CHECKOK(mp_copy(&D, y));
   2169 
   2170    if (g)
   2171        MP_CHECKOK(mp_mul(&gx, &v, g));
   2172 
   2173 CLEANUP:
   2174    while (last >= 0)
   2175        mp_clear(clean[last--]);
   2176 
   2177    return res;
   2178 
   2179 } /* end mp_xgcd() */
   2180 
   2181 /* }}} */
   2182 
   2183 mp_size
   2184 mp_trailing_zeros(const mp_int *mp)
   2185 {
   2186    mp_digit d;
   2187    mp_size n = 0;
   2188    unsigned int ix;
   2189 
   2190    if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
   2191        return n;
   2192 
   2193    for (ix = 0; !(d = MP_DIGIT(mp, ix)) && (ix < MP_USED(mp)); ++ix)
   2194        n += MP_DIGIT_BIT;
   2195    if (!d)
   2196        return 0; /* shouldn't happen, but ... */
   2197 #if !defined(MP_USE_UINT_DIGIT)
   2198    if (!(d & 0xffffffffU)) {
   2199        d >>= 32;
   2200        n += 32;
   2201    }
   2202 #endif
   2203    if (!(d & 0xffffU)) {
   2204        d >>= 16;
   2205        n += 16;
   2206    }
   2207    if (!(d & 0xffU)) {
   2208        d >>= 8;
   2209        n += 8;
   2210    }
   2211    if (!(d & 0xfU)) {
   2212        d >>= 4;
   2213        n += 4;
   2214    }
   2215    if (!(d & 0x3U)) {
   2216        d >>= 2;
   2217        n += 2;
   2218    }
   2219    if (!(d & 0x1U)) {
   2220        d >>= 1;
   2221        n += 1;
   2222    }
   2223 #if MP_ARGCHK == 2
   2224    assert(0 != (d & 1));
   2225 #endif
   2226    return n;
   2227 }
   2228 
   2229 /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
   2230 ** Returns k (positive) or error (negative).
   2231 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
   2232 ** by Richard Schroeppel (a.k.a. Captain Nemo).
   2233 */
   2234 mp_err
   2235 s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
   2236 {
   2237    mp_err res;
   2238    mp_err k = 0;
   2239    mp_int d, f, g;
   2240 
   2241    ARGCHK(a != NULL && p != NULL && c != NULL, MP_BADARG);
   2242 
   2243    MP_DIGITS(&d) = 0;
   2244    MP_DIGITS(&f) = 0;
   2245    MP_DIGITS(&g) = 0;
   2246    MP_CHECKOK(mp_init(&d));
   2247    MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */
   2248    MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */
   2249 
   2250    mp_set(c, 1);
   2251    mp_zero(&d);
   2252 
   2253    if (mp_cmp_z(&f) == 0) {
   2254        res = MP_UNDEF;
   2255    } else
   2256        for (;;) {
   2257            int diff_sign;
   2258            while (mp_iseven(&f)) {
   2259                mp_size n = mp_trailing_zeros(&f);
   2260                if (!n) {
   2261                    res = MP_UNDEF;
   2262                    goto CLEANUP;
   2263                }
   2264                s_mp_div_2d(&f, n);
   2265                MP_CHECKOK(s_mp_mul_2d(&d, n));
   2266                k += n;
   2267            }
   2268            if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */
   2269                res = k;
   2270                break;
   2271            }
   2272            diff_sign = mp_cmp(&f, &g);
   2273            if (diff_sign < 0) { /* f < g */
   2274                s_mp_exch(&f, &g);
   2275                s_mp_exch(c, &d);
   2276            } else if (diff_sign == 0) { /* f == g */
   2277                res = MP_UNDEF;          /* a and p are not relatively prime */
   2278                break;
   2279            }
   2280            if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) {
   2281                MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */
   2282                MP_CHECKOK(mp_sub(c, &d, c));   /* c = c - d */
   2283            } else {
   2284                MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */
   2285                MP_CHECKOK(mp_add(c, &d, c));   /* c = c + d */
   2286            }
   2287        }
   2288    if (res >= 0) {
   2289        if (mp_cmp_mag(c, p) >= 0) {
   2290            MP_CHECKOK(mp_div(c, p, NULL, c));
   2291        }
   2292        if (MP_SIGN(c) != MP_ZPOS) {
   2293            MP_CHECKOK(mp_add(c, p, c));
   2294        }
   2295        res = k;
   2296    }
   2297 
   2298 CLEANUP:
   2299    mp_clear(&d);
   2300    mp_clear(&f);
   2301    mp_clear(&g);
   2302    return res;
   2303 }
   2304 
   2305 /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
   2306 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
   2307 ** by Richard Schroeppel (a.k.a. Captain Nemo).
   2308 */
   2309 mp_digit
   2310 s_mp_invmod_radix(mp_digit P)
   2311 {
   2312    mp_digit T = P;
   2313    T *= 2 - (P * T);
   2314    T *= 2 - (P * T);
   2315    T *= 2 - (P * T);
   2316    T *= 2 - (P * T);
   2317 #if !defined(MP_USE_UINT_DIGIT)
   2318    T *= 2 - (P * T);
   2319    T *= 2 - (P * T);
   2320 #endif
   2321    return T;
   2322 }
   2323 
   2324 /* Given c, k, and prime p, where a*c == 2**k (mod p),
   2325 ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
   2326 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
   2327 ** by Richard Schroeppel (a.k.a. Captain Nemo).
   2328 */
   2329 mp_err
   2330 s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
   2331 {
   2332    int k_orig = k;
   2333    mp_digit r;
   2334    mp_size ix;
   2335    mp_err res;
   2336 
   2337    if (mp_cmp_z(c) < 0) {           /* c < 0 */
   2338        MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */
   2339    } else {
   2340        MP_CHECKOK(mp_copy(c, x)); /* x = c */
   2341    }
   2342 
   2343    /* make sure x is large enough */
   2344    ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
   2345    ix = MP_MAX(ix, MP_USED(x));
   2346    MP_CHECKOK(s_mp_pad(x, ix));
   2347 
   2348    r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0));
   2349 
   2350    for (ix = 0; k > 0; ix++) {
   2351        int j = MP_MIN(k, MP_DIGIT_BIT);
   2352        mp_digit v = r * MP_DIGIT(x, ix);
   2353        if (j < MP_DIGIT_BIT) {
   2354            v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
   2355        }
   2356        s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
   2357        k -= j;
   2358    }
   2359    s_mp_clamp(x);
   2360    s_mp_div_2d(x, k_orig);
   2361    res = MP_OKAY;
   2362 
   2363 CLEANUP:
   2364    return res;
   2365 }
   2366 
   2367 /*
   2368  Computes the modular inverse using the constant-time algorithm
   2369  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
   2370  "Fast constant-time gcd computation and modular inversion"
   2371 */
   2372 mp_err
   2373 s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
   2374 {
   2375    mp_err res;
   2376    mp_digit cond = 0;
   2377    mp_int g, f, v, r, temp;
   2378    int i, its, delta = 1, last = -1;
   2379    mp_size top, flen, glen;
   2380    mp_int *clear[6];
   2381 
   2382    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
   2383    /* Check for invalid inputs. */
   2384    if (mp_cmp_z(a) == MP_EQ || mp_cmp_d(m, 2) == MP_LT)
   2385        return MP_RANGE;
   2386 
   2387    if (a == m || mp_iseven(m))
   2388        return MP_UNDEF;
   2389 
   2390    MP_CHECKOK(mp_init(&temp));
   2391    clear[++last] = &temp;
   2392    MP_CHECKOK(mp_init(&v));
   2393    clear[++last] = &v;
   2394    MP_CHECKOK(mp_init(&r));
   2395    clear[++last] = &r;
   2396    MP_CHECKOK(mp_init_copy(&g, a));
   2397    clear[++last] = &g;
   2398    MP_CHECKOK(mp_init_copy(&f, m));
   2399    clear[++last] = &f;
   2400 
   2401    mp_set(&v, 0);
   2402    mp_set(&r, 1);
   2403 
   2404    /* Allocate to the size of largest mp_int. */
   2405    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
   2406    MP_CHECKOK(s_mp_grow(&f, top));
   2407    MP_CHECKOK(s_mp_grow(&g, top));
   2408    MP_CHECKOK(s_mp_grow(&temp, top));
   2409    MP_CHECKOK(s_mp_grow(&v, top));
   2410    MP_CHECKOK(s_mp_grow(&r, top));
   2411 
   2412    /* Upper bound for the total iterations. */
   2413    flen = mpl_significant_bits(&f);
   2414    glen = mpl_significant_bits(&g);
   2415    its = 4 + 3 * ((flen >= glen) ? flen : glen);
   2416 
   2417 #if defined(_MSC_VER)
   2418 #pragma warning(push)
   2419 #pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
   2420 #endif
   2421 
   2422    for (i = 0; i < its; i++) {
   2423        /* Step 1: conditional swap. */
   2424        /* Set cond if delta > 0 and g is odd. */
   2425        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
   2426        /* If cond is set replace (delta,f,v) with (-delta,-f,-v). */
   2427        delta = (-cond & -delta) | ((cond - 1) & delta);
   2428        SIGN(&f) ^= cond;
   2429        SIGN(&v) ^= cond;
   2430        /* If cond is set swap (f,v) with (g,r). */
   2431        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
   2432        MP_CHECKOK(mp_cswap(cond, &v, &r, top));
   2433 
   2434        /* Step 2: elemination. */
   2435        /* Update delta */
   2436        delta++;
   2437        /* If g is odd replace r with (r+v). */
   2438        MP_CHECKOK(mp_add(&r, &v, &temp));
   2439        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &r, &temp, top));
   2440        /* If g is odd, right shift (g+f) else right shift g. */
   2441        MP_CHECKOK(mp_add(&g, &f, &temp));
   2442        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
   2443        s_mp_div_2(&g);
   2444        /*
   2445        If r is even, right shift it.
   2446        If r is odd, right shift (r+m) which is even because m is odd.
   2447        We want the result modulo m so adding in multiples of m here vanish.
   2448        */
   2449        MP_CHECKOK(mp_add(&r, m, &temp));
   2450        MP_CHECKOK(mp_cswap((DIGIT(&r, 0) & 1), &r, &temp, top));
   2451        s_mp_div_2(&r);
   2452    }
   2453 
   2454 #if defined(_MSC_VER)
   2455 #pragma warning(pop)
   2456 #endif
   2457 
   2458    /* We have the inverse in v, propagate sign from f. */
   2459    SIGN(&v) ^= SIGN(&f);
   2460    /* GCD is in f, take the absolute value. */
   2461    SIGN(&f) = ZPOS;
   2462 
   2463    /* If gcd != 1, not invertible. */
   2464    if (mp_cmp_d(&f, 1) != MP_EQ) {
   2465        res = MP_UNDEF;
   2466        goto CLEANUP;
   2467    }
   2468 
   2469    /* Return inverse modulo m. */
   2470    MP_CHECKOK(mp_mod(&v, m, c));
   2471 
   2472 CLEANUP:
   2473    while (last >= 0)
   2474        mp_clear(clear[last--]);
   2475    return res;
   2476 }
   2477 
   2478 /* Known good algorithm for computing modular inverse.  But slow. */
   2479 mp_err
   2480 mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
   2481 {
   2482    mp_int g, x;
   2483    mp_err res;
   2484 
   2485    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
   2486 
   2487    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
   2488        return MP_RANGE;
   2489 
   2490    MP_DIGITS(&g) = 0;
   2491    MP_DIGITS(&x) = 0;
   2492    MP_CHECKOK(mp_init(&x));
   2493    MP_CHECKOK(mp_init(&g));
   2494 
   2495    MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL));
   2496 
   2497    if (mp_cmp_d(&g, 1) != MP_EQ) {
   2498        res = MP_UNDEF;
   2499        goto CLEANUP;
   2500    }
   2501 
   2502    res = mp_mod(&x, m, c);
   2503    SIGN(c) = SIGN(a);
   2504 
   2505 CLEANUP:
   2506    mp_clear(&x);
   2507    mp_clear(&g);
   2508 
   2509    return res;
   2510 }
   2511 
   2512 /* modular inverse where modulus is 2**k. */
   2513 /* c = a**-1 mod 2**k */
   2514 mp_err
   2515 s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
   2516 {
   2517    mp_err res;
   2518    mp_size ix = k + 4;
   2519    mp_int t0, t1, val, tmp, two2k;
   2520 
   2521    static const mp_digit d2 = 2;
   2522    static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 };
   2523 
   2524    if (mp_iseven(a))
   2525        return MP_UNDEF;
   2526 
   2527 #if defined(_MSC_VER)
   2528 #pragma warning(push)
   2529 #pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
   2530 #endif
   2531    if (k <= MP_DIGIT_BIT) {
   2532        mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0));
   2533        /* propagate the sign from mp_int */
   2534        i = (i ^ -(mp_digit)SIGN(a)) + (mp_digit)SIGN(a);
   2535        if (k < MP_DIGIT_BIT)
   2536            i &= ((mp_digit)1 << k) - (mp_digit)1;
   2537        mp_set(c, i);
   2538        return MP_OKAY;
   2539    }
   2540 #if defined(_MSC_VER)
   2541 #pragma warning(pop)
   2542 #endif
   2543 
   2544    MP_DIGITS(&t0) = 0;
   2545    MP_DIGITS(&t1) = 0;
   2546    MP_DIGITS(&val) = 0;
   2547    MP_DIGITS(&tmp) = 0;
   2548    MP_DIGITS(&two2k) = 0;
   2549    MP_CHECKOK(mp_init_copy(&val, a));
   2550    s_mp_mod_2d(&val, k);
   2551    MP_CHECKOK(mp_init_copy(&t0, &val));
   2552    MP_CHECKOK(mp_init_copy(&t1, &t0));
   2553    MP_CHECKOK(mp_init(&tmp));
   2554    MP_CHECKOK(mp_init(&two2k));
   2555    MP_CHECKOK(s_mp_2expt(&two2k, k));
   2556    do {
   2557        MP_CHECKOK(mp_mul(&val, &t1, &tmp));
   2558        MP_CHECKOK(mp_sub(&two, &tmp, &tmp));
   2559        MP_CHECKOK(mp_mul(&t1, &tmp, &t1));
   2560        s_mp_mod_2d(&t1, k);
   2561        while (MP_SIGN(&t1) != MP_ZPOS) {
   2562            MP_CHECKOK(mp_add(&t1, &two2k, &t1));
   2563        }
   2564        if (mp_cmp(&t1, &t0) == MP_EQ)
   2565            break;
   2566        MP_CHECKOK(mp_copy(&t1, &t0));
   2567    } while (--ix > 0);
   2568    if (!ix) {
   2569        res = MP_UNDEF;
   2570    } else {
   2571        mp_exch(c, &t1);
   2572    }
   2573 
   2574 CLEANUP:
   2575    mp_clear(&t0);
   2576    mp_clear(&t1);
   2577    mp_clear(&val);
   2578    mp_clear(&tmp);
   2579    mp_clear(&two2k);
   2580    return res;
   2581 }
   2582 
   2583 mp_err
   2584 s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
   2585 {
   2586    mp_err res;
   2587    mp_size k;
   2588    mp_int oddFactor, evenFactor; /* factors of the modulus */
   2589    mp_int oddPart, evenPart;     /* parts to combine via CRT. */
   2590    mp_int C2, tmp1, tmp2;
   2591 
   2592    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
   2593 
   2594    /*static const mp_digit d1 = 1; */
   2595    /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
   2596 
   2597    if ((res = s_mp_ispow2(m)) >= 0) {
   2598        k = res;
   2599        return s_mp_invmod_2d(a, k, c);
   2600    }
   2601    MP_DIGITS(&oddFactor) = 0;
   2602    MP_DIGITS(&evenFactor) = 0;
   2603    MP_DIGITS(&oddPart) = 0;
   2604    MP_DIGITS(&evenPart) = 0;
   2605    MP_DIGITS(&C2) = 0;
   2606    MP_DIGITS(&tmp1) = 0;
   2607    MP_DIGITS(&tmp2) = 0;
   2608 
   2609    MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */
   2610    MP_CHECKOK(mp_init(&evenFactor));
   2611    MP_CHECKOK(mp_init(&oddPart));
   2612    MP_CHECKOK(mp_init(&evenPart));
   2613    MP_CHECKOK(mp_init(&C2));
   2614    MP_CHECKOK(mp_init(&tmp1));
   2615    MP_CHECKOK(mp_init(&tmp2));
   2616 
   2617    k = mp_trailing_zeros(m);
   2618    s_mp_div_2d(&oddFactor, k);
   2619    MP_CHECKOK(s_mp_2expt(&evenFactor, k));
   2620 
   2621    /* compute a**-1 mod oddFactor. */
   2622    MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart));
   2623    /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
   2624    MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart));
   2625 
   2626    /* Use Chinese Remainer theorem to compute a**-1 mod m. */
   2627    /* let m1 = oddFactor,  v1 = oddPart,
   2628     * let m2 = evenFactor, v2 = evenPart.
   2629     */
   2630 
   2631    /* Compute C2 = m1**-1 mod m2. */
   2632    MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2));
   2633 
   2634    /* compute u = (v2 - v1)*C2 mod m2 */
   2635    MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1));
   2636    MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2));
   2637    s_mp_mod_2d(&tmp2, k);
   2638    while (MP_SIGN(&tmp2) != MP_ZPOS) {
   2639        MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2));
   2640    }
   2641 
   2642    /* compute answer = v1 + u*m1 */
   2643    MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c));
   2644    MP_CHECKOK(mp_add(&oddPart, c, c));
   2645    /* not sure this is necessary, but it's low cost if not. */
   2646    MP_CHECKOK(mp_mod(c, m, c));
   2647 
   2648 CLEANUP:
   2649    mp_clear(&oddFactor);
   2650    mp_clear(&evenFactor);
   2651    mp_clear(&oddPart);
   2652    mp_clear(&evenPart);
   2653    mp_clear(&C2);
   2654    mp_clear(&tmp1);
   2655    mp_clear(&tmp2);
   2656    return res;
   2657 }
   2658 
   2659 /* {{{ mp_invmod(a, m, c) */
   2660 
   2661 /*
   2662  mp_invmod(a, m, c)
   2663 
   2664  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
   2665  This is equivalent to the question of whether (a, m) = 1.  If not,
   2666  MP_UNDEF is returned, and there is no inverse.
   2667 */
   2668 
   2669 mp_err
   2670 mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
   2671 {
   2672    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
   2673 
   2674    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
   2675        return MP_RANGE;
   2676 
   2677    if (mp_isodd(m)) {
   2678        return s_mp_invmod_odd_m(a, m, c);
   2679    }
   2680    if (mp_iseven(a))
   2681        return MP_UNDEF; /* not invertable */
   2682 
   2683    return s_mp_invmod_even_m(a, m, c);
   2684 
   2685 } /* end mp_invmod() */
   2686 
   2687 /* }}} */
   2688 
   2689 /* }}} */
   2690 
   2691 /*------------------------------------------------------------------------*/
   2692 /* {{{ mp_print(mp, ofp) */
   2693 
   2694 #if MP_IOFUNC
   2695 /*
   2696  mp_print(mp, ofp)
   2697 
   2698  Print a textual representation of the given mp_int on the output
   2699  stream 'ofp'.  Output is generated using the internal radix.
   2700 */
   2701 
   2702 void
   2703 mp_print(mp_int *mp, FILE *ofp)
   2704 {
   2705    int ix;
   2706 
   2707    if (mp == NULL || ofp == NULL)
   2708        return;
   2709 
   2710    fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
   2711 
   2712    for (ix = USED(mp) - 1; ix >= 0; ix--) {
   2713        fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
   2714    }
   2715 
   2716 } /* end mp_print() */
   2717 
   2718 #endif /* if MP_IOFUNC */
   2719 
   2720 /* }}} */
   2721 
   2722 /*------------------------------------------------------------------------*/
   2723 /* {{{ More I/O Functions */
   2724 
   2725 /* {{{ mp_read_raw(mp, str, len) */
   2726 
   2727 /*
   2728   mp_read_raw(mp, str, len)
   2729 
   2730   Read in a raw value (base 256) into the given mp_int
   2731 */
   2732 
   2733 mp_err
   2734 mp_read_raw(mp_int *mp, char *str, int len)
   2735 {
   2736    int ix;
   2737    mp_err res;
   2738    unsigned char *ustr = (unsigned char *)str;
   2739 
   2740    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
   2741 
   2742    mp_zero(mp);
   2743 
   2744    /* Read the rest of the digits */
   2745    for (ix = 1; ix < len; ix++) {
   2746        if ((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
   2747            return res;
   2748        if ((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
   2749            return res;
   2750    }
   2751 
   2752    /* Get sign from first byte */
   2753    if (ustr[0])
   2754        SIGN(mp) = NEG;
   2755    else
   2756        SIGN(mp) = ZPOS;
   2757 
   2758    return MP_OKAY;
   2759 
   2760 } /* end mp_read_raw() */
   2761 
   2762 /* }}} */
   2763 
   2764 /* {{{ mp_raw_size(mp) */
   2765 
   2766 int
   2767 mp_raw_size(mp_int *mp)
   2768 {
   2769    ARGCHK(mp != NULL, 0);
   2770 
   2771    return (USED(mp) * sizeof(mp_digit)) + 1;
   2772 
   2773 } /* end mp_raw_size() */
   2774 
   2775 /* }}} */
   2776 
   2777 /* {{{ mp_toraw(mp, str) */
   2778 
   2779 mp_err
   2780 mp_toraw(mp_int *mp, char *str)
   2781 {
   2782    int ix, jx, pos = 1;
   2783 
   2784    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
   2785 
   2786    str[0] = (char)SIGN(mp);
   2787 
   2788    /* Iterate over each digit... */
   2789    for (ix = USED(mp) - 1; ix >= 0; ix--) {
   2790        mp_digit d = DIGIT(mp, ix);
   2791 
   2792        /* Unpack digit bytes, high order first */
   2793        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
   2794            str[pos++] = (char)(d >> (jx * CHAR_BIT));
   2795        }
   2796    }
   2797 
   2798    return MP_OKAY;
   2799 
   2800 } /* end mp_toraw() */
   2801 
   2802 /* }}} */
   2803 
   2804 /* {{{ mp_read_radix(mp, str, radix) */
   2805 
   2806 /*
   2807  mp_read_radix(mp, str, radix)
   2808 
   2809  Read an integer from the given string, and set mp to the resulting
   2810  value.  The input is presumed to be in base 10.  Leading non-digit
   2811  characters are ignored, and the function reads until a non-digit
   2812  character or the end of the string.
   2813 */
   2814 
   2815 mp_err
   2816 mp_read_radix(mp_int *mp, const char *str, int radix)
   2817 {
   2818    int ix = 0, val = 0;
   2819    mp_err res;
   2820    mp_sign sig = ZPOS;
   2821 
   2822    ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
   2823           MP_BADARG);
   2824 
   2825    mp_zero(mp);
   2826 
   2827    /* Skip leading non-digit characters until a digit or '-' or '+' */
   2828    while (str[ix] &&
   2829           (s_mp_tovalue(str[ix], radix) < 0) &&
   2830           str[ix] != '-' &&
   2831           str[ix] != '+') {
   2832        ++ix;
   2833    }
   2834 
   2835    if (str[ix] == '-') {
   2836        sig = NEG;
   2837        ++ix;
   2838    } else if (str[ix] == '+') {
   2839        sig = ZPOS; /* this is the default anyway... */
   2840        ++ix;
   2841    }
   2842 
   2843    while ((val = s_mp_tovalue(str[ix], radix)) >= 0) {
   2844        if ((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
   2845            return res;
   2846        if ((res = s_mp_add_d(mp, val)) != MP_OKAY)
   2847            return res;
   2848        ++ix;
   2849    }
   2850 
   2851    if (s_mp_cmp_d(mp, 0) == MP_EQ)
   2852        SIGN(mp) = ZPOS;
   2853    else
   2854        SIGN(mp) = sig;
   2855 
   2856    return MP_OKAY;
   2857 
   2858 } /* end mp_read_radix() */
   2859 
   2860 mp_err
   2861 mp_read_variable_radix(mp_int *a, const char *str, int default_radix)
   2862 {
   2863    int radix = default_radix;
   2864    int cx;
   2865    mp_sign sig = ZPOS;
   2866    mp_err res;
   2867 
   2868    /* Skip leading non-digit characters until a digit or '-' or '+' */
   2869    while ((cx = *str) != 0 &&
   2870           (s_mp_tovalue(cx, radix) < 0) &&
   2871           cx != '-' &&
   2872           cx != '+') {
   2873        ++str;
   2874    }
   2875 
   2876    if (cx == '-') {
   2877        sig = NEG;
   2878        ++str;
   2879    } else if (cx == '+') {
   2880        sig = ZPOS; /* this is the default anyway... */
   2881        ++str;
   2882    }
   2883 
   2884    if (str[0] == '0') {
   2885        if ((str[1] | 0x20) == 'x') {
   2886            radix = 16;
   2887            str += 2;
   2888        } else {
   2889            radix = 8;
   2890            str++;
   2891        }
   2892    }
   2893    res = mp_read_radix(a, str, radix);
   2894    if (res == MP_OKAY) {
   2895        MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
   2896    }
   2897    return res;
   2898 }
   2899 
   2900 /* }}} */
   2901 
   2902 /* {{{ mp_radix_size(mp, radix) */
   2903 
   2904 int
   2905 mp_radix_size(mp_int *mp, int radix)
   2906 {
   2907    int bits;
   2908 
   2909    if (!mp || radix < 2 || radix > MAX_RADIX)
   2910        return 0;
   2911 
   2912    bits = USED(mp) * DIGIT_BIT - 1;
   2913 
   2914    return SIGN(mp) + s_mp_outlen(bits, radix);
   2915 
   2916 } /* end mp_radix_size() */
   2917 
   2918 /* }}} */
   2919 
   2920 /* {{{ mp_toradix(mp, str, radix) */
   2921 
   2922 mp_err
   2923 mp_toradix(mp_int *mp, char *str, int radix)
   2924 {
   2925    int ix, pos = 0;
   2926 
   2927    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
   2928    ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
   2929 
   2930    if (mp_cmp_z(mp) == MP_EQ) {
   2931        str[0] = '0';
   2932        str[1] = '\0';
   2933    } else {
   2934        mp_err res;
   2935        mp_int tmp;
   2936        mp_sign sgn;
   2937        mp_digit rem, rdx = (mp_digit)radix;
   2938        char ch;
   2939 
   2940        if ((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
   2941            return res;
   2942 
   2943        /* Save sign for later, and take absolute value */
   2944        sgn = SIGN(&tmp);
   2945        SIGN(&tmp) = ZPOS;
   2946 
   2947        /* Generate output digits in reverse order      */
   2948        while (mp_cmp_z(&tmp) != 0) {
   2949            if ((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
   2950                mp_clear(&tmp);
   2951                return res;
   2952            }
   2953 
   2954            /* Generate digits, use capital letters */
   2955            ch = s_mp_todigit(rem, radix, 0);
   2956 
   2957            str[pos++] = ch;
   2958        }
   2959 
   2960        /* Add - sign if original value was negative */
   2961        if (sgn == NEG)
   2962            str[pos++] = '-';
   2963 
   2964        /* Add trailing NUL to end the string        */
   2965        str[pos--] = '\0';
   2966 
   2967        /* Reverse the digits and sign indicator     */
   2968        ix = 0;
   2969        while (ix < pos) {
   2970            char tmpc = str[ix];
   2971 
   2972            str[ix] = str[pos];
   2973            str[pos] = tmpc;
   2974            ++ix;
   2975            --pos;
   2976        }
   2977 
   2978        mp_clear(&tmp);
   2979    }
   2980 
   2981    return MP_OKAY;
   2982 
   2983 } /* end mp_toradix() */
   2984 
   2985 /* }}} */
   2986 
   2987 /* {{{ mp_tovalue(ch, r) */
   2988 
   2989 int
   2990 mp_tovalue(char ch, int r)
   2991 {
   2992    return s_mp_tovalue(ch, r);
   2993 
   2994 } /* end mp_tovalue() */
   2995 
   2996 /* }}} */
   2997 
   2998 /* }}} */
   2999 
   3000 /* {{{ mp_strerror(ec) */
   3001 
   3002 /*
   3003  mp_strerror(ec)
   3004 
   3005  Return a string describing the meaning of error code 'ec'.  The
   3006  string returned is allocated in static memory, so the caller should
   3007  not attempt to modify or free the memory associated with this
   3008  string.
   3009 */
   3010 const char *
   3011 mp_strerror(mp_err ec)
   3012 {
   3013    int aec = (ec < 0) ? -ec : ec;
   3014 
   3015    /* Code values are negative, so the senses of these comparisons
   3016     are accurate */
   3017    if (ec < MP_LAST_CODE || ec > MP_OKAY) {
   3018        return mp_err_string[0]; /* unknown error code */
   3019    } else {
   3020        return mp_err_string[aec + 1];
   3021    }
   3022 
   3023 } /* end mp_strerror() */
   3024 
   3025 /* }}} */
   3026 
   3027 /*========================================================================*/
   3028 /*------------------------------------------------------------------------*/
   3029 /* Static function definitions (internal use only)                        */
   3030 
   3031 /* {{{ Memory management */
   3032 
   3033 /* {{{ s_mp_grow(mp, min) */
   3034 
   3035 /* Make sure there are at least 'min' digits allocated to mp              */
   3036 mp_err
   3037 s_mp_grow(mp_int *mp, mp_size min)
   3038 {
   3039    ARGCHK(mp != NULL, MP_BADARG);
   3040 
   3041    if (min > ALLOC(mp)) {
   3042        mp_digit *tmp;
   3043 
   3044        /* Set min to next nearest default precision block size */
   3045        min = MP_ROUNDUP(min, s_mp_defprec);
   3046 
   3047        if ((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
   3048            return MP_MEM;
   3049 
   3050        s_mp_copy(DIGITS(mp), tmp, USED(mp));
   3051 
   3052        s_mp_setz(DIGITS(mp), ALLOC(mp));
   3053        s_mp_free(DIGITS(mp));
   3054        DIGITS(mp) = tmp;
   3055        ALLOC(mp) = min;
   3056    }
   3057 
   3058    return MP_OKAY;
   3059 
   3060 } /* end s_mp_grow() */
   3061 
   3062 /* }}} */
   3063 
   3064 /* {{{ s_mp_pad(mp, min) */
   3065 
   3066 /* Make sure the used size of mp is at least 'min', growing if needed     */
   3067 mp_err
   3068 s_mp_pad(mp_int *mp, mp_size min)
   3069 {
   3070    ARGCHK(mp != NULL, MP_BADARG);
   3071 
   3072    if (min > USED(mp)) {
   3073        mp_err res;
   3074 
   3075        /* Make sure there is room to increase precision  */
   3076        if (min > ALLOC(mp)) {
   3077            if ((res = s_mp_grow(mp, min)) != MP_OKAY)
   3078                return res;
   3079        } else {
   3080            s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
   3081        }
   3082 
   3083        /* Increase precision; should already be 0-filled */
   3084        USED(mp) = min;
   3085    }
   3086 
   3087    return MP_OKAY;
   3088 
   3089 } /* end s_mp_pad() */
   3090 
   3091 /* }}} */
   3092 
   3093 /* {{{ s_mp_setz(dp, count) */
   3094 
   3095 /* Set 'count' digits pointed to by dp to be zeroes                       */
   3096 void
   3097 s_mp_setz(mp_digit *dp, mp_size count)
   3098 {
   3099    memset(dp, 0, count * sizeof(mp_digit));
   3100 } /* end s_mp_setz() */
   3101 
   3102 /* }}} */
   3103 
   3104 /* {{{ s_mp_copy(sp, dp, count) */
   3105 
   3106 /* Copy 'count' digits from sp to dp                                      */
   3107 void
   3108 s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
   3109 {
   3110    memcpy(dp, sp, count * sizeof(mp_digit));
   3111 } /* end s_mp_copy() */
   3112 
   3113 /* }}} */
   3114 
   3115 /* {{{ s_mp_alloc(nb, ni) */
   3116 
   3117 /* Allocate ni records of nb bytes each, and return a pointer to that     */
   3118 void *
   3119 s_mp_alloc(size_t nb, size_t ni)
   3120 {
   3121    return calloc(nb, ni);
   3122 
   3123 } /* end s_mp_alloc() */
   3124 
   3125 /* }}} */
   3126 
   3127 /* {{{ s_mp_free(ptr) */
   3128 
   3129 /* Free the memory pointed to by ptr                                      */
   3130 void
   3131 s_mp_free(void *ptr)
   3132 {
   3133    if (ptr) {
   3134        free(ptr);
   3135    }
   3136 } /* end s_mp_free() */
   3137 
   3138 /* }}} */
   3139 
   3140 /* {{{ s_mp_clamp(mp) */
   3141 
   3142 /* Remove leading zeroes from the given value                             */
   3143 void
   3144 s_mp_clamp(mp_int *mp)
   3145 {
   3146    mp_size used = MP_USED(mp);
   3147    while (used > 1 && DIGIT(mp, used - 1) == 0)
   3148        --used;
   3149    MP_USED(mp) = used;
   3150    if (used == 1 && DIGIT(mp, 0) == 0)
   3151        MP_SIGN(mp) = ZPOS;
   3152 } /* end s_mp_clamp() */
   3153 
   3154 /* }}} */
   3155 
   3156 /* {{{ s_mp_exch(a, b) */
   3157 
   3158 /* Exchange the data for a and b; (b, a) = (a, b)                         */
   3159 void
   3160 s_mp_exch(mp_int *a, mp_int *b)
   3161 {
   3162    mp_int tmp;
   3163    if (!a || !b) {
   3164        return;
   3165    }
   3166 
   3167    tmp = *a;
   3168    *a = *b;
   3169    *b = tmp;
   3170 
   3171 } /* end s_mp_exch() */
   3172 
   3173 /* }}} */
   3174 
   3175 /* }}} */
   3176 
   3177 /* {{{ Arithmetic helpers */
   3178 
   3179 /* {{{ s_mp_lshd(mp, p) */
   3180 
   3181 /*
   3182   Shift mp leftward by p digits, growing if needed, and zero-filling
   3183   the in-shifted digits at the right end.  This is a convenient
   3184   alternative to multiplication by powers of the radix
   3185 */
   3186 
   3187 mp_err
   3188 s_mp_lshd(mp_int *mp, mp_size p)
   3189 {
   3190    mp_err res;
   3191    unsigned int ix;
   3192 
   3193    ARGCHK(mp != NULL, MP_BADARG);
   3194 
   3195    if (p == 0)
   3196        return MP_OKAY;
   3197 
   3198    if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
   3199        return MP_OKAY;
   3200 
   3201    if ((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
   3202        return res;
   3203 
   3204    /* Shift all the significant figures over as needed */
   3205    for (ix = USED(mp) - p; ix-- > 0;) {
   3206        DIGIT(mp, ix + p) = DIGIT(mp, ix);
   3207    }
   3208 
   3209    /* Fill the bottom digits with zeroes */
   3210    for (ix = 0; (mp_size)ix < p; ix++)
   3211        DIGIT(mp, ix) = 0;
   3212 
   3213    return MP_OKAY;
   3214 
   3215 } /* end s_mp_lshd() */
   3216 
   3217 /* }}} */
   3218 
   3219 /* {{{ s_mp_mul_2d(mp, d) */
   3220 
   3221 /*
   3222  Multiply the integer by 2^d, where d is a number of bits.  This
   3223  amounts to a bitwise shift of the value.
   3224 */
   3225 mp_err
   3226 s_mp_mul_2d(mp_int *mp, mp_digit d)
   3227 {
   3228    mp_err res;
   3229    mp_digit dshift, rshift, mask, x, prev = 0;
   3230    mp_digit *pa = NULL;
   3231    int i;
   3232 
   3233    ARGCHK(mp != NULL, MP_BADARG);
   3234 
   3235    dshift = d / MP_DIGIT_BIT;
   3236    d %= MP_DIGIT_BIT;
   3237    /* mp_digit >> rshift is undefined behavior for rshift >= MP_DIGIT_BIT */
   3238    /* mod and corresponding mask logic avoid that when d = 0 */
   3239    rshift = MP_DIGIT_BIT - d;
   3240    rshift %= MP_DIGIT_BIT;
   3241    /* mask = (2**d - 1) * 2**(w-d) mod 2**w */
   3242    mask = (DIGIT_MAX << rshift) + 1;
   3243    mask &= DIGIT_MAX - 1;
   3244    /* bits to be shifted out of the top word */
   3245    x = MP_DIGIT(mp, MP_USED(mp) - 1) & mask;
   3246 
   3247    if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (x != 0))))
   3248        return res;
   3249 
   3250    if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
   3251        return res;
   3252 
   3253    pa = MP_DIGITS(mp) + dshift;
   3254 
   3255    for (i = MP_USED(mp) - dshift; i > 0; i--) {
   3256        x = *pa;
   3257        *pa++ = (x << d) | prev;
   3258        prev = (x & mask) >> rshift;
   3259    }
   3260 
   3261    s_mp_clamp(mp);
   3262    return MP_OKAY;
   3263 } /* end s_mp_mul_2d() */
   3264 
   3265 /* {{{ s_mp_rshd(mp, p) */
   3266 
   3267 /*
   3268   Shift mp rightward by p digits.  Maintains the invariant that
   3269   digits above the precision are all zero.  Digits shifted off the
   3270   end are lost.  Cannot fail.
   3271 */
   3272 
   3273 void
   3274 s_mp_rshd(mp_int *mp, mp_size p)
   3275 {
   3276    mp_size ix;
   3277    mp_digit *src, *dst;
   3278 
   3279    if (p == 0)
   3280        return;
   3281 
   3282    /* Shortcut when all digits are to be shifted off */
   3283    if (p >= USED(mp)) {
   3284        s_mp_setz(DIGITS(mp), ALLOC(mp));
   3285        USED(mp) = 1;
   3286        SIGN(mp) = ZPOS;
   3287        return;
   3288    }
   3289 
   3290    /* Shift all the significant figures over as needed */
   3291    dst = MP_DIGITS(mp);
   3292    src = dst + p;
   3293    for (ix = USED(mp) - p; ix > 0; ix--)
   3294        *dst++ = *src++;
   3295 
   3296    MP_USED(mp) -= p;
   3297    /* Fill the top digits with zeroes */
   3298    while (p-- > 0)
   3299        *dst++ = 0;
   3300 
   3301 } /* end s_mp_rshd() */
   3302 
   3303 /* }}} */
   3304 
   3305 /* {{{ s_mp_div_2(mp) */
   3306 
   3307 /* Divide by two -- take advantage of radix properties to do it fast      */
   3308 void
   3309 s_mp_div_2(mp_int *mp)
   3310 {
   3311    s_mp_div_2d(mp, 1);
   3312 
   3313 } /* end s_mp_div_2() */
   3314 
   3315 /* }}} */
   3316 
   3317 /* {{{ s_mp_mul_2(mp) */
   3318 
   3319 mp_err
   3320 s_mp_mul_2(mp_int *mp)
   3321 {
   3322    mp_digit *pd;
   3323    unsigned int ix, used;
   3324    mp_digit kin = 0;
   3325 
   3326    ARGCHK(mp != NULL, MP_BADARG);
   3327 
   3328    /* Shift digits leftward by 1 bit */
   3329    used = MP_USED(mp);
   3330    pd = MP_DIGITS(mp);
   3331    for (ix = 0; ix < used; ix++) {
   3332        mp_digit d = *pd;
   3333        *pd++ = (d << 1) | kin;
   3334        kin = (d >> (DIGIT_BIT - 1));
   3335    }
   3336 
   3337    /* Deal with rollover from last digit */
   3338    if (kin) {
   3339        if (ix >= ALLOC(mp)) {
   3340            mp_err res;
   3341            if ((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
   3342                return res;
   3343        }
   3344 
   3345        DIGIT(mp, ix) = kin;
   3346        USED(mp) += 1;
   3347    }
   3348 
   3349    return MP_OKAY;
   3350 
   3351 } /* end s_mp_mul_2() */
   3352 
   3353 /* }}} */
   3354 
   3355 /* {{{ s_mp_mod_2d(mp, d) */
   3356 
   3357 /*
   3358  Remainder the integer by 2^d, where d is a number of bits.  This
   3359  amounts to a bitwise AND of the value, and does not require the full
   3360  division code
   3361 */
   3362 void
   3363 s_mp_mod_2d(mp_int *mp, mp_digit d)
   3364 {
   3365    mp_size ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
   3366    mp_size ix;
   3367    mp_digit dmask;
   3368 
   3369    if (ndig >= USED(mp))
   3370        return;
   3371 
   3372    /* Flush all the bits above 2^d in its digit */
   3373    dmask = ((mp_digit)1 << nbit) - 1;
   3374    DIGIT(mp, ndig) &= dmask;
   3375 
   3376    /* Flush all digits above the one with 2^d in it */
   3377    for (ix = ndig + 1; ix < USED(mp); ix++)
   3378        DIGIT(mp, ix) = 0;
   3379 
   3380    s_mp_clamp(mp);
   3381 
   3382 } /* end s_mp_mod_2d() */
   3383 
   3384 /* }}} */
   3385 
   3386 /* {{{ s_mp_div_2d(mp, d) */
   3387 
   3388 /*
   3389  Divide the integer by 2^d, where d is a number of bits.  This
   3390  amounts to a bitwise shift of the value, and does not require the
   3391  full division code (used in Barrett reduction, see below)
   3392 */
   3393 void
   3394 s_mp_div_2d(mp_int *mp, mp_digit d)
   3395 {
   3396    int ix;
   3397    mp_digit save, next, mask, lshift;
   3398 
   3399    s_mp_rshd(mp, d / DIGIT_BIT);
   3400    d %= DIGIT_BIT;
   3401    /* mp_digit << lshift is undefined behavior for lshift >= MP_DIGIT_BIT */
   3402    /* mod and corresponding mask logic avoid that when d = 0 */
   3403    lshift = DIGIT_BIT - d;
   3404    lshift %= DIGIT_BIT;
   3405    mask = ((mp_digit)1 << d) - 1;
   3406    save = 0;
   3407    for (ix = USED(mp) - 1; ix >= 0; ix--) {
   3408        next = DIGIT(mp, ix) & mask;
   3409        DIGIT(mp, ix) = (save << lshift) | (DIGIT(mp, ix) >> d);
   3410        save = next;
   3411    }
   3412    s_mp_clamp(mp);
   3413 
   3414 } /* end s_mp_div_2d() */
   3415 
   3416 /* }}} */
   3417 
   3418 /* {{{ s_mp_norm(a, b, *d) */
   3419 
   3420 /*
   3421  s_mp_norm(a, b, *d)
   3422 
   3423  Normalize a and b for division, where b is the divisor.  In order
   3424  that we might make good guesses for quotient digits, we want the
   3425  leading digit of b to be at least half the radix, which we
   3426  accomplish by multiplying a and b by a power of 2.  The exponent
   3427  (shift count) is placed in *pd, so that the remainder can be shifted
   3428  back at the end of the division process.
   3429 */
   3430 
   3431 mp_err
   3432 s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
   3433 {
   3434    mp_digit d;
   3435    mp_digit mask;
   3436    mp_digit b_msd;
   3437    mp_err res = MP_OKAY;
   3438 
   3439    ARGCHK(a != NULL && b != NULL && pd != NULL, MP_BADARG);
   3440 
   3441    d = 0;
   3442    mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */
   3443    b_msd = DIGIT(b, USED(b) - 1);
   3444    while (!(b_msd & mask)) {
   3445        b_msd <<= 1;
   3446        ++d;
   3447    }
   3448 
   3449    if (d) {
   3450        MP_CHECKOK(s_mp_mul_2d(a, d));
   3451        MP_CHECKOK(s_mp_mul_2d(b, d));
   3452    }
   3453 
   3454    *pd = d;
   3455 CLEANUP:
   3456    return res;
   3457 
   3458 } /* end s_mp_norm() */
   3459 
   3460 /* }}} */
   3461 
   3462 /* }}} */
   3463 
   3464 /* {{{ Primitive digit arithmetic */
   3465 
   3466 /* {{{ s_mp_add_d(mp, d) */
   3467 
   3468 /* Add d to |mp| in place                                                 */
   3469 mp_err
   3470 s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */
   3471 {
   3472 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3473    mp_word w, k = 0;
   3474    mp_size ix = 1;
   3475 
   3476    w = (mp_word)DIGIT(mp, 0) + d;
   3477    DIGIT(mp, 0) = ACCUM(w);
   3478    k = CARRYOUT(w);
   3479 
   3480    while (ix < USED(mp) && k) {
   3481        w = (mp_word)DIGIT(mp, ix) + k;
   3482        DIGIT(mp, ix) = ACCUM(w);
   3483        k = CARRYOUT(w);
   3484        ++ix;
   3485    }
   3486 
   3487    if (k != 0) {
   3488        mp_err res;
   3489 
   3490        if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
   3491            return res;
   3492 
   3493        DIGIT(mp, ix) = (mp_digit)k;
   3494    }
   3495 
   3496    return MP_OKAY;
   3497 #else
   3498    mp_digit *pmp = MP_DIGITS(mp);
   3499    mp_digit sum, mp_i, carry = 0;
   3500    mp_err res = MP_OKAY;
   3501    int used = (int)MP_USED(mp);
   3502 
   3503    mp_i = *pmp;
   3504    *pmp++ = sum = d + mp_i;
   3505    carry = (sum < d);
   3506    while (carry && --used > 0) {
   3507        mp_i = *pmp;
   3508        *pmp++ = sum = carry + mp_i;
   3509        carry = !sum;
   3510    }
   3511    if (carry && !used) {
   3512        /* mp is growing */
   3513        used = MP_USED(mp);
   3514        MP_CHECKOK(s_mp_pad(mp, used + 1));
   3515        MP_DIGIT(mp, used) = carry;
   3516    }
   3517 CLEANUP:
   3518    return res;
   3519 #endif
   3520 } /* end s_mp_add_d() */
   3521 
   3522 /* }}} */
   3523 
   3524 /* {{{ s_mp_sub_d(mp, d) */
   3525 
   3526 /* Subtract d from |mp| in place, assumes |mp| > d                        */
   3527 mp_err
   3528 s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */
   3529 {
   3530 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
   3531    mp_word w, b = 0;
   3532    mp_size ix = 1;
   3533 
   3534    /* Compute initial subtraction    */
   3535    w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
   3536    b = CARRYOUT(w) ? 0 : 1;
   3537    DIGIT(mp, 0) = ACCUM(w);
   3538 
   3539    /* Propagate borrows leftward     */
   3540    while (b && ix < USED(mp)) {
   3541        w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
   3542        b = CARRYOUT(w) ? 0 : 1;
   3543        DIGIT(mp, ix) = ACCUM(w);
   3544        ++ix;
   3545    }
   3546 
   3547    /* Remove leading zeroes          */
   3548    s_mp_clamp(mp);
   3549 
   3550    /* If we have a borrow out, it's a violation of the input invariant */
   3551    if (b)
   3552        return MP_RANGE;
   3553    else
   3554        return MP_OKAY;
   3555 #else
   3556    mp_digit *pmp = MP_DIGITS(mp);
   3557    mp_digit mp_i, diff, borrow;
   3558    mp_size used = MP_USED(mp);
   3559 
   3560    mp_i = *pmp;
   3561    *pmp++ = diff = mp_i - d;
   3562    borrow = (diff > mp_i);
   3563    while (borrow && --used) {
   3564        mp_i = *pmp;
   3565        *pmp++ = diff = mp_i - borrow;
   3566        borrow = (diff > mp_i);
   3567    }
   3568    s_mp_clamp(mp);
   3569    return (borrow && !used) ? MP_RANGE : MP_OKAY;
   3570 #endif
   3571 } /* end s_mp_sub_d() */
   3572 
   3573 /* }}} */
   3574 
   3575 /* {{{ s_mp_mul_d(a, d) */
   3576 
   3577 /* Compute a = a * d, single digit multiplication                         */
   3578 mp_err
   3579 s_mp_mul_d(mp_int *a, mp_digit d)
   3580 {
   3581    mp_err res;
   3582    mp_size used;
   3583    int pow;
   3584 
   3585    if (!d) {
   3586        mp_zero(a);
   3587        return MP_OKAY;
   3588    }
   3589    if (d == 1)
   3590        return MP_OKAY;
   3591    if (0 <= (pow = s_mp_ispow2d(d))) {
   3592        return s_mp_mul_2d(a, (mp_digit)pow);
   3593    }
   3594 
   3595    used = MP_USED(a);
   3596    MP_CHECKOK(s_mp_pad(a, used + 1));
   3597 
   3598    s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
   3599 
   3600    s_mp_clamp(a);
   3601 
   3602 CLEANUP:
   3603    return res;
   3604 
   3605 } /* end s_mp_mul_d() */
   3606 
   3607 /* }}} */
   3608 
   3609 /* {{{ s_mp_div_d(mp, d, r) */
   3610 
   3611 /*
   3612  s_mp_div_d(mp, d, r)
   3613 
   3614  Compute the quotient mp = mp / d and remainder r = mp mod d, for a
   3615  single digit d.  If r is null, the remainder will be discarded.
   3616 */
   3617 
   3618 mp_err
   3619 s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
   3620 {
   3621 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
   3622    mp_word w = 0, q;
   3623 #else
   3624    mp_digit w = 0, q;
   3625 #endif
   3626    int ix;
   3627    mp_err res;
   3628    mp_int quot;
   3629    mp_int rem;
   3630 
   3631    if (d == 0)
   3632        return MP_RANGE;
   3633    if (d == 1) {
   3634        if (r)
   3635            *r = 0;
   3636        return MP_OKAY;
   3637    }
   3638    /* could check for power of 2 here, but mp_div_d does that. */
   3639    if (MP_USED(mp) == 1) {
   3640        mp_digit n = MP_DIGIT(mp, 0);
   3641        mp_digit remdig;
   3642 
   3643        q = n / d;
   3644        remdig = n % d;
   3645        MP_DIGIT(mp, 0) = q;
   3646        if (r) {
   3647            *r = remdig;
   3648        }
   3649        return MP_OKAY;
   3650    }
   3651 
   3652    MP_DIGITS(&rem) = 0;
   3653    MP_DIGITS(&quot) = 0;
   3654    /* Make room for the quotient */
   3655    MP_CHECKOK(mp_init_size(&quot, USED(mp)));
   3656 
   3657 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
   3658    for (ix = USED(mp) - 1; ix >= 0; ix--) {
   3659        w = (w << DIGIT_BIT) | DIGIT(mp, ix);
   3660 
   3661        if (w >= d) {
   3662            q = w / d;
   3663            w = w % d;
   3664        } else {
   3665            q = 0;
   3666        }
   3667 
   3668        s_mp_lshd(&quot, 1);
   3669        DIGIT(&quot, 0) = (mp_digit)q;
   3670    }
   3671 #else
   3672    {
   3673        mp_digit p;
   3674 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
   3675        mp_digit norm;
   3676 #endif
   3677 
   3678        MP_CHECKOK(mp_init_copy(&rem, mp));
   3679 
   3680 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
   3681        MP_DIGIT(&quot, 0) = d;
   3682        MP_CHECKOK(s_mp_norm(&rem, &quot, &norm));
   3683        if (norm)
   3684            d <<= norm;
   3685        MP_DIGIT(&quot, 0) = 0;
   3686 #endif
   3687 
   3688        p = 0;
   3689        for (ix = USED(&rem) - 1; ix >= 0; ix--) {
   3690            w = DIGIT(&rem, ix);
   3691 
   3692            if (p) {
   3693                MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w));
   3694            } else if (w >= d) {
   3695                q = w / d;
   3696                w = w % d;
   3697            } else {
   3698                q = 0;
   3699            }
   3700 
   3701            MP_CHECKOK(s_mp_lshd(&quot, 1));
   3702            DIGIT(&quot, 0) = q;
   3703            p = w;
   3704        }
   3705 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
   3706        if (norm)
   3707            w >>= norm;
   3708 #endif
   3709    }
   3710 #endif
   3711 
   3712    /* Deliver the remainder, if desired */
   3713    if (r) {
   3714        *r = (mp_digit)w;
   3715    }
   3716 
   3717    s_mp_clamp(&quot);
   3718    mp_exch(&quot, mp);
   3719 CLEANUP:
   3720    mp_clear(&quot);
   3721    mp_clear(&rem);
   3722 
   3723    return res;
   3724 } /* end s_mp_div_d() */
   3725 
   3726 /* }}} */
   3727 
   3728 /* }}} */
   3729 
   3730 /* {{{ Primitive full arithmetic */
   3731 
   3732 /* {{{ s_mp_add(a, b) */
   3733 
   3734 /* Compute a = |a| + |b|                                                  */
   3735 mp_err
   3736 s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition      */
   3737 {
   3738 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3739    mp_word w = 0;
   3740 #else
   3741    mp_digit d, sum, carry = 0;
   3742 #endif
   3743    mp_digit *pa, *pb;
   3744    mp_size ix;
   3745    mp_size used;
   3746    mp_err res;
   3747 
   3748    /* Make sure a has enough precision for the output value */
   3749    if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
   3750        return res;
   3751 
   3752    /*
   3753      Add up all digits up to the precision of b.  If b had initially
   3754      the same precision as a, or greater, we took care of it by the
   3755      padding step above, so there is no problem.  If b had initially
   3756      less precision, we'll have to make sure the carry out is duly
   3757      propagated upward among the higher-order digits of the sum.
   3758     */
   3759    pa = MP_DIGITS(a);
   3760    pb = MP_DIGITS(b);
   3761    used = MP_USED(b);
   3762    for (ix = 0; ix < used; ix++) {
   3763 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3764        w = w + *pa + *pb++;
   3765        *pa++ = ACCUM(w);
   3766        w = CARRYOUT(w);
   3767 #else
   3768        d = *pa;
   3769        sum = d + *pb++;
   3770        d = (sum < d); /* detect overflow */
   3771        *pa++ = sum += carry;
   3772        carry = d + (sum < carry); /* detect overflow */
   3773 #endif
   3774    }
   3775 
   3776    /* If we run out of 'b' digits before we're actually done, make
   3777       sure the carries get propagated upward...
   3778     */
   3779    used = MP_USED(a);
   3780 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3781    while (w && ix < used) {
   3782        w = w + *pa;
   3783        *pa++ = ACCUM(w);
   3784        w = CARRYOUT(w);
   3785        ++ix;
   3786    }
   3787 #else
   3788    while (carry && ix < used) {
   3789        sum = carry + *pa;
   3790        *pa++ = sum;
   3791        carry = !sum;
   3792        ++ix;
   3793    }
   3794 #endif
   3795 
   3796 /* If there's an overall carry out, increase precision and include
   3797     it.  We could have done this initially, but why touch the memory
   3798     allocator unless we're sure we have to?
   3799   */
   3800 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3801    if (w) {
   3802        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
   3803            return res;
   3804 
   3805        DIGIT(a, ix) = (mp_digit)w;
   3806    }
   3807 #else
   3808    if (carry) {
   3809        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
   3810            return res;
   3811 
   3812        DIGIT(a, used) = carry;
   3813    }
   3814 #endif
   3815 
   3816    return MP_OKAY;
   3817 } /* end s_mp_add() */
   3818 
   3819 /* }}} */
   3820 
   3821 /* Compute c = |a| + |b|         */ /* magnitude addition      */
   3822 mp_err
   3823 s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
   3824 {
   3825    mp_digit *pa, *pb, *pc;
   3826 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3827    mp_word w = 0;
   3828 #else
   3829    mp_digit sum, carry = 0, d;
   3830 #endif
   3831    mp_size ix;
   3832    mp_size used;
   3833    mp_err res;
   3834 
   3835    MP_SIGN(c) = MP_SIGN(a);
   3836    if (MP_USED(a) < MP_USED(b)) {
   3837        const mp_int *xch = a;
   3838        a = b;
   3839        b = xch;
   3840    }
   3841 
   3842    /* Make sure a has enough precision for the output value */
   3843    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
   3844        return res;
   3845 
   3846    /*
   3847     Add up all digits up to the precision of b.  If b had initially
   3848     the same precision as a, or greater, we took care of it by the
   3849     exchange step above, so there is no problem.  If b had initially
   3850     less precision, we'll have to make sure the carry out is duly
   3851     propagated upward among the higher-order digits of the sum.
   3852    */
   3853    pa = MP_DIGITS(a);
   3854    pb = MP_DIGITS(b);
   3855    pc = MP_DIGITS(c);
   3856    used = MP_USED(b);
   3857    for (ix = 0; ix < used; ix++) {
   3858 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3859        w = w + *pa++ + *pb++;
   3860        *pc++ = ACCUM(w);
   3861        w = CARRYOUT(w);
   3862 #else
   3863        d = *pa++;
   3864        sum = d + *pb++;
   3865        d = (sum < d); /* detect overflow */
   3866        *pc++ = sum += carry;
   3867        carry = d + (sum < carry); /* detect overflow */
   3868 #endif
   3869    }
   3870 
   3871    /* If we run out of 'b' digits before we're actually done, make
   3872     sure the carries get propagated upward...
   3873   */
   3874    for (used = MP_USED(a); ix < used; ++ix) {
   3875 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3876        w = w + *pa++;
   3877        *pc++ = ACCUM(w);
   3878        w = CARRYOUT(w);
   3879 #else
   3880        *pc++ = sum = carry + *pa++;
   3881        carry = (sum < carry);
   3882 #endif
   3883    }
   3884 
   3885 /* If there's an overall carry out, increase precision and include
   3886     it.  We could have done this initially, but why touch the memory
   3887     allocator unless we're sure we have to?
   3888   */
   3889 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3890    if (w) {
   3891        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
   3892            return res;
   3893 
   3894        DIGIT(c, used) = (mp_digit)w;
   3895        ++used;
   3896    }
   3897 #else
   3898    if (carry) {
   3899        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
   3900            return res;
   3901 
   3902        DIGIT(c, used) = carry;
   3903        ++used;
   3904    }
   3905 #endif
   3906    MP_USED(c) = used;
   3907    return MP_OKAY;
   3908 }
   3909 /* {{{ s_mp_add_offset(a, b, offset) */
   3910 
   3911 /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
   3912 mp_err
   3913 s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
   3914 {
   3915 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3916    mp_word w, k = 0;
   3917 #else
   3918    mp_digit d, sum, carry = 0;
   3919 #endif
   3920    mp_size ib;
   3921    mp_size ia;
   3922    mp_size lim;
   3923    mp_err res;
   3924 
   3925    /* Make sure a has enough precision for the output value */
   3926    lim = MP_USED(b) + offset;
   3927    if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
   3928        return res;
   3929 
   3930    /*
   3931    Add up all digits up to the precision of b.  If b had initially
   3932    the same precision as a, or greater, we took care of it by the
   3933    padding step above, so there is no problem.  If b had initially
   3934    less precision, we'll have to make sure the carry out is duly
   3935    propagated upward among the higher-order digits of the sum.
   3936   */
   3937    lim = USED(b);
   3938    for (ib = 0, ia = offset; ib < lim; ib++, ia++) {
   3939 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3940        w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
   3941        DIGIT(a, ia) = ACCUM(w);
   3942        k = CARRYOUT(w);
   3943 #else
   3944        d = MP_DIGIT(a, ia);
   3945        sum = d + MP_DIGIT(b, ib);
   3946        d = (sum < d);
   3947        MP_DIGIT(a, ia) = sum += carry;
   3948        carry = d + (sum < carry);
   3949 #endif
   3950    }
   3951 
   3952 /* If we run out of 'b' digits before we're actually done, make
   3953     sure the carries get propagated upward...
   3954   */
   3955 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3956    for (lim = MP_USED(a); k && (ia < lim); ++ia) {
   3957        w = (mp_word)DIGIT(a, ia) + k;
   3958        DIGIT(a, ia) = ACCUM(w);
   3959        k = CARRYOUT(w);
   3960    }
   3961 #else
   3962    for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
   3963        d = MP_DIGIT(a, ia);
   3964        MP_DIGIT(a, ia) = sum = d + carry;
   3965        carry = (sum < d);
   3966    }
   3967 #endif
   3968 
   3969 /* If there's an overall carry out, increase precision and include
   3970     it.  We could have done this initially, but why touch the memory
   3971     allocator unless we're sure we have to?
   3972   */
   3973 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
   3974    if (k) {
   3975        if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
   3976            return res;
   3977 
   3978        DIGIT(a, ia) = (mp_digit)k;
   3979    }
   3980 #else
   3981    if (carry) {
   3982        if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
   3983            return res;
   3984 
   3985        DIGIT(a, lim) = carry;
   3986    }
   3987 #endif
   3988    s_mp_clamp(a);
   3989 
   3990    return MP_OKAY;
   3991 
   3992 } /* end s_mp_add_offset() */
   3993 
   3994 /* }}} */
   3995 
   3996 /* {{{ s_mp_sub(a, b) */
   3997 
   3998 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
   3999 mp_err
   4000 s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract      */
   4001 {
   4002    mp_digit *pa, *pb, *limit;
   4003 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
   4004    mp_sword w = 0;
   4005 #else
   4006    mp_digit d, diff, borrow = 0;
   4007 #endif
   4008 
   4009    /*
   4010    Subtract and propagate borrow.  Up to the precision of b, this
   4011    accounts for the digits of b; after that, we just make sure the
   4012    carries get to the right place.  This saves having to pad b out to
   4013    the precision of a just to make the loops work right...
   4014   */
   4015    pa = MP_DIGITS(a);
   4016    pb = MP_DIGITS(b);
   4017    limit = pb + MP_USED(b);
   4018    while (pb < limit) {
   4019 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
   4020        w = w + *pa - *pb++;
   4021        *pa++ = ACCUM(w);
   4022        w >>= MP_DIGIT_BIT;
   4023 #else
   4024        d = *pa;
   4025        diff = d - *pb++;
   4026        d = (diff > d); /* detect borrow */
   4027        if (borrow && --diff == MP_DIGIT_MAX)
   4028            ++d;
   4029        *pa++ = diff;
   4030        borrow = d;
   4031 #endif
   4032    }
   4033    limit = MP_DIGITS(a) + MP_USED(a);
   4034 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
   4035    while (w && pa < limit) {
   4036        w = w + *pa;
   4037        *pa++ = ACCUM(w);
   4038        w >>= MP_DIGIT_BIT;
   4039    }
   4040 #else
   4041    while (borrow && pa < limit) {
   4042        d = *pa;
   4043        *pa++ = diff = d - borrow;
   4044        borrow = (diff > d);
   4045    }
   4046 #endif
   4047 
   4048    /* Clobber any leading zeroes we created    */
   4049    s_mp_clamp(a);
   4050 
   4051 /*
   4052     If there was a borrow out, then |b| > |a| in violation
   4053     of our input invariant.  We've already done the work,
   4054     but we'll at least complain about it...
   4055   */
   4056 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
   4057    return w ? MP_RANGE : MP_OKAY;
   4058 #else
   4059    return borrow ? MP_RANGE : MP_OKAY;
   4060 #endif
   4061 } /* end s_mp_sub() */
   4062 
   4063 /* }}} */
   4064 
   4065 /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
   4066 mp_err
   4067 s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
   4068 {
   4069    mp_digit *pa, *pb, *pc;
   4070 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
   4071    mp_sword w = 0;
   4072 #else
   4073    mp_digit d, diff, borrow = 0;
   4074 #endif
   4075    int ix, limit;
   4076    mp_err res;
   4077 
   4078    MP_SIGN(c) = MP_SIGN(a);
   4079 
   4080    /* Make sure a has enough precision for the output value */
   4081    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
   4082        return res;
   4083 
   4084    /*
   4085    Subtract and propagate borrow.  Up to the precision of b, this
   4086    accounts for the digits of b; after that, we just make sure the
   4087    carries get to the right place.  This saves having to pad b out to
   4088    the precision of a just to make the loops work right...
   4089   */
   4090    pa = MP_DIGITS(a);
   4091    pb = MP_DIGITS(b);
   4092    pc = MP_DIGITS(c);
   4093    limit = MP_USED(b);
   4094    for (ix = 0; ix < limit; ++ix) {
   4095 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
   4096        w = w + *pa++ - *pb++;
   4097        *pc++ = ACCUM(w);
   4098        w >>= MP_DIGIT_BIT;
   4099 #else
   4100        d = *pa++;
   4101        diff = d - *pb++;
   4102        d = (diff > d);
   4103        if (borrow && --diff == MP_DIGIT_MAX)
   4104            ++d;
   4105        *pc++ = diff;
   4106        borrow = d;
   4107 #endif
   4108    }
   4109    for (limit = MP_USED(a); ix < limit; ++ix) {
   4110 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
   4111        w = w + *pa++;
   4112        *pc++ = ACCUM(w);
   4113        w >>= MP_DIGIT_BIT;
   4114 #else
   4115        d = *pa++;
   4116        *pc++ = diff = d - borrow;
   4117        borrow = (diff > d);
   4118 #endif
   4119    }
   4120 
   4121    /* Clobber any leading zeroes we created    */
   4122    MP_USED(c) = ix;
   4123    s_mp_clamp(c);
   4124 
   4125 /*
   4126     If there was a borrow out, then |b| > |a| in violation
   4127     of our input invariant.  We've already done the work,
   4128     but we'll at least complain about it...
   4129   */
   4130 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
   4131    return w ? MP_RANGE : MP_OKAY;
   4132 #else
   4133    return borrow ? MP_RANGE : MP_OKAY;
   4134 #endif
   4135 }
   4136 /* {{{ s_mp_mul(a, b) */
   4137 
   4138 /* Compute a = |a| * |b|                                                  */
   4139 mp_err
   4140 s_mp_mul(mp_int *a, const mp_int *b)
   4141 {
   4142    return mp_mul(a, b, a);
   4143 } /* end s_mp_mul() */
   4144 
   4145 /* }}} */
   4146 
   4147 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
   4148 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
   4149 #define MP_MUL_DxD(a, b, Phi, Plo)                              \
   4150    {                                                           \
   4151        unsigned long long product = (unsigned long long)a * b; \
   4152        Plo = (mp_digit)product;                                \
   4153        Phi = (mp_digit)(product >> MP_DIGIT_BIT);              \
   4154    }
   4155 #else
   4156 #define MP_MUL_DxD(a, b, Phi, Plo)                                 \
   4157    {                                                              \
   4158        mp_digit a0b1, a1b0;                                       \
   4159        Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX);   \
   4160        Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
   4161        a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
   4162        a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
   4163        a1b0 += a0b1;                                              \
   4164        Phi += a1b0 >> MP_HALF_DIGIT_BIT;                          \
   4165        Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT;       \
   4166        a1b0 <<= MP_HALF_DIGIT_BIT;                                \
   4167        Plo += a1b0;                                               \
   4168        Phi += MP_CT_LTU(Plo, a1b0);                               \
   4169    }
   4170 #endif
   4171 
   4172 /* Constant time version of s_mpv_mul_d_add_prop.
   4173 * Presently, this is only used by the Constant time Montgomery arithmetic code. */
   4174 /* c += a * b */
   4175 void
   4176 s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
   4177                       mp_digit *c, mp_size c_len)
   4178 {
   4179 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
   4180    mp_digit d = 0;
   4181 
   4182    c_len -= a_len;
   4183    /* Inner product:  Digits of a */
   4184    while (a_len--) {
   4185        mp_word w = ((mp_word)b * *a++) + *c + d;
   4186        *c++ = ACCUM(w);
   4187        d = CARRYOUT(w);
   4188    }
   4189 
   4190    /* propagate the carry to the end, even if carry is zero */
   4191    while (c_len--) {
   4192        mp_word w = (mp_word)*c + d;
   4193        *c++ = ACCUM(w);
   4194        d = CARRYOUT(w);
   4195    }
   4196 #else
   4197    mp_digit carry = 0;
   4198    c_len -= a_len;
   4199    while (a_len--) {
   4200        mp_digit a_i = *a++;
   4201        mp_digit a0b0, a1b1;
   4202        MP_MUL_DxD(a_i, b, a1b1, a0b0);
   4203 
   4204        a0b0 += carry;
   4205        a1b1 += MP_CT_LTU(a0b0, carry);
   4206        a0b0 += a_i = *c;
   4207        a1b1 += MP_CT_LTU(a0b0, a_i);
   4208 
   4209        *c++ = a0b0;
   4210        carry = a1b1;
   4211    }
   4212    /* propagate the carry to the end, even if carry is zero */
   4213    while (c_len--) {
   4214        mp_digit c_i = *c;
   4215        carry += c_i;
   4216        *c++ = carry;
   4217        carry = MP_CT_LTU(carry, c_i);
   4218    }
   4219 #endif
   4220 }
   4221 
   4222 #if !defined(MP_ASSEMBLY_MULTIPLY)
   4223 /* c = a * b */
   4224 void
   4225 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
   4226 {
   4227 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
   4228    mp_digit d = 0;
   4229 
   4230    /* Inner product:  Digits of a */
   4231    while (a_len--) {
   4232        mp_word w = ((mp_word)b * *a++) + d;
   4233        *c++ = ACCUM(w);
   4234        d = CARRYOUT(w);
   4235    }
   4236    *c = d;
   4237 #else
   4238    mp_digit carry = 0;
   4239    while (a_len--) {
   4240        mp_digit a_i = *a++;
   4241        mp_digit a0b0, a1b1;
   4242 
   4243        MP_MUL_DxD(a_i, b, a1b1, a0b0);
   4244 
   4245        a0b0 += carry;
   4246        a1b1 += MP_CT_LTU(a0b0, carry);
   4247        *c++ = a0b0;
   4248        carry = a1b1;
   4249    }
   4250    *c = carry;
   4251 #endif
   4252 }
   4253 
   4254 /* c += a * b */
   4255 void
   4256 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
   4257                mp_digit *c)
   4258 {
   4259 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
   4260    mp_digit d = 0;
   4261 
   4262    /* Inner product:  Digits of a */
   4263    while (a_len--) {
   4264        mp_word w = ((mp_word)b * *a++) + *c + d;
   4265        *c++ = ACCUM(w);
   4266        d = CARRYOUT(w);
   4267    }
   4268    *c = d;
   4269 #else
   4270    mp_digit carry = 0;
   4271    while (a_len--) {
   4272        mp_digit a_i = *a++;
   4273        mp_digit a0b0, a1b1;
   4274 
   4275        MP_MUL_DxD(a_i, b, a1b1, a0b0);
   4276 
   4277        a0b0 += carry;
   4278        a1b1 += MP_CT_LTU(a0b0, carry);
   4279        a0b0 += a_i = *c;
   4280        a1b1 += MP_CT_LTU(a0b0, a_i);
   4281        *c++ = a0b0;
   4282        carry = a1b1;
   4283    }
   4284    *c = carry;
   4285 #endif
   4286 }
   4287 
   4288 /* Presently, this is only used by the Montgomery arithmetic code. */
   4289 /* c += a * b */
   4290 void
   4291 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
   4292 {
   4293 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
   4294    mp_digit d = 0;
   4295 
   4296    /* Inner product:  Digits of a */
   4297    while (a_len--) {
   4298        mp_word w = ((mp_word)b * *a++) + *c + d;
   4299        *c++ = ACCUM(w);
   4300        d = CARRYOUT(w);
   4301    }
   4302 
   4303    while (d) {
   4304        mp_word w = (mp_word)*c + d;
   4305        *c++ = ACCUM(w);
   4306        d = CARRYOUT(w);
   4307    }
   4308 #else
   4309    mp_digit carry = 0;
   4310    while (a_len--) {
   4311        mp_digit a_i = *a++;
   4312        mp_digit a0b0, a1b1;
   4313 
   4314        MP_MUL_DxD(a_i, b, a1b1, a0b0);
   4315 
   4316        a0b0 += carry;
   4317        if (a0b0 < carry)
   4318            ++a1b1;
   4319 
   4320        a0b0 += a_i = *c;
   4321        if (a0b0 < a_i)
   4322            ++a1b1;
   4323 
   4324        *c++ = a0b0;
   4325        carry = a1b1;
   4326    }
   4327    while (carry) {
   4328        mp_digit c_i = *c;
   4329        carry += c_i;
   4330        *c++ = carry;
   4331        carry = carry < c_i;
   4332    }
   4333 #endif
   4334 }
   4335 #endif
   4336 
   4337 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
   4338 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
   4339 #define MP_SQR_D(a, Phi, Plo)                                  \
   4340    {                                                          \
   4341        unsigned long long square = (unsigned long long)a * a; \
   4342        Plo = (mp_digit)square;                                \
   4343        Phi = (mp_digit)(square >> MP_DIGIT_BIT);              \
   4344    }
   4345 #else
   4346 #define MP_SQR_D(a, Phi, Plo)                                      \
   4347    {                                                              \
   4348        mp_digit Pmid;                                             \
   4349        Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX);   \
   4350        Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
   4351        Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
   4352        Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);                    \
   4353        Pmid <<= (MP_HALF_DIGIT_BIT + 1);                          \
   4354        Plo += Pmid;                                               \
   4355        if (Plo < Pmid)                                            \
   4356            ++Phi;                                                 \
   4357    }
   4358 #endif
   4359 
   4360 #if !defined(MP_ASSEMBLY_SQUARE)
   4361 /* Add the squares of the digits of a to the digits of b. */
   4362 void
   4363 s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
   4364 {
   4365 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
   4366    mp_word w;
   4367    mp_digit d;
   4368    mp_size ix;
   4369 
   4370    w = 0;
   4371 #define ADD_SQUARE(n)                     \
   4372    d = pa[n];                            \
   4373    w += (d * (mp_word)d) + ps[2 * n];    \
   4374    ps[2 * n] = ACCUM(w);                 \
   4375    w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \
   4376    ps[2 * n + 1] = ACCUM(w);             \
   4377    w = (w >> DIGIT_BIT)
   4378 
   4379    for (ix = a_len; ix >= 4; ix -= 4) {
   4380        ADD_SQUARE(0);
   4381        ADD_SQUARE(1);
   4382        ADD_SQUARE(2);
   4383        ADD_SQUARE(3);
   4384        pa += 4;
   4385        ps += 8;
   4386    }
   4387    if (ix) {
   4388        ps += 2 * ix;
   4389        pa += ix;
   4390        switch (ix) {
   4391            case 3:
   4392                ADD_SQUARE(-3); /* FALLTHRU */
   4393            case 2:
   4394                ADD_SQUARE(-2); /* FALLTHRU */
   4395            case 1:
   4396                ADD_SQUARE(-1); /* FALLTHRU */
   4397            case 0:
   4398                break;
   4399        }
   4400    }
   4401    while (w) {
   4402        w += *ps;
   4403        *ps++ = ACCUM(w);
   4404        w = (w >> DIGIT_BIT);
   4405    }
   4406 #else
   4407    mp_digit carry = 0;
   4408    while (a_len--) {
   4409        mp_digit a_i = *pa++;
   4410        mp_digit a0a0, a1a1;
   4411 
   4412        MP_SQR_D(a_i, a1a1, a0a0);
   4413 
   4414        /* here a1a1 and a0a0 constitute a_i ** 2 */
   4415        a0a0 += carry;
   4416        if (a0a0 < carry)
   4417            ++a1a1;
   4418 
   4419        /* now add to ps */
   4420        a0a0 += a_i = *ps;
   4421        if (a0a0 < a_i)
   4422            ++a1a1;
   4423        *ps++ = a0a0;
   4424        a1a1 += a_i = *ps;
   4425        carry = (a1a1 < a_i);
   4426        *ps++ = a1a1;
   4427    }
   4428    while (carry) {
   4429        mp_digit s_i = *ps;
   4430        carry += s_i;
   4431        *ps++ = carry;
   4432        carry = carry < s_i;
   4433    }
   4434 #endif
   4435 }
   4436 #endif
   4437 
   4438 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
   4439 /*
   4440 ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
   4441 ** so its high bit is 1.   This code is from NSPR.
   4442 */
   4443 mp_err
   4444 s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
   4445                mp_digit *qp, mp_digit *rp)
   4446 {
   4447    mp_digit d1, d0, q1, q0;
   4448    mp_digit r1, r0, m;
   4449 
   4450    d1 = divisor >> MP_HALF_DIGIT_BIT;
   4451    d0 = divisor & MP_HALF_DIGIT_MAX;
   4452    r1 = Nhi % d1;
   4453    q1 = Nhi / d1;
   4454    m = q1 * d0;
   4455    r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
   4456    if (r1 < m) {
   4457        q1--, r1 += divisor;
   4458        if (r1 >= divisor && r1 < m) {
   4459            q1--, r1 += divisor;
   4460        }
   4461    }
   4462    r1 -= m;
   4463    r0 = r1 % d1;
   4464    q0 = r1 / d1;
   4465    m = q0 * d0;
   4466    r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
   4467    if (r0 < m) {
   4468        q0--, r0 += divisor;
   4469        if (r0 >= divisor && r0 < m) {
   4470            q0--, r0 += divisor;
   4471        }
   4472    }
   4473    if (qp)
   4474        *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
   4475    if (rp)
   4476        *rp = r0 - m;
   4477    return MP_OKAY;
   4478 }
   4479 #endif
   4480 
   4481 #if MP_SQUARE
   4482 /* {{{ s_mp_sqr(a) */
   4483 
   4484 mp_err
   4485 s_mp_sqr(mp_int *a)
   4486 {
   4487    mp_err res;
   4488    mp_int tmp;
   4489 
   4490    if ((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY)
   4491        return res;
   4492    res = mp_sqr(a, &tmp);
   4493    if (res == MP_OKAY) {
   4494        s_mp_exch(&tmp, a);
   4495    }
   4496    mp_clear(&tmp);
   4497    return res;
   4498 }
   4499 
   4500 /* }}} */
   4501 #endif
   4502 
   4503 /* {{{ s_mp_div(a, b) */
   4504 
   4505 /*
   4506  s_mp_div(a, b)
   4507 
   4508  Compute a = a / b and b = a mod b.  Assumes b > a.
   4509 */
   4510 
   4511 mp_err
   4512 s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
   4513         mp_int *div,  /* i: divisor                */
   4514         mp_int *quot) /* i: 0;        o: quotient  */
   4515 {
   4516    mp_int part, t;
   4517    mp_digit q_msd;
   4518    mp_err res;
   4519    mp_digit d;
   4520    mp_digit div_msd;
   4521    int ix;
   4522 
   4523    if (mp_cmp_z(div) == 0)
   4524        return MP_RANGE;
   4525 
   4526    DIGITS(&t) = 0;
   4527    /* Shortcut if divisor is power of two */
   4528    if ((ix = s_mp_ispow2(div)) >= 0) {
   4529        MP_CHECKOK(mp_copy(rem, quot));
   4530        s_mp_div_2d(quot, (mp_digit)ix);
   4531        s_mp_mod_2d(rem, (mp_digit)ix);
   4532 
   4533        return MP_OKAY;
   4534    }
   4535 
   4536    MP_SIGN(rem) = ZPOS;
   4537    MP_SIGN(div) = ZPOS;
   4538    MP_SIGN(&part) = ZPOS;
   4539 
   4540    /* A working temporary for division     */
   4541    MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem)));
   4542 
   4543    /* Normalize to optimize guessing       */
   4544    MP_CHECKOK(s_mp_norm(rem, div, &d));
   4545 
   4546    /* Perform the division itself...woo!   */
   4547    MP_USED(quot) = MP_ALLOC(quot);
   4548 
   4549    /* Find a partial substring of rem which is at least div */
   4550    /* If we didn't find one, we're finished dividing    */
   4551    while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
   4552        int i;
   4553        int unusedRem;
   4554        int partExtended = 0; /* set to true if we need to extend part */
   4555 
   4556        unusedRem = MP_USED(rem) - MP_USED(div);
   4557        MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
   4558        MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
   4559        MP_USED(&part) = MP_USED(div);
   4560 
   4561        /* We have now truncated the part of the remainder to the same length as
   4562         * the divisor. If part is smaller than div, extend part by one digit. */
   4563        if (s_mp_cmp(&part, div) < 0) {
   4564            --unusedRem;
   4565 #if MP_ARGCHK == 2
   4566            assert(unusedRem >= 0);
   4567 #endif
   4568            --MP_DIGITS(&part);
   4569            ++MP_USED(&part);
   4570            ++MP_ALLOC(&part);
   4571            partExtended = 1;
   4572        }
   4573 
   4574        /* Compute a guess for the next quotient digit       */
   4575        q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
   4576        div_msd = MP_DIGIT(div, MP_USED(div) - 1);
   4577        if (!partExtended) {
   4578            /* In this case, q_msd /= div_msd is always 1. First, since div_msd is
   4579             * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since
   4580             * we didn't extend part, q_msd >= div_msd. Therefore we know that
   4581             * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we
   4582             * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */
   4583            q_msd = 1;
   4584        } else {
   4585            if (q_msd == div_msd) {
   4586                q_msd = MP_DIGIT_MAX;
   4587            } else {
   4588                mp_digit r;
   4589                MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
   4590                                           div_msd, &q_msd, &r));
   4591            }
   4592        }
   4593 #if MP_ARGCHK == 2
   4594        assert(q_msd > 0); /* This case should never occur any more. */
   4595 #endif
   4596        if (q_msd <= 0)
   4597            break;
   4598 
   4599        /* See what that multiplies out to                   */
   4600        mp_copy(div, &t);
   4601        MP_CHECKOK(s_mp_mul_d(&t, q_msd));
   4602 
   4603        /*
   4604           If it's too big, back it off.  We should not have to do this
   4605           more than once, or, in rare cases, twice.  Knuth describes a
   4606           method by which this could be reduced to a maximum of once, but
   4607           I didn't implement that here.
   4608           When using s_mpv_div_2dx1d, we may have to do this 3 times.
   4609         */
   4610        for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
   4611            --q_msd;
   4612            MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */
   4613        }
   4614        if (i < 0) {
   4615            res = MP_RANGE;
   4616            goto CLEANUP;
   4617        }
   4618 
   4619        /* At this point, q_msd should be the right next digit   */
   4620        MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */
   4621        s_mp_clamp(rem);
   4622 
   4623        /*
   4624          Include the digit in the quotient.  We allocated enough memory
   4625          for any quotient we could ever possibly get, so we should not
   4626          have to check for failures here
   4627         */
   4628        MP_DIGIT(quot, unusedRem) = q_msd;
   4629    }
   4630 
   4631    /* Denormalize remainder                */
   4632    if (d) {
   4633        s_mp_div_2d(rem, d);
   4634    }
   4635 
   4636    s_mp_clamp(quot);
   4637 
   4638 CLEANUP:
   4639    mp_clear(&t);
   4640 
   4641    return res;
   4642 
   4643 } /* end s_mp_div() */
   4644 
   4645 /* }}} */
   4646 
   4647 /* {{{ s_mp_2expt(a, k) */
   4648 
   4649 mp_err
   4650 s_mp_2expt(mp_int *a, mp_digit k)
   4651 {
   4652    mp_err res;
   4653    mp_size dig, bit;
   4654 
   4655    dig = k / DIGIT_BIT;
   4656    bit = k % DIGIT_BIT;
   4657 
   4658    mp_zero(a);
   4659    if ((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
   4660        return res;
   4661 
   4662    DIGIT(a, dig) |= ((mp_digit)1 << bit);
   4663 
   4664    return MP_OKAY;
   4665 
   4666 } /* end s_mp_2expt() */
   4667 
   4668 /* }}} */
   4669 
   4670 /* {{{ s_mp_reduce(x, m, mu) */
   4671 
   4672 /*
   4673  Compute Barrett reduction, x (mod m), given a precomputed value for
   4674  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
   4675  faster than straight division, when many reductions by the same
   4676  value of m are required (such as in modular exponentiation).  This
   4677  can nearly halve the time required to do modular exponentiation,
   4678  as compared to using the full integer divide to reduce.
   4679 
   4680  This algorithm was derived from the _Handbook of Applied
   4681  Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
   4682  pp. 603-604.
   4683 */
   4684 
   4685 mp_err
   4686 s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
   4687 {
   4688    mp_int q;
   4689    mp_err res;
   4690 
   4691    if ((res = mp_init_copy(&q, x)) != MP_OKAY)
   4692        return res;
   4693 
   4694    s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1)  */
   4695    s_mp_mul(&q, mu);           /* q2 = q1 * mu      */
   4696    s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
   4697 
   4698    /* x = x mod b^(k+1), quick (no division) */
   4699    s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
   4700 
   4701    /* q = q * m mod b^(k+1), quick (no division) */
   4702    s_mp_mul(&q, m);
   4703    s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
   4704 
   4705    /* x = x - q */
   4706    if ((res = mp_sub(x, &q, x)) != MP_OKAY)
   4707        goto CLEANUP;
   4708 
   4709    /* If x < 0, add b^(k+1) to it */
   4710    if (mp_cmp_z(x) < 0) {
   4711        mp_set(&q, 1);
   4712        if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
   4713            goto CLEANUP;
   4714        if ((res = mp_add(x, &q, x)) != MP_OKAY)
   4715            goto CLEANUP;
   4716    }
   4717 
   4718    /* Back off if it's too big */
   4719    while (mp_cmp(x, m) >= 0) {
   4720        if ((res = s_mp_sub(x, m)) != MP_OKAY)
   4721            break;
   4722    }
   4723 
   4724 CLEANUP:
   4725    mp_clear(&q);
   4726 
   4727    return res;
   4728 
   4729 } /* end s_mp_reduce() */
   4730 
   4731 /* }}} */
   4732 
   4733 /* }}} */
   4734 
   4735 /* {{{ Primitive comparisons */
   4736 
   4737 /* {{{ s_mp_cmp(a, b) */
   4738 
   4739 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
   4740 int
   4741 s_mp_cmp(const mp_int *a, const mp_int *b)
   4742 {
   4743    ARGMPCHK(a != NULL && b != NULL);
   4744 
   4745    mp_size used_a = MP_USED(a);
   4746    {
   4747        mp_size used_b = MP_USED(b);
   4748 
   4749        if (used_a > used_b)
   4750            goto IS_GT;
   4751        if (used_a < used_b)
   4752            goto IS_LT;
   4753    }
   4754    {
   4755        mp_digit *pa, *pb;
   4756        mp_digit da = 0, db = 0;
   4757 
   4758 #define CMP_AB(n)                     \
   4759    if ((da = pa[n]) != (db = pb[n])) \
   4760    goto done
   4761 
   4762        pa = MP_DIGITS(a) + used_a;
   4763        pb = MP_DIGITS(b) + used_a;
   4764        while (used_a >= 4) {
   4765            pa -= 4;
   4766            pb -= 4;
   4767            used_a -= 4;
   4768            CMP_AB(3);
   4769            CMP_AB(2);
   4770            CMP_AB(1);
   4771            CMP_AB(0);
   4772        }
   4773        while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
   4774            /* do nothing */;
   4775    done:
   4776        if (da > db)
   4777            goto IS_GT;
   4778        if (da < db)
   4779            goto IS_LT;
   4780    }
   4781    return MP_EQ;
   4782 IS_LT:
   4783    return MP_LT;
   4784 IS_GT:
   4785    return MP_GT;
   4786 } /* end s_mp_cmp() */
   4787 
   4788 /* }}} */
   4789 
   4790 /* {{{ s_mp_cmp_d(a, d) */
   4791 
   4792 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
   4793 int
   4794 s_mp_cmp_d(const mp_int *a, mp_digit d)
   4795 {
   4796    ARGMPCHK(a != NULL);
   4797 
   4798    if (USED(a) > 1)
   4799        return MP_GT;
   4800 
   4801    if (DIGIT(a, 0) < d)
   4802        return MP_LT;
   4803    else if (DIGIT(a, 0) > d)
   4804        return MP_GT;
   4805    else
   4806        return MP_EQ;
   4807 
   4808 } /* end s_mp_cmp_d() */
   4809 
   4810 /* }}} */
   4811 
   4812 /* {{{ s_mp_ispow2(v) */
   4813 
   4814 /*
   4815  Returns -1 if the value is not a power of two; otherwise, it returns
   4816  k such that v = 2^k, i.e. lg(v).
   4817 */
   4818 int
   4819 s_mp_ispow2(const mp_int *v)
   4820 {
   4821    mp_digit d;
   4822    int extra = 0, ix;
   4823 
   4824    ARGMPCHK(v != NULL);
   4825 
   4826    ix = MP_USED(v) - 1;
   4827    d = MP_DIGIT(v, ix); /* most significant digit of v */
   4828 
   4829    extra = s_mp_ispow2d(d);
   4830    if (extra < 0 || ix == 0)
   4831        return extra;
   4832 
   4833    while (--ix >= 0) {
   4834        if (DIGIT(v, ix) != 0)
   4835            return -1; /* not a power of two */
   4836        extra += MP_DIGIT_BIT;
   4837    }
   4838 
   4839    return extra;
   4840 
   4841 } /* end s_mp_ispow2() */
   4842 
   4843 /* }}} */
   4844 
   4845 /* {{{ s_mp_ispow2d(d) */
   4846 
   4847 int
   4848 s_mp_ispow2d(mp_digit d)
   4849 {
   4850    if ((d != 0) && ((d & (d - 1)) == 0)) { /* d is a power of 2 */
   4851        int pow = 0;
   4852 #if defined(MP_USE_UINT_DIGIT)
   4853        if (d & 0xffff0000U)
   4854            pow += 16;
   4855        if (d & 0xff00ff00U)
   4856            pow += 8;
   4857        if (d & 0xf0f0f0f0U)
   4858            pow += 4;
   4859        if (d & 0xccccccccU)
   4860            pow += 2;
   4861        if (d & 0xaaaaaaaaU)
   4862            pow += 1;
   4863 #elif defined(MP_USE_LONG_LONG_DIGIT)
   4864        if (d & 0xffffffff00000000ULL)
   4865            pow += 32;
   4866        if (d & 0xffff0000ffff0000ULL)
   4867            pow += 16;
   4868        if (d & 0xff00ff00ff00ff00ULL)
   4869            pow += 8;
   4870        if (d & 0xf0f0f0f0f0f0f0f0ULL)
   4871            pow += 4;
   4872        if (d & 0xccccccccccccccccULL)
   4873            pow += 2;
   4874        if (d & 0xaaaaaaaaaaaaaaaaULL)
   4875            pow += 1;
   4876 #elif defined(MP_USE_LONG_DIGIT)
   4877        if (d & 0xffffffff00000000UL)
   4878            pow += 32;
   4879        if (d & 0xffff0000ffff0000UL)
   4880            pow += 16;
   4881        if (d & 0xff00ff00ff00ff00UL)
   4882            pow += 8;
   4883        if (d & 0xf0f0f0f0f0f0f0f0UL)
   4884            pow += 4;
   4885        if (d & 0xccccccccccccccccUL)
   4886            pow += 2;
   4887        if (d & 0xaaaaaaaaaaaaaaaaUL)
   4888            pow += 1;
   4889 #else
   4890 #error "unknown type for mp_digit"
   4891 #endif
   4892        return pow;
   4893    }
   4894    return -1;
   4895 
   4896 } /* end s_mp_ispow2d() */
   4897 
   4898 /* }}} */
   4899 
   4900 /* }}} */
   4901 
   4902 /* {{{ Primitive I/O helpers */
   4903 
   4904 /* {{{ s_mp_tovalue(ch, r) */
   4905 
   4906 /*
   4907  Convert the given character to its digit value, in the given radix.
   4908  If the given character is not understood in the given radix, -1 is
   4909  returned.  Otherwise the digit's numeric value is returned.
   4910 
   4911  The results will be odd if you use a radix < 2 or > 62, you are
   4912  expected to know what you're up to.
   4913 */
   4914 int
   4915 s_mp_tovalue(char ch, int r)
   4916 {
   4917    int val, xch;
   4918 
   4919    if (r > 36)
   4920        xch = ch;
   4921    else
   4922        xch = toupper((unsigned char)ch);
   4923 
   4924    if (isdigit((unsigned char)xch))
   4925        val = xch - '0';
   4926    else if (isupper((unsigned char)xch))
   4927        val = xch - 'A' + 10;
   4928    else if (islower((unsigned char)xch))
   4929        val = xch - 'a' + 36;
   4930    else if (xch == '+')
   4931        val = 62;
   4932    else if (xch == '/')
   4933        val = 63;
   4934    else
   4935        return -1;
   4936 
   4937    if (val < 0 || val >= r)
   4938        return -1;
   4939 
   4940    return val;
   4941 
   4942 } /* end s_mp_tovalue() */
   4943 
   4944 /* }}} */
   4945 
   4946 /* {{{ s_mp_todigit(val, r, low) */
   4947 
   4948 /*
   4949  Convert val to a radix-r digit, if possible.  If val is out of range
   4950  for r, returns zero.  Otherwise, returns an ASCII character denoting
   4951  the value in the given radix.
   4952 
   4953  The results may be odd if you use a radix < 2 or > 64, you are
   4954  expected to know what you're doing.
   4955 */
   4956 
   4957 char
   4958 s_mp_todigit(mp_digit val, int r, int low)
   4959 {
   4960    char ch;
   4961 
   4962    if (val >= r)
   4963        return 0;
   4964 
   4965    ch = s_dmap_1[val];
   4966 
   4967    if (r <= 36 && low)
   4968        ch = tolower((unsigned char)ch);
   4969 
   4970    return ch;
   4971 
   4972 } /* end s_mp_todigit() */
   4973 
   4974 /* }}} */
   4975 
   4976 /* {{{ s_mp_outlen(bits, radix) */
   4977 
   4978 /*
   4979   Return an estimate for how long a string is needed to hold a radix
   4980   r representation of a number with 'bits' significant bits, plus an
   4981   extra for a zero terminator (assuming C style strings here)
   4982 */
   4983 int
   4984 s_mp_outlen(int bits, int r)
   4985 {
   4986    return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
   4987 
   4988 } /* end s_mp_outlen() */
   4989 
   4990 /* }}} */
   4991 
   4992 /* }}} */
   4993 
   4994 /* {{{ mp_read_unsigned_octets(mp, str, len) */
   4995 /* mp_read_unsigned_octets(mp, str, len)
   4996   Read in a raw value (base 256) into the given mp_int
   4997   No sign bit, number is positive.  Leading zeros ignored.
   4998 */
   4999 
   5000 mp_err
   5001 mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
   5002 {
   5003    int count;
   5004    mp_err res;
   5005    mp_digit d;
   5006 
   5007    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
   5008 
   5009    mp_zero(mp);
   5010 
   5011    count = len % sizeof(mp_digit);
   5012    if (count) {
   5013        for (d = 0; count-- > 0; --len) {
   5014            d = (d << 8) | *str++;
   5015        }
   5016        MP_DIGIT(mp, 0) = d;
   5017    }
   5018 
   5019    /* Read the rest of the digits */
   5020    for (; len > 0; len -= sizeof(mp_digit)) {
   5021        for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
   5022            d = (d << 8) | *str++;
   5023        }
   5024        if (MP_EQ == mp_cmp_z(mp)) {
   5025            if (!d)
   5026                continue;
   5027        } else {
   5028            if ((res = s_mp_lshd(mp, 1)) != MP_OKAY)
   5029                return res;
   5030        }
   5031        MP_DIGIT(mp, 0) = d;
   5032    }
   5033    return MP_OKAY;
   5034 } /* end mp_read_unsigned_octets() */
   5035 /* }}} */
   5036 
   5037 /* {{{ mp_unsigned_octet_size(mp) */
   5038 unsigned int
   5039 mp_unsigned_octet_size(const mp_int *mp)
   5040 {
   5041    unsigned int bytes;
   5042    int ix;
   5043    mp_digit d = 0;
   5044 
   5045    ARGCHK(mp != NULL, MP_BADARG);
   5046    ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
   5047 
   5048    bytes = (USED(mp) * sizeof(mp_digit));
   5049 
   5050    /* subtract leading zeros. */
   5051    /* Iterate over each digit... */
   5052    for (ix = USED(mp) - 1; ix >= 0; ix--) {
   5053        d = DIGIT(mp, ix);
   5054        if (d)
   5055            break;
   5056        bytes -= sizeof(d);
   5057    }
   5058    if (!bytes)
   5059        return 1;
   5060 
   5061    /* Have MSD, check digit bytes, high order first */
   5062    for (ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
   5063        unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
   5064        if (x)
   5065            break;
   5066        --bytes;
   5067    }
   5068    return bytes;
   5069 } /* end mp_unsigned_octet_size() */
   5070 /* }}} */
   5071 
   5072 /* {{{ mp_to_unsigned_octets(mp, str) */
   5073 /* output a buffer of big endian octets no longer than specified. */
   5074 mp_err
   5075 mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
   5076 {
   5077    int ix, pos = 0;
   5078    unsigned int bytes;
   5079 
   5080    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
   5081 
   5082    bytes = mp_unsigned_octet_size(mp);
   5083    ARGCHK(bytes <= maxlen, MP_BADARG);
   5084 
   5085    /* Iterate over each digit... */
   5086    for (ix = USED(mp) - 1; ix >= 0; ix--) {
   5087        mp_digit d = DIGIT(mp, ix);
   5088        int jx;
   5089 
   5090        /* Unpack digit bytes, high order first */
   5091        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
   5092            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
   5093            if (!pos && !x) /* suppress leading zeros */
   5094                continue;
   5095            str[pos++] = x;
   5096        }
   5097    }
   5098    if (!pos)
   5099        str[pos++] = 0;
   5100    return pos;
   5101 } /* end mp_to_unsigned_octets() */
   5102 /* }}} */
   5103 
   5104 /* {{{ mp_to_signed_octets(mp, str) */
   5105 /* output a buffer of big endian octets no longer than specified. */
   5106 mp_err
   5107 mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
   5108 {
   5109    int ix, pos = 0;
   5110    unsigned int bytes;
   5111 
   5112    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
   5113 
   5114    bytes = mp_unsigned_octet_size(mp);
   5115    ARGCHK(bytes <= maxlen, MP_BADARG);
   5116 
   5117    /* Iterate over each digit... */
   5118    for (ix = USED(mp) - 1; ix >= 0; ix--) {
   5119        mp_digit d = DIGIT(mp, ix);
   5120        int jx;
   5121 
   5122        /* Unpack digit bytes, high order first */
   5123        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
   5124            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
   5125            if (!pos) {
   5126                if (!x) /* suppress leading zeros */
   5127                    continue;
   5128                if (x & 0x80) { /* add one leading zero to make output positive.  */
   5129                    ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
   5130                    if (bytes + 1 > maxlen)
   5131                        return MP_BADARG;
   5132                    str[pos++] = 0;
   5133                }
   5134            }
   5135            str[pos++] = x;
   5136        }
   5137    }
   5138    if (!pos)
   5139        str[pos++] = 0;
   5140    return pos;
   5141 } /* end mp_to_signed_octets() */
   5142 /* }}} */
   5143 
   5144 /* {{{ mp_to_fixlen_octets(mp, str) */
   5145 /* output a buffer of big endian octets exactly as long as requested.
   5146   constant time on the value of mp. */
   5147 mp_err
   5148 mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
   5149 {
   5150    int ix, jx;
   5151    unsigned int bytes;
   5152 
   5153    ARGCHK(mp != NULL && str != NULL && !SIGN(mp) && length > 0, MP_BADARG);
   5154 
   5155    /* Constant time on the value of mp.  Don't use mp_unsigned_octet_size. */
   5156    bytes = USED(mp) * MP_DIGIT_SIZE;
   5157 
   5158    /* If the output is shorter than the native size of mp, then check that any
   5159     * bytes not written have zero values.  This check isn't constant time on
   5160     * the assumption that timing-sensitive callers can guarantee that mp fits
   5161     * in the allocated space. */
   5162    ix = USED(mp) - 1;
   5163    if (bytes > length) {
   5164        unsigned int zeros = bytes - length;
   5165 
   5166        while (zeros >= MP_DIGIT_SIZE) {
   5167            ARGCHK(DIGIT(mp, ix) == 0, MP_BADARG);
   5168            zeros -= MP_DIGIT_SIZE;
   5169            ix--;
   5170        }
   5171 
   5172        if (zeros > 0) {
   5173            mp_digit d = DIGIT(mp, ix);
   5174            mp_digit m = ~0ULL << ((MP_DIGIT_SIZE - zeros) * CHAR_BIT);
   5175            ARGCHK((d & m) == 0, MP_BADARG);
   5176            for (jx = MP_DIGIT_SIZE - zeros - 1; jx >= 0; jx--) {
   5177                *str++ = d >> (jx * CHAR_BIT);
   5178            }
   5179            ix--;
   5180        }
   5181    } else if (bytes < length) {
   5182        /* Place any needed leading zeros. */
   5183        unsigned int zeros = length - bytes;
   5184        memset(str, 0, zeros);
   5185        str += zeros;
   5186    }
   5187 
   5188    /* Iterate over each whole digit... */
   5189    for (; ix >= 0; ix--) {
   5190        mp_digit d = DIGIT(mp, ix);
   5191 
   5192        /* Unpack digit bytes, high order first */
   5193        for (jx = MP_DIGIT_SIZE - 1; jx >= 0; jx--) {
   5194            *str++ = d >> (jx * CHAR_BIT);
   5195        }
   5196    }
   5197    return MP_OKAY;
   5198 } /* end mp_to_fixlen_octets() */
   5199 /* }}} */
   5200 
   5201 /* {{{ mp_cswap(condition, a, b, numdigits) */
   5202 /* performs a conditional swap between mp_int. */
   5203 mp_err
   5204 mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits)
   5205 {
   5206    mp_digit x;
   5207    unsigned int i;
   5208    mp_err res = 0;
   5209 
   5210    /* if pointers are equal return */
   5211    if (a == b)
   5212        return res;
   5213 
   5214    if (MP_ALLOC(a) < numdigits || MP_ALLOC(b) < numdigits) {
   5215        MP_CHECKOK(s_mp_grow(a, numdigits));
   5216        MP_CHECKOK(s_mp_grow(b, numdigits));
   5217    }
   5218 
   5219    condition = ((~condition & ((condition - 1))) >> (MP_DIGIT_BIT - 1)) - 1;
   5220 
   5221    x = (USED(a) ^ USED(b)) & condition;
   5222    USED(a) ^= x;
   5223    USED(b) ^= x;
   5224 
   5225    x = (SIGN(a) ^ SIGN(b)) & condition;
   5226    SIGN(a) ^= x;
   5227    SIGN(b) ^= x;
   5228 
   5229    for (i = 0; i < numdigits; i++) {
   5230        x = (DIGIT(a, i) ^ DIGIT(b, i)) & condition;
   5231        DIGIT(a, i) ^= x;
   5232        DIGIT(b, i) ^= x;
   5233    }
   5234 
   5235 CLEANUP:
   5236    return res;
   5237 } /* end mp_cswap() */
   5238 /* }}} */
   5239 
   5240 /*------------------------------------------------------------------------*/
   5241 /* HERE THERE BE DRAGONS                                                  */