/* mpvalpha.c (5462B) */
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "mpi-priv.h" 6 #include <c_asm.h> 7 8 #define MP_MUL_DxD(a, b, Phi, Plo) \ 9 { \ 10 Plo = asm("mulq %a0, %a1, %v0", a, b); \ 11 Phi = asm("umulh %a0, %a1, %v0", a, b); \ 12 } 13 14 /* This is empty for the loop in s_mpv_mul_d */ 15 #define CARRY_ADD 16 17 #define ONE_MUL \ 18 a_i = *a++; \ 19 MP_MUL_DxD(a_i, b, a1b1, a0b0); \ 20 a0b0 += carry; \ 21 if (a0b0 < carry) \ 22 ++a1b1; \ 23 CARRY_ADD \ 24 *c++ = a0b0; \ 25 carry = a1b1; 26 27 #define FOUR_MUL \ 28 ONE_MUL \ 29 ONE_MUL \ 30 ONE_MUL \ 31 ONE_MUL 32 33 #define SIXTEEN_MUL \ 34 FOUR_MUL \ 35 FOUR_MUL \ 36 FOUR_MUL \ 37 FOUR_MUL 38 39 #define THIRTYTWO_MUL \ 40 SIXTEEN_MUL \ 41 SIXTEEN_MUL 42 43 #define ONETWENTYEIGHT_MUL \ 44 THIRTYTWO_MUL \ 45 THIRTYTWO_MUL \ 46 THIRTYTWO_MUL \ 47 THIRTYTWO_MUL 48 49 #define EXPAND_256(CALL) \ 50 mp_digit carry = 0; \ 51 mp_digit a_i; \ 52 mp_digit a0b0, a1b1; \ 53 if (a_len & 255) { \ 54 if (a_len & 1) { \ 55 ONE_MUL \ 56 } \ 57 if (a_len & 2) { \ 58 ONE_MUL \ 59 ONE_MUL \ 60 } \ 61 if (a_len & 4) { \ 62 FOUR_MUL \ 63 } \ 64 if (a_len & 8) { \ 65 FOUR_MUL \ 66 FOUR_MUL \ 67 } \ 68 if (a_len & 16) { \ 69 SIXTEEN_MUL \ 70 } \ 71 if (a_len & 32) { \ 72 THIRTYTWO_MUL \ 73 } \ 74 if (a_len & 64) { \ 75 THIRTYTWO_MUL \ 76 THIRTYTWO_MUL \ 77 } \ 78 if (a_len & 128) { \ 79 ONETWENTYEIGHT_MUL \ 80 } \ 81 a_len = a_len & (-256); \ 82 } \ 83 if (a_len >= 256) { \ 84 carry = CALL(a, a_len, b, c, carry); \ 85 c += a_len; \ 86 } 87 88 #define FUNC_NAME(NAME) \ 89 mp_digit NAME(const mp_digit *a, \ 90 mp_size a_len, \ 91 mp_digit b, mp_digit *c, \ 92 mp_digit carry) 93 94 #define DECLARE_MUL_256(FNAME) \ 95 FUNC_NAME(FNAME) \ 96 { \ 97 mp_digit a_i; \ 98 mp_digit a0b0, a1b1; \ 99 while (a_len) { \ 100 ONETWENTYEIGHT_MUL \ 101 ONETWENTYEIGHT_MUL \ 102 a_len -= 256; \ 103 
} \ 104 return carry; \ 105 } 106 107 /* Expanding the loop in s_mpv_mul_d appeared to slow down the 108 (admittedly) small number of tests (i.e., timetest) used to 109 measure performance, so this define disables that optimization. */ 110 #define DO_NOT_EXPAND 1 111 112 /* Need forward declaration so it can be instantiated after 113 the routine that uses it; this helps locality somewhat */ 114 #if !defined(DO_NOT_EXPAND) 115 FUNC_NAME(s_mpv_mul_d_MUL256); 116 #endif 117 118 /* c = a * b */ 119 void 120 s_mpv_mul_d(const mp_digit *a, mp_size a_len, 121 mp_digit b, mp_digit *c) 122 { 123 #if defined(DO_NOT_EXPAND) 124 mp_digit carry = 0; 125 while (a_len--) { 126 mp_digit a_i = *a++; 127 mp_digit a0b0, a1b1; 128 129 MP_MUL_DxD(a_i, b, a1b1, a0b0); 130 131 a0b0 += carry; 132 if (a0b0 < carry) 133 ++a1b1; 134 *c++ = a0b0; 135 carry = a1b1; 136 } 137 #else 138 EXPAND_256(s_mpv_mul_d_MUL256) 139 #endif 140 *c = carry; 141 } 142 143 #if !defined(DO_NOT_EXPAND) 144 DECLARE_MUL_256(s_mpv_mul_d_MUL256) 145 #endif 146 147 #undef CARRY_ADD 148 /* This is redefined for the loop in s_mpv_mul_d_add */ 149 #define CARRY_ADD \ 150 a0b0 += a_i = *c; \ 151 if (a0b0 < a_i) \ 152 ++a1b1; 153 154 /* Need forward declaration so it can be instantiated between the 155 two routines that use it; this helps locality somewhat */ 156 FUNC_NAME(s_mpv_mul_d_add_MUL256); 157 158 /* c += a * b */ 159 void 160 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, 161 mp_digit b, mp_digit *c) 162 { 163 EXPAND_256(s_mpv_mul_d_add_MUL256) 164 *c = carry; 165 } 166 167 /* Instantiate multiply 256 routine here */ 168 DECLARE_MUL_256(s_mpv_mul_d_add_MUL256) 169 170 /* Presently, this is only used by the Montgomery arithmetic code. */ 171 /* c += a * b */ 172 void 173 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, 174 mp_digit b, mp_digit *c) 175 { 176 EXPAND_256(s_mpv_mul_d_add_MUL256) 177 while (carry) { 178 mp_digit c_i = *c; 179 carry += c_i; 180 *c++ = carry; 181 carry = carry < c_i; 182 } 183 }