mpi_mips.s (8834B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * MIPS assembly versions of the multi-precision inner loops
 *	s_mpv_mul_d_add, s_mpv_mul_d_add_prop, s_mpv_mul_d, s_mpv_sqr_add_prop
 *
 * Conventions used throughout this file:
 *   - Digits are read with lwu and written with sw at a 4-byte stride, i.e.
 *     every digit is handled as an unsigned 32-bit value; products and sums
 *     are kept in 64-bit registers (dmultu/daddu) and the carry is taken
 *     from bits 63..32 with dsrl32.
 *   - ".set noreorder" is in effect: the instruction written directly after
 *     every branch or jump is its delay slot and executes whether or not
 *     the branch is taken.
 *   - Register names (a0-a7, t0-t3, ra, zero) come from <regdef.h>.
 *     NOTE(review): a4-a7 presumably imply an n32/n64 ABI build - confirm.
 *   - Only argument and temporary registers are written; no stack is used.
 *   - Standalone comments are C comments so that the C preprocessor needed
 *     for the #include cannot mistake pseudo-code for a directive.
 */
#include <regdef.h>
	.set	noreorder
	.set	noat

	.section	.text, 1, 0x00000006, 4, 4
.text:
	.section	.text

/*---------------------------------------------------------------------
 * void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
 *                      mp_digit *c)
 *
 * c[0 .. a_len-1] += a[0 .. a_len-1] * b;  c[a_len] = final carry.
 * The final carry is stored (not added) into c[a_len].
 * Does nothing when a_len == 0.
 *
 * In:    a0 = a, a1 = a_len, a2 = b, a3 = c
 * Temps: a4,a5 = digits of a     a6,a7 = digits of c
 *        t0,t1 = 64-bit sums w0,w1     t2 = carry cy     t3 = loop test
 *
 * The main loop handles two digits per iteration and is software
 * pipelined: the product for the next digit is started before the
 * current sum has been stored.
 *---------------------------------------------------------------------*/
	.ent	s_mpv_mul_d_add
	.globl	s_mpv_mul_d_add

s_mpv_mul_d_add:
/*  if (a_len) { */
	beq	a1,zero,.L.1
	move	t2,zero		# cy = 0 (delay slot)
	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
	dsrl32	a2,a2,0		# This clears the upper 32 bits.
/*    a0 = a[0]; */
	lwu	a4,0(a0)
/*    w0 = ((mp_word)b * a0); */
	dmultu	a2,a4
/*    if (--a_len) { */
	addiu	a1,a1,-1
	beq	a1,zero,.L.2
/*      while (a_len >= 2) { */
	sltiu	t3,a1,2		# delay slot of the beq above
	bne	t3,zero,.L.3
/*        a1 = a[1]; */
	lwu	a5,4(a0)	# delay slot
.L.4:
/*        a_len -= 2; */
	addiu	a1,a1,-2
/*        c0 = c[0]; */
	lwu	a6,0(a3)
/*        w0 += cy; */
	mflo	t0
	daddu	t0,t0,t2
/*        w0 += c0; */
	daddu	t0,t0,a6
/*        w1 = (mp_word)b * a1; */
	dmultu	a2,a5
/*        cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
/*        c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*        a0 = a[2]; */
	lwu	a4,8(a0)
/*        a += 2; */
	addiu	a0,a0,8
/*        c1 = c[1]; */
	lwu	a7,4(a3)
/*        w1 += cy; */
	mflo	t1
	daddu	t1,t1,t2
/*        w1 += c1; */
	daddu	t1,t1,a7
/*        w0 = (mp_word)b * a0; */
	dmultu	a2,a4
/*        cy = CARRYOUT(w1); */
	dsrl32	t2,t1,0
/*        c[1] = ACCUM(w1); */
	sw	t1,4(a3)
/*        c += 2; */
	addiu	a3,a3,8
	sltiu	t3,a1,2
	beq	t3,zero,.L.4
/*        a1 = a[1];
 *        Delay slot: executed on loop exit too.  When a_len has reached 0
 *        this load reads one digit beyond a[]; the value is then unused. */
	lwu	a5,4(a0)
/*      } */
.L.3:
/*      c0 = c[0]; */
	lwu	a6,0(a3)
/*      w0 += cy; */
/*      if (a_len) { */
	mflo	t0
	beq	a1,zero,.L.5
	daddu	t0,t0,t2	# delay slot: w0 += cy on both paths
/*        w1 = (mp_word)b * a1; */
	dmultu	a2,a5
/*        w0 += c0; */
	daddu	t0,t0,a6
/*        cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
/*        c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*        c1 = c[1]; */
	lwu	a7,4(a3)
/*        w1 += cy; */
	mflo	t1
	daddu	t1,t1,t2
/*        w1 += c1; */
	daddu	t1,t1,a7
/*        c[1] = ACCUM(w1); */
	sw	t1,4(a3)
/*        cy = CARRYOUT(w1); */
	dsrl32	t2,t1,0
/*        c += 1;   so that 4(a3) below addresses the digit after c[1] */
	b	.L.6
	addiu	a3,a3,4		# delay slot
/*      } else { */
.L.5:
/*        w0 += c0; */
	daddu	t0,t0,a6
/*        c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*        cy = CARRYOUT(w0); */
	b	.L.6
	dsrl32	t2,t0,0		# delay slot
/*      } */
/*    } else { */
.L.2:
/*      c0 = c[0]; */
	lwu	a6,0(a3)
/*      w0 += c0; */
	mflo	t0
	daddu	t0,t0,a6
/*      c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*      cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
/*    } */
.L.6:
/*    c[1] = cy; */
	jr	ra
	sw	t2,4(a3)	# delay slot
/*  } */
.L.1:
	jr	ra
	nop

	.end	s_mpv_mul_d_add

/*---------------------------------------------------------------------
 * void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
 *                           mp_digit *c)
 *
 * c[0 .. a_len-1] += a[0 .. a_len-1] * b, then the remaining carry is
 * added into c[a_len], c[a_len+1], ... until it becomes zero.
 * Does nothing when a_len == 0.
 *
 * In:    a0 = a, a1 = a_len, a2 = b, a3 = c
 * Temps: a4,a5 = digits of a     a6,a7 = digits of c
 *        t0,t1 = 64-bit sums w0,w1     t2 = carry cy     t3 = loop test
 *
 * Same multiply/accumulate pipeline as s_mpv_mul_d_add; the difference
 * is that every path leaves a3 pointing at the first digit that was NOT
 * written, where the carry propagation loop (.M.7) then continues.
 *---------------------------------------------------------------------*/
	.ent	s_mpv_mul_d_add_prop
	.globl	s_mpv_mul_d_add_prop

s_mpv_mul_d_add_prop:
/*  if (a_len) { */
	beq	a1,zero,.M.1
	move	t2,zero		# cy = 0 (delay slot)
	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
	dsrl32	a2,a2,0		# This clears the upper 32 bits.
/*    a0 = a[0]; */
	lwu	a4,0(a0)
/*    w0 = ((mp_word)b * a0); */
	dmultu	a2,a4
/*    if (--a_len) { */
	addiu	a1,a1,-1
	beq	a1,zero,.M.2
/*      while (a_len >= 2) { */
	sltiu	t3,a1,2		# delay slot of the beq above
	bne	t3,zero,.M.3
/*        a1 = a[1]; */
	lwu	a5,4(a0)	# delay slot
.M.4:
/*        a_len -= 2; */
	addiu	a1,a1,-2
/*        c0 = c[0]; */
	lwu	a6,0(a3)
/*        w0 += cy; */
	mflo	t0
	daddu	t0,t0,t2
/*        w0 += c0; */
	daddu	t0,t0,a6
/*        w1 = (mp_word)b * a1; */
	dmultu	a2,a5
/*        cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
/*        c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*        a0 = a[2]; */
	lwu	a4,8(a0)
/*        a += 2; */
	addiu	a0,a0,8
/*        c1 = c[1]; */
	lwu	a7,4(a3)
/*        w1 += cy; */
	mflo	t1
	daddu	t1,t1,t2
/*        w1 += c1; */
	daddu	t1,t1,a7
/*        w0 = (mp_word)b * a0; */
	dmultu	a2,a4
/*        cy = CARRYOUT(w1); */
	dsrl32	t2,t1,0
/*        c[1] = ACCUM(w1); */
	sw	t1,4(a3)
/*        c += 2; */
	addiu	a3,a3,8
	sltiu	t3,a1,2
	beq	t3,zero,.M.4
/*        a1 = a[1];
 *        Delay slot: executed on loop exit too.  When a_len has reached 0
 *        this load reads one digit beyond a[]; the value is then unused. */
	lwu	a5,4(a0)
/*      } */
.M.3:
/*      c0 = c[0]; */
	lwu	a6,0(a3)
/*      w0 += cy; */
/*      if (a_len) { */
	mflo	t0
	beq	a1,zero,.M.5
	daddu	t0,t0,t2	# delay slot: w0 += cy on both paths
/*        w1 = (mp_word)b * a1; */
	dmultu	a2,a5
/*        w0 += c0; */
	daddu	t0,t0,a6
/*        cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
/*        c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*        c1 = c[1]; */
	lwu	a7,4(a3)
/*        w1 += cy; */
	mflo	t1
	daddu	t1,t1,t2
/*        w1 += c1; */
	daddu	t1,t1,a7
/*        c[1] = ACCUM(w1); */
	sw	t1,4(a3)
/*        cy = CARRYOUT(w1); */
	dsrl32	t2,t1,0
/*        c += 2;   two digits were written; propagation starts after them */
	b	.M.6
	addiu	a3,a3,8		# delay slot
/*      } else { */
.M.5:
/*        w0 += c0; */
	daddu	t0,t0,a6
/*        c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*        cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
	b	.M.6
	addiu	a3,a3,4		# c += 1 (delay slot)
/*      } */
/*    } else { */
.M.2:
/*      c0 = c[0]; */
	lwu	a6,0(a3)
/*      w0 += c0; */
	mflo	t0
	daddu	t0,t0,a6
/*      c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*      cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
	addiu	a3,a3,4		# c += 1
/*    } */
.M.6:

/*    while (cy) { */
	beq	t2,zero,.M.1
	nop
.M.7:
/*      mp_word w = (mp_word)*c + cy; */
	lwu	a6,0(a3)
	daddu	t2,t2,a6
/*      *c++ = ACCUM(w); */
	sw	t2,0(a3)
/*      cy = CARRYOUT(w); */
	dsrl32	t2,t2,0
	bne	t2,zero,.M.7
	addiu	a3,a3,4		# delay slot: c++

/*    } */
.M.1:
	jr	ra
	nop

	.end	s_mpv_mul_d_add_prop

/*---------------------------------------------------------------------
 * void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
 *                  mp_digit *c)
 *
 * c[0 .. a_len-1] = a[0 .. a_len-1] * b;  c[a_len] = final carry.
 * The previous contents of c are not read.
 * Does nothing when a_len == 0.
 *
 * In:    a0 = a, a1 = a_len, a2 = b, a3 = c
 * Temps: a4,a5 = digits of a
 *        t0,t1 = 64-bit sums w0,w1     t2 = carry cy     t3 = loop test
 *---------------------------------------------------------------------*/
	.ent	s_mpv_mul_d
	.globl	s_mpv_mul_d

s_mpv_mul_d:
/*  if (a_len) { */
	beq	a1,zero,.N.1
	move	t2,zero		# cy = 0 (delay slot)
	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
	dsrl32	a2,a2,0		# This clears the upper 32 bits.
/*    a0 = a[0]; */
	lwu	a4,0(a0)
/*    w0 = ((mp_word)b * a0); */
	dmultu	a2,a4
/*    if (--a_len) { */
	addiu	a1,a1,-1
	beq	a1,zero,.N.2
/*      while (a_len >= 2) { */
	sltiu	t3,a1,2		# delay slot of the beq above
	bne	t3,zero,.N.3
/*        a1 = a[1]; */
	lwu	a5,4(a0)	# delay slot
.N.4:
/*        a_len -= 2; */
	addiu	a1,a1,-2
/*        w0 += cy; */
	mflo	t0
	daddu	t0,t0,t2
/*        cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
/*        w1 = (mp_word)b * a1; */
	dmultu	a2,a5
/*        c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*        a0 = a[2]; */
	lwu	a4,8(a0)
/*        a += 2; */
	addiu	a0,a0,8
/*        w1 += cy; */
	mflo	t1
	daddu	t1,t1,t2
/*        cy = CARRYOUT(w1); */
	dsrl32	t2,t1,0
/*        w0 = (mp_word)b * a0; */
	dmultu	a2,a4
/*        c[1] = ACCUM(w1); */
	sw	t1,4(a3)
/*        c += 2; */
	addiu	a3,a3,8
	sltiu	t3,a1,2
	beq	t3,zero,.N.4
/*        a1 = a[1];
 *        Delay slot: executed on loop exit too.  When a_len has reached 0
 *        this load reads one digit beyond a[]; the value is then unused. */
	lwu	a5,4(a0)
/*      } */
.N.3:
/*      w0 += cy; */
/*      if (a_len) { */
	mflo	t0
	beq	a1,zero,.N.5
	daddu	t0,t0,t2	# delay slot: w0 += cy on both paths
/*        w1 = (mp_word)b * a1; */
	dmultu	a2,a5
/*        cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
/*        c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*        w1 += cy; */
	mflo	t1
	daddu	t1,t1,t2
/*        c[1] = ACCUM(w1); */
	sw	t1,4(a3)
/*        cy = CARRYOUT(w1); */
	dsrl32	t2,t1,0
/*        c += 1;   so that 4(a3) below addresses the digit after c[1] */
	b	.N.6
	addiu	a3,a3,4		# delay slot
/*      } else { */
.N.5:
/*        c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*        cy = CARRYOUT(w0); */
	b	.N.6
	dsrl32	t2,t0,0		# delay slot
/*      } */
/*    } else { */
.N.2:
	mflo	t0
/*      c[0] = ACCUM(w0); */
	sw	t0,0(a3)
/*      cy = CARRYOUT(w0); */
	dsrl32	t2,t0,0
/*    } */
.N.6:
/*    c[1] = cy; */
	jr	ra
	sw	t2,4(a3)	# delay slot
/*  } */
.N.1:
	jr	ra
	nop

	.end	s_mpv_mul_d

/*---------------------------------------------------------------------
 * void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
 *
 * For each i in 0 .. a_len-1 the 64-bit square a[i]*a[i] is added into
 * the digit pair sqrs[2i], sqrs[2i+1], with the carry rippling into the
 * next pair.  A carry left over after the last pair is added into
 * sqrs[2*a_len], sqrs[2*a_len+1], ... until it becomes zero.
 * Does nothing when a_len == 0.
 *
 * registers
 *   a0      *a
 *   a1      a_len
 *   a2      *sqrs
 *   a3      digit from *a, a_i
 *   a4      square of digit from a, then the 64-bit sum being stored
 *   a5,a6   next 2 digits in sqrs (combined into one 64-bit value)
 *   a7      carry out of (pair + square)
 *   t0      carry into the next pair / next digit
 *---------------------------------------------------------------------*/
	.ent	s_mpv_sqr_add_prop
	.globl	s_mpv_sqr_add_prop

s_mpv_sqr_add_prop:
	beq	a1,zero,.P.9	# a_len == 0: no digit to read, just return
	move	a7,zero		# delay slot
	move	t0,zero		# no carry yet
	lwu	a3,0(a0)
	addiu	a1,a1,-1	# --a_len
	dmultu	a3,a3		# start a[0] * a[0]
	beq	a1,zero,.P.3	# jump if that was the only square
	addiu	a0,a0,4		# ++a (delay slot)
.P.2:
	lwu	a5,0(a2)
	lwu	a6,4(a2)
	addiu	a2,a2,8		# sqrs += 2;
	dsll32	a6,a6,0
	daddu	a5,a5,a6	# a5 = 64-bit value of the digit pair
	lwu	a3,0(a0)	# next digit of a
	addiu	a0,a0,4		# ++a
	mflo	a4		# square of the previous digit
	daddu	a6,a5,a4
	sltu	a7,a6,a5	# a7 = a6 < a5	detect overflow
	dmultu	a3,a3		# start the next square
	daddu	a4,a6,t0	# add carry from the previous pair
	sltu	t0,a4,a6
	add	t0,t0,a7	# carry into the next pair
	sw	a4,-8(a2)	# low digit of the pair
	addiu	a1,a1,-1	# --a_len
	dsrl32	a4,a4,0
	bne	a1,zero,.P.2	# loop if a_len > 0
	sw	a4,-4(a2)	# high digit of the pair (delay slot)
.P.3:
	/* last pair: same as the loop body without fetching another digit */
	lwu	a5,0(a2)
	lwu	a6,4(a2)
	addiu	a2,a2,8		# sqrs += 2;
	dsll32	a6,a6,0
	daddu	a5,a5,a6
	mflo	a4
	daddu	a6,a5,a4
	sltu	a7,a6,a5	# a7 = a6 < a5	detect overflow
	daddu	a4,a6,t0
	sltu	t0,a4,a6
	add	t0,t0,a7
	sw	a4,-8(a2)	# low digit of the pair
	dsrl32	a4,a4,0
	beq	t0,zero,.P.9	# jump if no carry
	sw	a4,-4(a2)	# high digit of the pair (delay slot, both paths)
.P.8:
	/* propagate final carry: *sqrs += carry, one 32-bit digit at a time.
	 * The digit is zero-extended, so the sum cannot overflow 64 bits and
	 * the carry out of the digit is simply bits 63..32 of the sum. */
	lwu	a5,0(a2)
	daddu	a6,a5,t0
	sw	a6,0(a2)	# store the updated digit
	dsrl32	t0,a6,0		# carry out of this digit
	bne	t0,zero,.P.8	# loop if carry persists
	addiu	a2,a2,4		# sqrs++ (delay slot)
.P.9:
	jr	ra
	nop

	.end	s_mpv_sqr_add_prop