! vis_32.il (26216B)
!
! This Source Code Form is subject to the terms of the Mozilla Public
! License, v. 2.0. If a copy of the MPL was not distributed with this
! file, You can obtain one at http://mozilla.org/MPL/2.0/.

! The interface to the VIS instructions as declared below (and in the VIS
! User's Manual) will not change, but the macro implementation might change
! in the future.

!--------------------------------------------------------------------
! Pure edge handling instructions
!
! Every template in this group is a single edge instruction: the two
! operands are read from %o0 and %o1 and the result replaces %o0.
!
! int vis_edge8(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge8,8
        edge8   %o0,%o1,%o0
        .end
!
! int vis_edge8l(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge8l,8
        edge8l  %o0,%o1,%o0
        .end
!
! int vis_edge16(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge16,8
        edge16  %o0,%o1,%o0
        .end
!
! int vis_edge16l(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge16l,8
        edge16l %o0,%o1,%o0
        .end
!
! int vis_edge32(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge32,8
        edge32  %o0,%o1,%o0
        .end
!
! int vis_edge32l(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge32l,8
        edge32l %o0,%o1,%o0
        .end

!--------------------------------------------------------------------
! Edge handling instructions with negative return values if cc set
!
! Every template in this group runs the edge instruction, then builds a
! flag word in %o1 (0, replaced by -1024 when the movgu condition on
! %icc holds) and ORs that flag word into the edge result in %o0.
!
! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge8cc,8
        edge8   %o0,%o1,%o0
        mov     0,%o1
        movgu   %icc,-1024,%o1
        or      %o1,%o0,%o0
        .end
!
! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge8lcc,8
        edge8l  %o0,%o1,%o0
        mov     0,%o1
        movgu   %icc,-1024,%o1
        or      %o1,%o0,%o0
        .end
!
! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge16cc,8
        edge16  %o0,%o1,%o0
        mov     0,%o1
        movgu   %icc,-1024,%o1
        or      %o1,%o0,%o0
        .end
!
! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
!
! vis_edge16lcc: edge16l result in %o0, with -1024 ORed in when the
! movgu condition on %icc holds.
        .inline vis_edge16lcc,8
        edge16l %o0,%o1,%o0
        mov     0,%o1
        movgu   %icc,-1024,%o1
        or      %o1,%o0,%o0
        .end
!
! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge32cc,8
        edge32  %o0,%o1,%o0
        mov     0,%o1
        movgu   %icc,-1024,%o1
        or      %o1,%o0,%o0
        .end
!
! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
!
        .inline vis_edge32lcc,8
        edge32l %o0,%o1,%o0
        mov     0,%o1
        movgu   %icc,-1024,%o1
        or      %o1,%o0,%o0
        .end

!--------------------------------------------------------------------
! Alignment instructions
!
! vis_alignaddr and vis_alignaddrl are single instructions on %o0/%o1
! with the result written back to %o0.
!
! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
!
        .inline vis_alignaddr,8
        alignaddr       %o0,%o1,%o0
        .end
!
! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
!
        .inline vis_alignaddrl,8
        alignaddrl      %o0,%o1,%o0
        .end
!
! double vis_faligndata(double /*frs1*/, double /*frs2*/);
!
! The doubles held in %o0:%o1 and %o2:%o3 are each stored to the
! doubleword at [%sp+0x48] and reloaded into %f4 and %f10; the
! faligndata result is left in %f0.
!
        .inline vis_faligndata,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        faligndata      %f4,%f10,%f0
        .end

!--------------------------------------------------------------------
! Partitioned comparison instructions
!
! Both double operands are moved from %o0:%o1 and %o2:%o3 into %f4 and
! %f10 through [%sp+0x48]; the compare instruction writes its result
! to the integer register %o0.
!
! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fcmple16,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fcmple16        %f4,%f10,%o0
        .end
!
! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fcmpne16,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fcmpne16        %f4,%f10,%o0
        .end
!
! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fcmple32,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fcmple32        %f4,%f10,%o0
        .end
!
! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
!
! vis_fcmpne32: both doubles go through [%sp+0x48] into %f4 and %f10;
! the fcmpne32 result is written to %o0.
        .inline vis_fcmpne32,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fcmpne32        %f4,%f10,%o0
        .end
!
! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fcmpgt16,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fcmpgt16        %f4,%f10,%o0
        .end
!
! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fcmpeq16,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fcmpeq16        %f4,%f10,%o0
        .end
!
! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fcmpgt32,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fcmpgt32        %f4,%f10,%o0
        .end
!
! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fcmpeq32,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fcmpeq32        %f4,%f10,%o0
        .end

!--------------------------------------------------------------------
! Partitioned arithmetic
!
! Operands are moved from the %o registers into %f4 (first) and %f10
! (second) through stack words at [%sp+0x44] / [%sp+0x48]; the result
! is produced in %f0.
!
! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
!
! The float is in %o0; the double occupies %o1 and %o2, which are
! stored as two separate words at 0x48 and 0x4c before the ldd.
!
        .inline vis_fmul8x16,12
        st      %o0,[%sp+0x44]
        ld      [%sp+0x44],%f4
        st      %o1,[%sp+0x48]
        st      %o2,[%sp+0x4c]
        ldd     [%sp+0x48],%f10
        fmul8x16        %f4,%f10,%f0
        .end
!
! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
!
! Same operation as vis_fmul8x16, but the double is read from
! %o2:%o3 with a single std; %o1 (the dummy argument) is not used.
!
        .inline vis_fmul8x16_dummy,16
        st      %o0,[%sp+0x44]
        ld      [%sp+0x44],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fmul8x16        %f4,%f10,%f0
        .end
!
! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fmul8x16au,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fmul8x16au      %f4,%f10,%f0
        .end
!
! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
!
! vis_fmul8x16al: the floats in %o0 and %o1 are moved to %f4 and %f10
! through [%sp+0x44] and [%sp+0x48]; the result is produced in %f0.
        .inline vis_fmul8x16al,8
        st      %o0,[%sp+0x44]
        ld      [%sp+0x44],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fmul8x16al      %f4,%f10,%f0
        .end
!
! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fmul8sux16,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fmul8sux16      %f4,%f10,%f0
        .end
!
! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fmul8ulx16,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fmul8ulx16      %f4,%f10,%f0
        .end
!
! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fmuld8sux16,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fmuld8sux16     %f4,%f10,%f0
        .end
!
! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fmuld8ulx16,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fmuld8ulx16     %f4,%f10,%f0
        .end
!
! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
!
! Both operands are bounced through the doubleword at [%sp+0x48],
! the same scratch slot the other two-double templates use.  The
! first operand is reloaded into %f4 before the slot is overwritten
! with the second, so sharing one slot is safe.  (An earlier version
! spilled the first operand at [%sp+0x40]; in the 32-bit SPARC frame
! layout that word is the hidden aggregate-return pointer rather
! than part of the register-argument dump area.)
!
        .inline vis_fpadd16,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fpadd16 %f4,%f10,%f0
        .end
!
! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fpadd16s,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fpadd16s        %f4,%f10,%f0
        .end
!
! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fpadd32,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fpadd32 %f4,%f10,%f0
        .end
!
! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fpadd32s,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fpadd32s        %f4,%f10,%f0
        .end
!
! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
!
! As for the other two-operand templates, the operands are moved from
! the %o registers to %f4 and %f10 through [%sp+0x48] and the result
! is produced in %f0.
!
        .inline vis_fpsub16,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fpsub16 %f4,%f10,%f0
        .end
!
! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fpsub16s,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fpsub16s        %f4,%f10,%f0
        .end
!
! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fpsub32,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fpsub32 %f4,%f10,%f0
        .end
!
! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fpsub32s,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fpsub32s        %f4,%f10,%f0
        .end

!--------------------------------------------------------------------
! Pixel packing
!
! float vis_fpack16(double /*frs2*/);
!
! The double in %o0:%o1 is moved to %f4 and packed into %f0.
!
        .inline vis_fpack16,8
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        fpack16 %f4,%f0
        .end

!
! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
!
! The first operand is packed into %f0 and the second into %f1, so the
! two packed words sit side by side in %f0:%f1.
!
        .inline vis_fpack16_pair,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fpack16 %f4,%f0
        fpack16 %f10,%f1
        .end
!
! void vis_st2_fpack16(double, double, double *)
!
! Packs both operands as above, then stores %f0 and %f1 as two
! separate words at [%o4+0] and [%o4+4].
!
        .inline vis_st2_fpack16,20
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fpack16 %f4,%f0
        fpack16 %f10,%f1
        st      %f0,[%o4+0]
        st      %f1,[%o4+4]
        .end
!
! void vis_std_fpack16(double, double, double *)
!
! Packs both operands as above, then stores %f0:%f1 with one std
! at [%o4].
!
        .inline vis_std_fpack16,20
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fpack16 %f4,%f0
        fpack16 %f10,%f1
        std     %f0,[%o4]
        .end
!
! void vis_st2_fpackfix(double, double, double *)
!
! vis_st2_fpackfix: fpackfix of the first operand goes to %f0 and of
! the second to %f1; the two words are stored at [%o4+0] and [%o4+4].
        .inline vis_st2_fpackfix,20
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fpackfix        %f4,%f0
        fpackfix        %f10,%f1
        st      %f0,[%o4+0]
        st      %f1,[%o4+4]
        .end
!
! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
!
! frs1 is loaded into %f0:%f1; the packed frs2 then overwrites %f0
! only, leaving %f1 as loaded.
!
        .inline vis_fpack16_to_hi,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f0
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        fpack16 %f4,%f0
        .end

! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
!
! frs1 is loaded into %f0:%f1; frs2 is packed into %f3 and then
! copied to %f1, leaving %f0 as loaded.
!
        .inline vis_fpack16_to_lo,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f0
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        fpack16 %f4,%f3
        fmovs   %f3,%f1         /* without this, optimizer goes wrong */
        .end

!
! double vis_fpack32(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fpack32,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fpack32 %f4,%f10,%f0
        .end
!
! float vis_fpackfix(double /*frs2*/);
!
        .inline vis_fpackfix,8
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        fpackfix        %f4,%f0
        .end
!
! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
!
! The operands are loaded into %f4 and %f6; their fpackfix results go
! to %f0 and %f1 respectively.
!
        .inline vis_fpackfix_pair,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f6
        fpackfix        %f4,%f0
        fpackfix        %f6,%f1
        .end

!--------------------------------------------------------------------
! Motion estimation
!
! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
!
! The third argument (%o4:%o5) is loaded into %f0 first, because %f0
! is the destination register of pdist; the first two arguments are
! then loaded into %f4 and %f10.
!
        .inline vis_pdist,24
        std     %o4,[%sp+0x48]
        ldd     [%sp+0x48],%f0
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        pdist   %f4,%f10,%f0
        .end

!--------------------------------------------------------------------
! Channel merging
!
! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
!
! vis_fpmerge: the floats in %o0 and %o1 are moved to %f4 and %f10
! through [%sp+0x48]; the fpmerge result is produced in %f0.
        .inline vis_fpmerge,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fpmerge %f4,%f10,%f0
        .end

!--------------------------------------------------------------------
! Pixel expansion
!
! double vis_fexpand(float /*frs2*/);
!
        .inline vis_fexpand,4
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        fexpand %f4,%f0
        .end

! double vis_fexpand_hi(double /*frs2*/);
!
! The double is loaded into %f4:%f5 and fexpand is applied to %f4.
!
        .inline vis_fexpand_hi,8
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        fexpand %f4,%f0
        .end

! double vis_fexpand_lo(double /*frs2*/);
!
! The double is loaded into %f4:%f5; %f5 is copied to %f2 and fexpand
! is applied to that copy.
!
        .inline vis_fexpand_lo,8
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        fmovs   %f5, %f2
        fexpand %f2,%f0
        .end

!--------------------------------------------------------------------
! Bitwise logical operations
!
! Double variants move their operands from %o0:%o1 (and %o2:%o3) to
! %f4 (and %f10) with std/ldd; the single-precision "s" variants move
! %o0 (and %o1) with st/ld.  All use the scratch slot at [%sp+0x48]
! and produce the result in %f0.
!
! double vis_fnor(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fnor,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fnor    %f4,%f10,%f0
        .end
!
! float vis_fnors(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fnors,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fnors   %f4,%f10,%f0
        .end
!
! double vis_fandnot(double /*frs1*/, double /*frs2*/);
!
! Implemented with the fandnot1 form of the instruction.
!
        .inline vis_fandnot,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fandnot1        %f4,%f10,%f0
        .end
!
! float vis_fandnots(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fandnots,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fandnot1s       %f4,%f10,%f0
        .end
!
! double vis_fnot(double /*frs1*/);
!
        .inline vis_fnot,8
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        fnot1   %f4,%f0
        .end
!
! float vis_fnots(float /*frs1*/);
!
! vis_fnots: the float in %o0 is moved to %f4 through [%sp+0x48] and
! fnot1s writes the result to %f0.
        .inline vis_fnots,4
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        fnot1s  %f4,%f0
        .end
!
! double vis_fxor(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fxor,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fxor    %f4,%f10,%f0
        .end
!
! float vis_fxors(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fxors,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fxors   %f4,%f10,%f0
        .end
!
! double vis_fnand(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fnand,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fnand   %f4,%f10,%f0
        .end
!
! float vis_fnands(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fnands,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fnands  %f4,%f10,%f0
        .end
!
! double vis_fand(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fand,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fand    %f4,%f10,%f0
        .end
!
! float vis_fands(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fands,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fands   %f4,%f10,%f0
        .end
!
! double vis_fxnor(double /*frs1*/, double /*frs2*/);
!
        .inline vis_fxnor,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fxnor   %f4,%f10,%f0
        .end
!
! float vis_fxnors(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fxnors,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fxnors  %f4,%f10,%f0
        .end
!
! double vis_fsrc(double /*frs1*/);
!
! Implemented with fsrc1, which copies %f4 to %f0.
!
        .inline vis_fsrc,8
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        fsrc1   %f4,%f0
        .end
!
! float vis_fsrcs(float /*frs1*/);
!
! The float in %o0 is moved to %f4 through [%sp+0x48] and fsrc1s
! copies it to %f0.
!
        .inline vis_fsrcs,4
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        fsrc1s  %f4,%f0
        .end
!
! double vis_fornot(double /*frs1*/, double /*frs2*/);
!
! Implemented with the fornot1 form of the instruction.
!
        .inline vis_fornot,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        fornot1 %f4,%f10,%f0
        .end
!
! float vis_fornots(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fornots,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fornot1s        %f4,%f10,%f0
        .end
!
! double vis_for(double /*frs1*/, double /*frs2*/);
!
        .inline vis_for,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        std     %o2,[%sp+0x48]
        ldd     [%sp+0x48],%f10
        for     %f4,%f10,%f0
        .end
!
! float vis_fors(float /*frs1*/, float /*frs2*/);
!
        .inline vis_fors,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        st      %o1,[%sp+0x48]
        ld      [%sp+0x48],%f10
        fors    %f4,%f10,%f0
        .end
!
! The four templates below take no arguments and write %f0 directly.
!
! double vis_fzero(/* void */)
!
        .inline vis_fzero,0
        fzero   %f0
        .end
!
! float vis_fzeros(/* void */)
!
        .inline vis_fzeros,0
        fzeros  %f0
        .end
!
! double vis_fone(/* void */)
!
        .inline vis_fone,0
        fone    %f0
        .end
!
! float vis_fones(/* void */)
!
        .inline vis_fones,0
        fones   %f0
        .end

!--------------------------------------------------------------------
! Partial store instructions
!
! The double to store is moved from %o0:%o1 to %f4 through
! [%sp+0x48]; stda then uses %o2 as the address and %o3 as the mask
! operand, with the ASI given as a literal.
!
! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
!
        .inline vis_stdfa_ASI_PST8P,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]%o3,0xc0       ! ASI_PST8_P
        .end
!
! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
!
        .inline vis_stdfa_ASI_PST8PL,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]%o3,0xc8       ! ASI_PST8_PL
        .end
!
! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
!
! vis_stdfa_ASI_PST8P_int_pair: one word is loaded from [%o0] into %f4
! and one from [%o1] into %f5; the pair %f4:%f5 is then partially
! stored at [%o2] with mask operand %o3.
        .inline vis_stdfa_ASI_PST8P_int_pair,16
        ld      [%o0],%f4
        ld      [%o1],%f5
        stda    %f4,[%o2]%o3,0xc0       ! ASI_PST8_P
        .end
!
! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
!
        .inline vis_stdfa_ASI_PST8S,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]%o3,0xc1       ! ASI_PST8_S
        .end
!
! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
!
        .inline vis_stdfa_ASI_PST16P,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]%o3,0xc2       ! ASI_PST16_P
        .end
!
! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
!
        .inline vis_stdfa_ASI_PST16S,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]%o3,0xc3       ! ASI_PST16_S
        .end
!
! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
!
        .inline vis_stdfa_ASI_PST32P,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]%o3,0xc4       ! ASI_PST32_P
        .end
!
! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
!
        .inline vis_stdfa_ASI_PST32S,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]%o3,0xc5       ! ASI_PST32_S
        .end

!--------------------------------------------------------------------
! Short store instructions
!
! The double is moved from %o0:%o1 to %f4 through [%sp+0x48] and
! stored with stda at [%o2] (or [%o2+%o3] for the _index variants)
! using the literal ASI noted on each line.
!
! vis_stdfa_ASI_FL8P(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_FL8P,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0xd0   ! ASI_FL8_P
        .end
!
! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
!
        .inline vis_stdfa_ASI_FL8P_index,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2+%o3]0xd0       ! ASI_FL8_P
        .end
!
! vis_stdfa_ASI_FL8S(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_FL8S,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0xd1   ! ASI_FL8_S
        .end
!
! vis_stdfa_ASI_FL16P(double frd, void *rs1)
!
! vis_stdfa_ASI_FL16P: the double in %o0:%o1 is moved to %f4 through
! [%sp+0x48] and stored with stda at [%o2].
        .inline vis_stdfa_ASI_FL16P,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0xd2   ! ASI_FL16_P
        .end
!
! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
!
        .inline vis_stdfa_ASI_FL16P_index,16
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2+%o3]0xd2       ! ASI_FL16_P
        .end
!
! vis_stdfa_ASI_FL16S(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_FL16S,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0xd3   ! ASI_FL16_S
        .end
!
! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_FL8PL,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0xd8   ! ASI_FL8_PL
        .end
!
! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_FL8SL,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0xd9   ! ASI_FL8_SL
        .end
!
! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_FL16PL,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0xda   ! ASI_FL16_PL
        .end
!
! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_FL16SL,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0xdb   ! ASI_FL16_SL
        .end

!--------------------------------------------------------------------
! Short load instructions
!
! Each template loads with ldda into %f4 from [%o0] (or [%o0+%o1])
! using a literal ASI, then copies %f4 to %f0 with fmovd.
!
! double vis_lddfa_ASI_FL8P(void *rs1)
!
        .inline vis_lddfa_ASI_FL8P,4
        ldda    [%o0]0xd0,%f4   ! ASI_FL8_P
        fmovd   %f4,%f0         ! Compiler can clean this up
        .end
!
! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
!
        .inline vis_lddfa_ASI_FL8P_index,8
        ldda    [%o0+%o1]0xd0,%f4
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
!
! The index is arithmetically shifted right by 16 bits before it is
! added to the base address.
!
        .inline vis_lddfa_ASI_FL8P_hi,8
        sra     %o1,16,%o1
        ldda    [%o0+%o1]0xd0,%f4
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
!
! vis_lddfa_ASI_FL8P_lo: the index is shifted left by 16 and then
! arithmetically right by 16 (keeping its low 16 bits, sign-extended)
! before it is added to the base address.
        .inline vis_lddfa_ASI_FL8P_lo,8
        sll     %o1,16,%o1
        sra     %o1,16,%o1
        ldda    [%o0+%o1]0xd0,%f4
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL8S(void *rs1)
!
        .inline vis_lddfa_ASI_FL8S,4
        ldda    [%o0]0xd1,%f4   ! ASI_FL8_S
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL16P(void *rs1)
!
        .inline vis_lddfa_ASI_FL16P,4
        ldda    [%o0]0xd2,%f4   ! ASI_FL16_P
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
!
        .inline vis_lddfa_ASI_FL16P_index,8
        ldda    [%o0+%o1]0xd2,%f4       ! ASI_FL16_P
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL16S(void *rs1)
!
        .inline vis_lddfa_ASI_FL16S,4
        ldda    [%o0]0xd3,%f4   ! ASI_FL16_S
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL8PL(void *rs1)
!
        .inline vis_lddfa_ASI_FL8PL,4
        ldda    [%o0]0xd8,%f4   ! ASI_FL8_PL
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
!
        .inline vis_lddfa_ASI_FL8PL_index,8
        ldda    [%o0+%o1]0xd8,%f4       ! ASI_FL8_PL
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL8SL(void *rs1)
!
        .inline vis_lddfa_ASI_FL8SL,4
        ldda    [%o0]0xd9,%f4   ! ASI_FL8_SL
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL16PL(void *rs1)
!
        .inline vis_lddfa_ASI_FL16PL,4
        ldda    [%o0]0xda,%f4   ! ASI_FL16_PL
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
!
        .inline vis_lddfa_ASI_FL16PL_index,8
        ldda    [%o0+%o1]0xda,%f4       ! ASI_FL16_PL
        fmovd   %f4,%f0
        .end
!
! double vis_lddfa_ASI_FL16SL(void *rs1)
!
        .inline vis_lddfa_ASI_FL16SL,4
        ldda    [%o0]0xdb,%f4   ! ASI_FL16_SL
        fmovd   %f4,%f0
        .end

!--------------------------------------------------------------------
! Graphics status register
!
! unsigned int vis_read_gsr(void)
!
! Copies %gsr into %o0.
!
        .inline vis_read_gsr,0
        rd      %gsr,%o0
        .end
!
! void vis_write_gsr(unsigned int /* GSR */)
!
! Writes %g0 combined with %o0 into %gsr.
!
        .inline vis_write_gsr,4
        wr      %g0,%o0,%gsr
        .end

!--------------------------------------------------------------------
! Voxel texture mapping
!
! The 64-bit first argument arrives as two 32-bit halves in %o0 and
! %o1; the three templates below join them into %o3 before issuing
! the array instruction with %o2 as the second operand.
!
! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/)
!
        .inline vis_array8,12
        sllx    %o0,32,%o0
        srl     %o1,0,%o1       ! clear the most significant 32 bits of %o1
        or      %o0,%o1,%o3     ! join %o0 and %o1 into %o3
        array8  %o3,%o2,%o0
        .end
!
! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
!
        .inline vis_array16,12
        sllx    %o0,32,%o0
        srl     %o1,0,%o1       ! clear the most significant 32 bits of %o1
        or      %o0,%o1,%o3     ! join %o0 and %o1 into %o3
        array16 %o3,%o2,%o0
        .end
!
! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
!
        .inline vis_array32,12
        sllx    %o0,32,%o0
        srl     %o1,0,%o1       ! clear the most significant 32 bits of %o1
        or      %o0,%o1,%o3     ! join %o0 and %o1 into %o3
        array32 %o3,%o2,%o0
        .end

!--------------------------------------------------------------------
! Register aliasing and type casts
!
! These templates only move bits between the %o registers and the
! %f0:%f1 pair through stack scratch words; no VIS arithmetic is done.
!
! float vis_read_hi(double /* frs1 */);
!
        .inline vis_read_hi,8
        std     %o0,[%sp+0x48]  ! store double frs1
        ldd     [%sp+0x48],%f0  ! %f0:%f1 = double frs1; return %f0;
        .end
!
! float vis_read_lo(double /* frs1 */);
!
        .inline vis_read_lo,8
        std     %o0,[%sp+0x48]  ! store double frs1
        ldd     [%sp+0x48],%f0  ! %f0:%f1 = double frs1;
        fmovs   %f1,%f0         ! %f0 = low word (frs1); return %f0;
        .end
!
! double vis_write_hi(double /* frs1 */, float /* frs2 */);
!
        .inline vis_write_hi,12
        std     %o0,[%sp+0x48]  ! store double frs1;
        ldd     [%sp+0x48],%f0  ! %f0:%f1 = double frs1;
        st      %o2,[%sp+0x44]  ! store float frs2;
        ld      [%sp+0x44],%f2  ! %f2 = float frs2;
        fmovs   %f2,%f0         ! %f0 = float frs2; return %f0:f1;
        .end
!
! double vis_write_lo(double /* frs1 */, float /* frs2 */);
!
! Loads frs1 into %f0:%f1, then overwrites %f1 with frs2.
!
        .inline vis_write_lo,12
        std     %o0,[%sp+0x48]  ! store double frs1;
        ldd     [%sp+0x48],%f0  ! %f0:%f1 = double frs1;
        st      %o2,[%sp+0x44]  ! store float frs2;
        ld      [%sp+0x44],%f2  ! %f2 = float frs2;
        fmovs   %f2,%f1         ! %f1 = float frs2; return %f0:f1;
        .end
!
! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
!
! Places frs1 in %f0 and frs2 in %f1, reusing one scratch word.
!
        .inline vis_freg_pair,8
        st      %o0,[%sp+0x48]  ! store float frs1
        ld      [%sp+0x48],%f0
        st      %o1,[%sp+0x48]  ! store float frs2
        ld      [%sp+0x48],%f1
        .end
!
! float vis_to_float(unsigned int /*value*/);
!
        .inline vis_to_float,4
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f0
        .end
!
! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
!
! %o0:%o1 are stored as one doubleword and reloaded as %f0:%f1.
!
        .inline vis_to_double,8
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f0
        .end
!
! double vis_to_double_dup(unsigned int /*value*/);
!
        .inline vis_to_double_dup,4
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f1
        fmovs   %f1,%f0         ! duplicate value
        .end
!
! double vis_ll_to_double(unsigned long long /*value*/);
!
        .inline vis_ll_to_double,8
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f0
        .end

!--------------------------------------------------------------------
! Address space identifier (ASI) register
!
! unsigned int vis_read_asi(void)
!
        .inline vis_read_asi,0
        rd      %asi,%o0
        .end
!
! void vis_write_asi(unsigned int /* ASI */)
!
        .inline vis_write_asi,4
        wr      %g0,%o0,%asi
        .end

!--------------------------------------------------------------------
! Load/store from/into alternate space
!
! float vis_ldfa_ASI_REG(void *rs1)
!
! Loads through the ASI currently held in the %asi register.
!
        .inline vis_ldfa_ASI_REG,4
        lda     [%o0+0]%asi,%f4
        fmovs   %f4,%f0         ! Compiler can clean this up
        .end
!
! float vis_ldfa_ASI_P(void *rs1)
!
! vis_ldfa_ASI_P: loads one word from [%o0] with literal ASI 0x80 into
! %f4 and copies it to %f0.
        .inline vis_ldfa_ASI_P,4
        lda     [%o0]0x80,%f4   ! ASI_P
        fmovs   %f4,%f0         ! Compiler can clean this up
        .end
!
! float vis_ldfa_ASI_PL(void *rs1)
!
        .inline vis_ldfa_ASI_PL,4
        lda     [%o0]0x88,%f4   ! ASI_PL
        fmovs   %f4,%f0         ! Compiler can clean this up
        .end
!
! double vis_lddfa_ASI_REG(void *rs1)
!
! Doubleword load through the ASI currently held in %asi.
!
        .inline vis_lddfa_ASI_REG,4
        ldda    [%o0+0]%asi,%f4
        fmovd   %f4,%f0         ! Compiler can clean this up
        .end
!
! double vis_lddfa_ASI_P(void *rs1)
!
        .inline vis_lddfa_ASI_P,4
        ldda    [%o0]0x80,%f4   ! ASI_P
        fmovd   %f4,%f0         ! Compiler can clean this up
        .end
!
! double vis_lddfa_ASI_PL(void *rs1)
!
        .inline vis_lddfa_ASI_PL,4
        ldda    [%o0]0x88,%f4   ! ASI_PL
        fmovd   %f4,%f0         ! Compiler can clean this up
        .end
!
! The store templates below first move the value from the %o
! registers to %f4 through [%sp+0x48]; the address is %o1 for the
! float variants and %o2 for the double variants.
!
! vis_stfa_ASI_REG(float frs, void *rs1)
!
        .inline vis_stfa_ASI_REG,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        sta     %f4,[%o1+0]%asi
        .end
!
! vis_stfa_ASI_P(float frs, void *rs1)
!
        .inline vis_stfa_ASI_P,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        sta     %f4,[%o1]0x80   ! ASI_P
        .end
!
! vis_stfa_ASI_PL(float frs, void *rs1)
!
        .inline vis_stfa_ASI_PL,8
        st      %o0,[%sp+0x48]
        ld      [%sp+0x48],%f4
        sta     %f4,[%o1]0x88   ! ASI_PL
        .end
!
! vis_stdfa_ASI_REG(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_REG,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2+0]%asi
        .end
!
! vis_stdfa_ASI_P(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_P,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0x80   ! ASI_P
        .end
!
! vis_stdfa_ASI_PL(double frd, void *rs1)
!
        .inline vis_stdfa_ASI_PL,12
        std     %o0,[%sp+0x48]
        ldd     [%sp+0x48],%f4
        stda    %f4,[%o2]0x88   ! ASI_PL
        .end
!
! unsigned short vis_lduha_ASI_REG(void *rs1)
!
! vis_lduha_ASI_REG: unsigned halfword load from [%o0] through the ASI
! currently held in %asi; the result replaces %o0.
        .inline vis_lduha_ASI_REG,4
        lduha   [%o0+0]%asi,%o0
        .end
!
! unsigned short vis_lduha_ASI_P(void *rs1)
!
        .inline vis_lduha_ASI_P,4
        lduha   [%o0]0x80,%o0   ! ASI_P
        .end
!
! unsigned short vis_lduha_ASI_PL(void *rs1)
!
        .inline vis_lduha_ASI_PL,4
        lduha   [%o0]0x88,%o0   ! ASI_PL
        .end
!
! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
!
        .inline vis_lduha_ASI_P_index,8
        lduha   [%o0+%o1]0x80,%o0       ! ASI_P
        .end
!
! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
!
        .inline vis_lduha_ASI_PL_index,8
        lduha   [%o0+%o1]0x88,%o0       ! ASI_PL
        .end

!--------------------------------------------------------------------
! Prefetch
!
! Both templates issue one prefetch on the address in %o0; they differ
! only in the function code operand (0 for read, 2 for write).
!
! void vis_prefetch_read(void * /*address*/);
!
        .inline vis_prefetch_read,4
        prefetch        [%o0+0],0
        .end
!
! void vis_prefetch_write(void * /*address*/);
!
        .inline vis_prefetch_write,4
        prefetch        [%o0+0],2
        .end