Logic-vixl.cpp (156711B)
1 // Copyright 2015, ARM Limited 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 27 #ifdef JS_SIMULATOR_ARM64 28 29 #include <cmath> 30 31 #include "jit/arm64/vixl/Simulator-vixl.h" 32 33 namespace vixl { 34 35 template<> double Simulator::FPDefaultNaN<double>() { 36 return kFP64DefaultNaN; 37 } 38 39 40 template<> float Simulator::FPDefaultNaN<float>() { 41 return kFP32DefaultNaN; 42 } 43 44 45 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { 46 if (src >= 0) { 47 return UFixedToDouble(src, fbits, round); 48 } else { 49 // This works for all negative values, including INT64_MIN. 50 return -UFixedToDouble(-src, fbits, round); 51 } 52 } 53 54 55 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { 56 // An input of 0 is a special case because the result is effectively 57 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 58 if (src == 0) { 59 return 0.0; 60 } 61 62 // Calculate the exponent. The highest significant bit will have the value 63 // 2^exponent. 64 const int highest_significant_bit = 63 - CountLeadingZeros(src); 65 const int64_t exponent = highest_significant_bit - fbits; 66 67 return FPRoundToDouble(0, exponent, src, round); 68 } 69 70 71 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { 72 if (src >= 0) { 73 return UFixedToFloat(src, fbits, round); 74 } else { 75 // This works for all negative values, including INT64_MIN. 76 return -UFixedToFloat(-src, fbits, round); 77 } 78 } 79 80 81 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { 82 // An input of 0 is a special case because the result is effectively 83 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 84 if (src == 0) { 85 return 0.0f; 86 } 87 88 // Calculate the exponent. The highest significant bit will have the value 89 // 2^exponent. 
90 const int highest_significant_bit = 63 - CountLeadingZeros(src); 91 const int32_t exponent = highest_significant_bit - fbits; 92 93 return FPRoundToFloat(0, exponent, src, round); 94 } 95 96 97 void Simulator::ld1(VectorFormat vform, 98 LogicVRegister dst, 99 uint64_t addr) { 100 if (handle_wasm_seg_fault(addr, 16)) 101 return; 102 dst.ClearForWrite(vform); 103 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 104 dst.ReadUintFromMem(vform, i, addr); 105 addr += LaneSizeInBytesFromFormat(vform); 106 } 107 } 108 109 110 void Simulator::ld1(VectorFormat vform, 111 LogicVRegister dst, 112 int index, 113 uint64_t addr) { 114 if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform))) 115 return; 116 dst.ReadUintFromMem(vform, index, addr); 117 } 118 119 120 void Simulator::ld1r(VectorFormat vform, 121 LogicVRegister dst, 122 uint64_t addr) { 123 if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform))) 124 return; 125 dst.ClearForWrite(vform); 126 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 127 dst.ReadUintFromMem(vform, i, addr); 128 } 129 } 130 131 132 void Simulator::ld2(VectorFormat vform, 133 LogicVRegister dst1, 134 LogicVRegister dst2, 135 uint64_t addr1) { 136 if (handle_wasm_seg_fault(addr1, 16*2)) 137 return; 138 dst1.ClearForWrite(vform); 139 dst2.ClearForWrite(vform); 140 int esize = LaneSizeInBytesFromFormat(vform); 141 uint64_t addr2 = addr1 + esize; 142 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 143 dst1.ReadUintFromMem(vform, i, addr1); 144 dst2.ReadUintFromMem(vform, i, addr2); 145 addr1 += 2 * esize; 146 addr2 += 2 * esize; 147 } 148 } 149 150 151 void Simulator::ld2(VectorFormat vform, 152 LogicVRegister dst1, 153 LogicVRegister dst2, 154 int index, 155 uint64_t addr1) { 156 if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*2)) 157 return; 158 dst1.ClearForWrite(vform); 159 dst2.ClearForWrite(vform); 160 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 161 
dst1.ReadUintFromMem(vform, index, addr1); 162 dst2.ReadUintFromMem(vform, index, addr2); 163 } 164 165 166 void Simulator::ld2r(VectorFormat vform, 167 LogicVRegister dst1, 168 LogicVRegister dst2, 169 uint64_t addr) { 170 if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*2)) 171 return; 172 dst1.ClearForWrite(vform); 173 dst2.ClearForWrite(vform); 174 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 175 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 176 dst1.ReadUintFromMem(vform, i, addr); 177 dst2.ReadUintFromMem(vform, i, addr2); 178 } 179 } 180 181 182 void Simulator::ld3(VectorFormat vform, 183 LogicVRegister dst1, 184 LogicVRegister dst2, 185 LogicVRegister dst3, 186 uint64_t addr1) { 187 if (handle_wasm_seg_fault(addr1, 16*3)) 188 return; 189 dst1.ClearForWrite(vform); 190 dst2.ClearForWrite(vform); 191 dst3.ClearForWrite(vform); 192 int esize = LaneSizeInBytesFromFormat(vform); 193 uint64_t addr2 = addr1 + esize; 194 uint64_t addr3 = addr2 + esize; 195 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 196 dst1.ReadUintFromMem(vform, i, addr1); 197 dst2.ReadUintFromMem(vform, i, addr2); 198 dst3.ReadUintFromMem(vform, i, addr3); 199 addr1 += 3 * esize; 200 addr2 += 3 * esize; 201 addr3 += 3 * esize; 202 } 203 } 204 205 206 void Simulator::ld3(VectorFormat vform, 207 LogicVRegister dst1, 208 LogicVRegister dst2, 209 LogicVRegister dst3, 210 int index, 211 uint64_t addr1) { 212 if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*3)) 213 return; 214 dst1.ClearForWrite(vform); 215 dst2.ClearForWrite(vform); 216 dst3.ClearForWrite(vform); 217 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 218 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 219 dst1.ReadUintFromMem(vform, index, addr1); 220 dst2.ReadUintFromMem(vform, index, addr2); 221 dst3.ReadUintFromMem(vform, index, addr3); 222 } 223 224 225 void Simulator::ld3r(VectorFormat vform, 226 LogicVRegister dst1, 227 LogicVRegister dst2, 228 
LogicVRegister dst3, 229 uint64_t addr) { 230 if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*3)) 231 return; 232 dst1.ClearForWrite(vform); 233 dst2.ClearForWrite(vform); 234 dst3.ClearForWrite(vform); 235 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 236 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 237 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 238 dst1.ReadUintFromMem(vform, i, addr); 239 dst2.ReadUintFromMem(vform, i, addr2); 240 dst3.ReadUintFromMem(vform, i, addr3); 241 } 242 } 243 244 245 void Simulator::ld4(VectorFormat vform, 246 LogicVRegister dst1, 247 LogicVRegister dst2, 248 LogicVRegister dst3, 249 LogicVRegister dst4, 250 uint64_t addr1) { 251 if (handle_wasm_seg_fault(addr1, 16*4)) 252 return; 253 dst1.ClearForWrite(vform); 254 dst2.ClearForWrite(vform); 255 dst3.ClearForWrite(vform); 256 dst4.ClearForWrite(vform); 257 int esize = LaneSizeInBytesFromFormat(vform); 258 uint64_t addr2 = addr1 + esize; 259 uint64_t addr3 = addr2 + esize; 260 uint64_t addr4 = addr3 + esize; 261 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 262 dst1.ReadUintFromMem(vform, i, addr1); 263 dst2.ReadUintFromMem(vform, i, addr2); 264 dst3.ReadUintFromMem(vform, i, addr3); 265 dst4.ReadUintFromMem(vform, i, addr4); 266 addr1 += 4 * esize; 267 addr2 += 4 * esize; 268 addr3 += 4 * esize; 269 addr4 += 4 * esize; 270 } 271 } 272 273 274 void Simulator::ld4(VectorFormat vform, 275 LogicVRegister dst1, 276 LogicVRegister dst2, 277 LogicVRegister dst3, 278 LogicVRegister dst4, 279 int index, 280 uint64_t addr1) { 281 if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*4)) 282 return; 283 dst1.ClearForWrite(vform); 284 dst2.ClearForWrite(vform); 285 dst3.ClearForWrite(vform); 286 dst4.ClearForWrite(vform); 287 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 288 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 289 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 290 
dst1.ReadUintFromMem(vform, index, addr1); 291 dst2.ReadUintFromMem(vform, index, addr2); 292 dst3.ReadUintFromMem(vform, index, addr3); 293 dst4.ReadUintFromMem(vform, index, addr4); 294 } 295 296 297 void Simulator::ld4r(VectorFormat vform, 298 LogicVRegister dst1, 299 LogicVRegister dst2, 300 LogicVRegister dst3, 301 LogicVRegister dst4, 302 uint64_t addr) { 303 if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*4)) 304 return; 305 dst1.ClearForWrite(vform); 306 dst2.ClearForWrite(vform); 307 dst3.ClearForWrite(vform); 308 dst4.ClearForWrite(vform); 309 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 310 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 311 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 312 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 313 dst1.ReadUintFromMem(vform, i, addr); 314 dst2.ReadUintFromMem(vform, i, addr2); 315 dst3.ReadUintFromMem(vform, i, addr3); 316 dst4.ReadUintFromMem(vform, i, addr4); 317 } 318 } 319 320 321 void Simulator::st1(VectorFormat vform, 322 LogicVRegister src, 323 uint64_t addr) { 324 if (handle_wasm_seg_fault(addr, 16)) 325 return; 326 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 327 src.WriteUintToMem(vform, i, addr); 328 addr += LaneSizeInBytesFromFormat(vform); 329 } 330 } 331 332 333 void Simulator::st1(VectorFormat vform, 334 LogicVRegister src, 335 int index, 336 uint64_t addr) { 337 if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform))) 338 return; 339 src.WriteUintToMem(vform, index, addr); 340 } 341 342 343 void Simulator::st2(VectorFormat vform, 344 LogicVRegister dst, 345 LogicVRegister dst2, 346 uint64_t addr) { 347 if (handle_wasm_seg_fault(addr, 16*2)) 348 return; 349 int esize = LaneSizeInBytesFromFormat(vform); 350 uint64_t addr2 = addr + esize; 351 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 352 dst.WriteUintToMem(vform, i, addr); 353 dst2.WriteUintToMem(vform, i, addr2); 354 addr += 2 * esize; 355 addr2 += 2 * 
esize; 356 } 357 } 358 359 360 void Simulator::st2(VectorFormat vform, 361 LogicVRegister dst, 362 LogicVRegister dst2, 363 int index, 364 uint64_t addr) { 365 if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*2)) 366 return; 367 int esize = LaneSizeInBytesFromFormat(vform); 368 dst.WriteUintToMem(vform, index, addr); 369 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 370 } 371 372 373 void Simulator::st3(VectorFormat vform, 374 LogicVRegister dst, 375 LogicVRegister dst2, 376 LogicVRegister dst3, 377 uint64_t addr) { 378 if (handle_wasm_seg_fault(addr, 16*3)) 379 return; 380 int esize = LaneSizeInBytesFromFormat(vform); 381 uint64_t addr2 = addr + esize; 382 uint64_t addr3 = addr2 + esize; 383 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 384 dst.WriteUintToMem(vform, i, addr); 385 dst2.WriteUintToMem(vform, i, addr2); 386 dst3.WriteUintToMem(vform, i, addr3); 387 addr += 3 * esize; 388 addr2 += 3 * esize; 389 addr3 += 3 * esize; 390 } 391 } 392 393 394 void Simulator::st3(VectorFormat vform, 395 LogicVRegister dst, 396 LogicVRegister dst2, 397 LogicVRegister dst3, 398 int index, 399 uint64_t addr) { 400 if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*3)) 401 return; 402 int esize = LaneSizeInBytesFromFormat(vform); 403 dst.WriteUintToMem(vform, index, addr); 404 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 405 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 406 } 407 408 409 void Simulator::st4(VectorFormat vform, 410 LogicVRegister dst, 411 LogicVRegister dst2, 412 LogicVRegister dst3, 413 LogicVRegister dst4, 414 uint64_t addr) { 415 if (handle_wasm_seg_fault(addr, 16*4)) 416 return; 417 int esize = LaneSizeInBytesFromFormat(vform); 418 uint64_t addr2 = addr + esize; 419 uint64_t addr3 = addr2 + esize; 420 uint64_t addr4 = addr3 + esize; 421 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 422 dst.WriteUintToMem(vform, i, addr); 423 dst2.WriteUintToMem(vform, i, addr2); 424 
dst3.WriteUintToMem(vform, i, addr3); 425 dst4.WriteUintToMem(vform, i, addr4); 426 addr += 4 * esize; 427 addr2 += 4 * esize; 428 addr3 += 4 * esize; 429 addr4 += 4 * esize; 430 } 431 } 432 433 434 void Simulator::st4(VectorFormat vform, 435 LogicVRegister dst, 436 LogicVRegister dst2, 437 LogicVRegister dst3, 438 LogicVRegister dst4, 439 int index, 440 uint64_t addr) { 441 if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*4)) 442 return; 443 int esize = LaneSizeInBytesFromFormat(vform); 444 dst.WriteUintToMem(vform, index, addr); 445 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 446 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 447 dst4.WriteUintToMem(vform, index, addr + 3 * esize); 448 } 449 450 451 LogicVRegister Simulator::cmp(VectorFormat vform, 452 LogicVRegister dst, 453 const LogicVRegister& src1, 454 const LogicVRegister& src2, 455 Condition cond) { 456 dst.ClearForWrite(vform); 457 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 458 int64_t sa = src1.Int(vform, i); 459 int64_t sb = src2.Int(vform, i); 460 uint64_t ua = src1.Uint(vform, i); 461 uint64_t ub = src2.Uint(vform, i); 462 bool result = false; 463 switch (cond) { 464 case eq: result = (ua == ub); break; 465 case ge: result = (sa >= sb); break; 466 case gt: result = (sa > sb) ; break; 467 case hi: result = (ua > ub) ; break; 468 case hs: result = (ua >= ub); break; 469 case lt: result = (sa < sb) ; break; 470 case le: result = (sa <= sb); break; 471 default: VIXL_UNREACHABLE(); break; 472 } 473 dst.SetUint(vform, i, result ? 
MaxUintFromFormat(vform) : 0); 474 } 475 return dst; 476 } 477 478 479 LogicVRegister Simulator::cmp(VectorFormat vform, 480 LogicVRegister dst, 481 const LogicVRegister& src1, 482 int imm, 483 Condition cond) { 484 SimVRegister temp; 485 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); 486 return cmp(vform, dst, src1, imm_reg, cond); 487 } 488 489 490 LogicVRegister Simulator::cmptst(VectorFormat vform, 491 LogicVRegister dst, 492 const LogicVRegister& src1, 493 const LogicVRegister& src2) { 494 dst.ClearForWrite(vform); 495 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 496 uint64_t ua = src1.Uint(vform, i); 497 uint64_t ub = src2.Uint(vform, i); 498 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); 499 } 500 return dst; 501 } 502 503 504 LogicVRegister Simulator::add(VectorFormat vform, 505 LogicVRegister dst, 506 const LogicVRegister& src1, 507 const LogicVRegister& src2) { 508 dst.ClearForWrite(vform); 509 // TODO(all): consider assigning the result of LaneCountFromFormat to a local. 510 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 511 // Test for unsigned saturation. 512 uint64_t ua = src1.UintLeftJustified(vform, i); 513 uint64_t ub = src2.UintLeftJustified(vform, i); 514 uint64_t ur = ua + ub; 515 if (ur < ua) { 516 dst.SetUnsignedSat(i, true); 517 } 518 519 // Test for signed saturation. 520 int64_t sa = src1.IntLeftJustified(vform, i); 521 int64_t sb = src2.IntLeftJustified(vform, i); 522 int64_t sr = sa + sb; 523 // If the signs of the operands are the same, but different from the result, 524 // there was an overflow. 
525 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 526 dst.SetSignedSat(i, sa >= 0); 527 } 528 529 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i)); 530 } 531 return dst; 532 } 533 534 535 LogicVRegister Simulator::addp(VectorFormat vform, 536 LogicVRegister dst, 537 const LogicVRegister& src1, 538 const LogicVRegister& src2) { 539 SimVRegister temp1, temp2; 540 uzp1(vform, temp1, src1, src2); 541 uzp2(vform, temp2, src1, src2); 542 add(vform, dst, temp1, temp2); 543 return dst; 544 } 545 546 547 LogicVRegister Simulator::mla(VectorFormat vform, 548 LogicVRegister dst, 549 const LogicVRegister& src1, 550 const LogicVRegister& src2) { 551 SimVRegister temp; 552 mul(vform, temp, src1, src2); 553 add(vform, dst, dst, temp); 554 return dst; 555 } 556 557 558 LogicVRegister Simulator::mls(VectorFormat vform, 559 LogicVRegister dst, 560 const LogicVRegister& src1, 561 const LogicVRegister& src2) { 562 SimVRegister temp; 563 mul(vform, temp, src1, src2); 564 sub(vform, dst, dst, temp); 565 return dst; 566 } 567 568 569 LogicVRegister Simulator::mul(VectorFormat vform, 570 LogicVRegister dst, 571 const LogicVRegister& src1, 572 const LogicVRegister& src2) { 573 dst.ClearForWrite(vform); 574 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 575 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); 576 } 577 return dst; 578 } 579 580 581 LogicVRegister Simulator::mul(VectorFormat vform, 582 LogicVRegister dst, 583 const LogicVRegister& src1, 584 const LogicVRegister& src2, 585 int index) { 586 SimVRegister temp; 587 VectorFormat indexform = VectorFormatFillQ(vform); 588 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); 589 } 590 591 592 LogicVRegister Simulator::mla(VectorFormat vform, 593 LogicVRegister dst, 594 const LogicVRegister& src1, 595 const LogicVRegister& src2, 596 int index) { 597 SimVRegister temp; 598 VectorFormat indexform = VectorFormatFillQ(vform); 599 return mla(vform, dst, src1, 
dup_element(indexform, temp, src2, index)); 600 } 601 602 603 LogicVRegister Simulator::mls(VectorFormat vform, 604 LogicVRegister dst, 605 const LogicVRegister& src1, 606 const LogicVRegister& src2, 607 int index) { 608 SimVRegister temp; 609 VectorFormat indexform = VectorFormatFillQ(vform); 610 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); 611 } 612 613 614 LogicVRegister Simulator::smull(VectorFormat vform, 615 LogicVRegister dst, 616 const LogicVRegister& src1, 617 const LogicVRegister& src2, 618 int index) { 619 SimVRegister temp; 620 VectorFormat indexform = 621 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 622 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 623 } 624 625 626 LogicVRegister Simulator::smull2(VectorFormat vform, 627 LogicVRegister dst, 628 const LogicVRegister& src1, 629 const LogicVRegister& src2, 630 int index) { 631 SimVRegister temp; 632 VectorFormat indexform = 633 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 634 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 635 } 636 637 638 LogicVRegister Simulator::umull(VectorFormat vform, 639 LogicVRegister dst, 640 const LogicVRegister& src1, 641 const LogicVRegister& src2, 642 int index) { 643 SimVRegister temp; 644 VectorFormat indexform = 645 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 646 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 647 } 648 649 650 LogicVRegister Simulator::umull2(VectorFormat vform, 651 LogicVRegister dst, 652 const LogicVRegister& src1, 653 const LogicVRegister& src2, 654 int index) { 655 SimVRegister temp; 656 VectorFormat indexform = 657 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 658 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 659 } 660 661 662 LogicVRegister Simulator::smlal(VectorFormat vform, 663 LogicVRegister dst, 664 const LogicVRegister& src1, 665 const 
LogicVRegister& src2, 666 int index) { 667 SimVRegister temp; 668 VectorFormat indexform = 669 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 670 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 671 } 672 673 674 LogicVRegister Simulator::smlal2(VectorFormat vform, 675 LogicVRegister dst, 676 const LogicVRegister& src1, 677 const LogicVRegister& src2, 678 int index) { 679 SimVRegister temp; 680 VectorFormat indexform = 681 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 682 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 683 } 684 685 686 LogicVRegister Simulator::umlal(VectorFormat vform, 687 LogicVRegister dst, 688 const LogicVRegister& src1, 689 const LogicVRegister& src2, 690 int index) { 691 SimVRegister temp; 692 VectorFormat indexform = 693 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 694 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 695 } 696 697 698 LogicVRegister Simulator::umlal2(VectorFormat vform, 699 LogicVRegister dst, 700 const LogicVRegister& src1, 701 const LogicVRegister& src2, 702 int index) { 703 SimVRegister temp; 704 VectorFormat indexform = 705 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 706 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 707 } 708 709 710 LogicVRegister Simulator::smlsl(VectorFormat vform, 711 LogicVRegister dst, 712 const LogicVRegister& src1, 713 const LogicVRegister& src2, 714 int index) { 715 SimVRegister temp; 716 VectorFormat indexform = 717 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 718 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 719 } 720 721 722 LogicVRegister Simulator::smlsl2(VectorFormat vform, 723 LogicVRegister dst, 724 const LogicVRegister& src1, 725 const LogicVRegister& src2, 726 int index) { 727 SimVRegister temp; 728 VectorFormat indexform = 729 
VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 730 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 731 } 732 733 734 LogicVRegister Simulator::umlsl(VectorFormat vform, 735 LogicVRegister dst, 736 const LogicVRegister& src1, 737 const LogicVRegister& src2, 738 int index) { 739 SimVRegister temp; 740 VectorFormat indexform = 741 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 742 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 743 } 744 745 746 LogicVRegister Simulator::umlsl2(VectorFormat vform, 747 LogicVRegister dst, 748 const LogicVRegister& src1, 749 const LogicVRegister& src2, 750 int index) { 751 SimVRegister temp; 752 VectorFormat indexform = 753 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 754 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 755 } 756 757 758 LogicVRegister Simulator::sqdmull(VectorFormat vform, 759 LogicVRegister dst, 760 const LogicVRegister& src1, 761 const LogicVRegister& src2, 762 int index) { 763 SimVRegister temp; 764 VectorFormat indexform = 765 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 766 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 767 } 768 769 770 LogicVRegister Simulator::sqdmull2(VectorFormat vform, 771 LogicVRegister dst, 772 const LogicVRegister& src1, 773 const LogicVRegister& src2, 774 int index) { 775 SimVRegister temp; 776 VectorFormat indexform = 777 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 778 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 779 } 780 781 782 LogicVRegister Simulator::sqdmlal(VectorFormat vform, 783 LogicVRegister dst, 784 const LogicVRegister& src1, 785 const LogicVRegister& src2, 786 int index) { 787 SimVRegister temp; 788 VectorFormat indexform = 789 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 790 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, 
index)); 791 } 792 793 794 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 795 LogicVRegister dst, 796 const LogicVRegister& src1, 797 const LogicVRegister& src2, 798 int index) { 799 SimVRegister temp; 800 VectorFormat indexform = 801 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 802 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 803 } 804 805 806 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 807 LogicVRegister dst, 808 const LogicVRegister& src1, 809 const LogicVRegister& src2, 810 int index) { 811 SimVRegister temp; 812 VectorFormat indexform = 813 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 814 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 815 } 816 817 818 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 819 LogicVRegister dst, 820 const LogicVRegister& src1, 821 const LogicVRegister& src2, 822 int index) { 823 SimVRegister temp; 824 VectorFormat indexform = 825 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 826 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 827 } 828 829 830 LogicVRegister Simulator::sqdmulh(VectorFormat vform, 831 LogicVRegister dst, 832 const LogicVRegister& src1, 833 const LogicVRegister& src2, 834 int index) { 835 SimVRegister temp; 836 VectorFormat indexform = VectorFormatFillQ(vform); 837 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 838 } 839 840 841 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 842 LogicVRegister dst, 843 const LogicVRegister& src1, 844 const LogicVRegister& src2, 845 int index) { 846 SimVRegister temp; 847 VectorFormat indexform = VectorFormatFillQ(vform); 848 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 849 } 850 851 852 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) { 853 uint16_t result = 0; 854 uint16_t extended_op2 = op2; 855 for (int i = 0; i < 8; ++i) { 856 if ((op1 >> 
i) & 1) { 857 result = result ^ (extended_op2 << i); 858 } 859 } 860 return result; 861 } 862 863 864 LogicVRegister Simulator::pmul(VectorFormat vform, 865 LogicVRegister dst, 866 const LogicVRegister& src1, 867 const LogicVRegister& src2) { 868 dst.ClearForWrite(vform); 869 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 870 dst.SetUint(vform, i, 871 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); 872 } 873 return dst; 874 } 875 876 877 LogicVRegister Simulator::pmull(VectorFormat vform, 878 LogicVRegister dst, 879 const LogicVRegister& src1, 880 const LogicVRegister& src2) { 881 VectorFormat vform_src = VectorFormatHalfWidth(vform); 882 dst.ClearForWrite(vform); 883 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 884 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i), 885 src2.Uint(vform_src, i))); 886 } 887 return dst; 888 } 889 890 891 LogicVRegister Simulator::pmull2(VectorFormat vform, 892 LogicVRegister dst, 893 const LogicVRegister& src1, 894 const LogicVRegister& src2) { 895 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); 896 dst.ClearForWrite(vform); 897 int lane_count = LaneCountFromFormat(vform); 898 for (int i = 0; i < lane_count; i++) { 899 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i), 900 src2.Uint(vform_src, lane_count + i))); 901 } 902 return dst; 903 } 904 905 906 LogicVRegister Simulator::sub(VectorFormat vform, 907 LogicVRegister dst, 908 const LogicVRegister& src1, 909 const LogicVRegister& src2) { 910 dst.ClearForWrite(vform); 911 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 912 // Test for unsigned saturation. 913 if (src2.Uint(vform, i) > src1.Uint(vform, i)) { 914 dst.SetUnsignedSat(i, false); 915 } 916 917 // Test for signed saturation. 
918 int64_t sa = src1.IntLeftJustified(vform, i); 919 int64_t sb = src2.IntLeftJustified(vform, i); 920 int64_t sr = sa - sb; 921 // If the signs of the operands are different, and the sign of the first 922 // operand doesn't match the result, there was an overflow. 923 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 924 dst.SetSignedSat(i, sr < 0); 925 } 926 927 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i)); 928 } 929 return dst; 930 } 931 932 933 LogicVRegister Simulator::and_(VectorFormat vform, 934 LogicVRegister dst, 935 const LogicVRegister& src1, 936 const LogicVRegister& src2) { 937 dst.ClearForWrite(vform); 938 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 939 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); 940 } 941 return dst; 942 } 943 944 945 LogicVRegister Simulator::orr(VectorFormat vform, 946 LogicVRegister dst, 947 const LogicVRegister& src1, 948 const LogicVRegister& src2) { 949 dst.ClearForWrite(vform); 950 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 951 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); 952 } 953 return dst; 954 } 955 956 957 LogicVRegister Simulator::orn(VectorFormat vform, 958 LogicVRegister dst, 959 const LogicVRegister& src1, 960 const LogicVRegister& src2) { 961 dst.ClearForWrite(vform); 962 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 963 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); 964 } 965 return dst; 966 } 967 968 969 LogicVRegister Simulator::eor(VectorFormat vform, 970 LogicVRegister dst, 971 const LogicVRegister& src1, 972 const LogicVRegister& src2) { 973 dst.ClearForWrite(vform); 974 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 975 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); 976 } 977 return dst; 978 } 979 980 981 LogicVRegister Simulator::bic(VectorFormat vform, 982 LogicVRegister dst, 983 const LogicVRegister& src1, 984 const LogicVRegister& src2) { 985 
dst.ClearForWrite(vform); 986 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 987 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); 988 } 989 return dst; 990 } 991 992 993 LogicVRegister Simulator::bic(VectorFormat vform, 994 LogicVRegister dst, 995 const LogicVRegister& src, 996 uint64_t imm) { 997 uint64_t result[16]; 998 int laneCount = LaneCountFromFormat(vform); 999 for (int i = 0; i < laneCount; ++i) { 1000 result[i] = src.Uint(vform, i) & ~imm; 1001 } 1002 dst.ClearForWrite(vform); 1003 for (int i = 0; i < laneCount; ++i) { 1004 dst.SetUint(vform, i, result[i]); 1005 } 1006 return dst; 1007 } 1008 1009 1010 LogicVRegister Simulator::bif(VectorFormat vform, 1011 LogicVRegister dst, 1012 const LogicVRegister& src1, 1013 const LogicVRegister& src2) { 1014 dst.ClearForWrite(vform); 1015 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1016 uint64_t operand1 = dst.Uint(vform, i); 1017 uint64_t operand2 = ~src2.Uint(vform, i); 1018 uint64_t operand3 = src1.Uint(vform, i); 1019 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1020 dst.SetUint(vform, i, result); 1021 } 1022 return dst; 1023 } 1024 1025 1026 LogicVRegister Simulator::bit(VectorFormat vform, 1027 LogicVRegister dst, 1028 const LogicVRegister& src1, 1029 const LogicVRegister& src2) { 1030 dst.ClearForWrite(vform); 1031 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1032 uint64_t operand1 = dst.Uint(vform, i); 1033 uint64_t operand2 = src2.Uint(vform, i); 1034 uint64_t operand3 = src1.Uint(vform, i); 1035 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1036 dst.SetUint(vform, i, result); 1037 } 1038 return dst; 1039 } 1040 1041 1042 LogicVRegister Simulator::bsl(VectorFormat vform, 1043 LogicVRegister dst, 1044 const LogicVRegister& src1, 1045 const LogicVRegister& src2) { 1046 dst.ClearForWrite(vform); 1047 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1048 uint64_t operand1 = src2.Uint(vform, i); 1049 uint64_t operand2 = 
dst.Uint(vform, i); 1050 uint64_t operand3 = src1.Uint(vform, i); 1051 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1052 dst.SetUint(vform, i, result); 1053 } 1054 return dst; 1055 } 1056 1057 1058 LogicVRegister Simulator::sminmax(VectorFormat vform, 1059 LogicVRegister dst, 1060 const LogicVRegister& src1, 1061 const LogicVRegister& src2, 1062 bool max) { 1063 dst.ClearForWrite(vform); 1064 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1065 int64_t src1_val = src1.Int(vform, i); 1066 int64_t src2_val = src2.Int(vform, i); 1067 int64_t dst_val; 1068 if (max == true) { 1069 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1070 } else { 1071 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1072 } 1073 dst.SetInt(vform, i, dst_val); 1074 } 1075 return dst; 1076 } 1077 1078 1079 LogicVRegister Simulator::smax(VectorFormat vform, 1080 LogicVRegister dst, 1081 const LogicVRegister& src1, 1082 const LogicVRegister& src2) { 1083 return sminmax(vform, dst, src1, src2, true); 1084 } 1085 1086 1087 LogicVRegister Simulator::smin(VectorFormat vform, 1088 LogicVRegister dst, 1089 const LogicVRegister& src1, 1090 const LogicVRegister& src2) { 1091 return sminmax(vform, dst, src1, src2, false); 1092 } 1093 1094 1095 LogicVRegister Simulator::sminmaxp(VectorFormat vform, 1096 LogicVRegister dst, 1097 int dst_index, 1098 const LogicVRegister& src, 1099 bool max) { 1100 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1101 int64_t src1_val = src.Int(vform, i); 1102 int64_t src2_val = src.Int(vform, i + 1); 1103 int64_t dst_val; 1104 if (max == true) { 1105 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1106 } else { 1107 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1108 } 1109 dst.SetInt(vform, dst_index + (i >> 1), dst_val); 1110 } 1111 return dst; 1112 } 1113 1114 1115 LogicVRegister Simulator::smaxp(VectorFormat vform, 1116 LogicVRegister dst, 1117 const LogicVRegister& src1, 1118 const LogicVRegister& src2) { 1119 dst.ClearForWrite(vform); 1120 sminmaxp(vform, dst, 0, src1, true); 1121 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true); 1122 return dst; 1123 } 1124 1125 1126 LogicVRegister Simulator::sminp(VectorFormat vform, 1127 LogicVRegister dst, 1128 const LogicVRegister& src1, 1129 const LogicVRegister& src2) { 1130 dst.ClearForWrite(vform); 1131 sminmaxp(vform, dst, 0, src1, false); 1132 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false); 1133 return dst; 1134 } 1135 1136 1137 LogicVRegister Simulator::addp(VectorFormat vform, 1138 LogicVRegister dst, 1139 const LogicVRegister& src) { 1140 VIXL_ASSERT(vform == kFormatD); 1141 1142 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1); 1143 dst.ClearForWrite(vform); 1144 dst.SetInt(vform, 0, dst_val); 1145 return dst; 1146 } 1147 1148 1149 LogicVRegister Simulator::addv(VectorFormat vform, 1150 LogicVRegister dst, 1151 const LogicVRegister& src) { 1152 VectorFormat vform_dst 1153 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1154 1155 1156 int64_t dst_val = 0; 1157 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1158 dst_val += src.Int(vform, i); 1159 } 1160 1161 dst.ClearForWrite(vform_dst); 1162 dst.SetInt(vform_dst, 0, dst_val); 1163 return dst; 1164 } 1165 1166 1167 LogicVRegister Simulator::saddlv(VectorFormat vform, 1168 LogicVRegister dst, 1169 const LogicVRegister& src) { 1170 VectorFormat vform_dst 1171 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1172 1173 int64_t dst_val = 0; 1174 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1175 dst_val += src.Int(vform, i); 1176 } 1177 1178 dst.ClearForWrite(vform_dst); 1179 dst.SetInt(vform_dst, 0, 
dst_val); 1180 return dst; 1181 } 1182 1183 1184 LogicVRegister Simulator::uaddlv(VectorFormat vform, 1185 LogicVRegister dst, 1186 const LogicVRegister& src) { 1187 VectorFormat vform_dst 1188 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1189 1190 uint64_t dst_val = 0; 1191 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1192 dst_val += src.Uint(vform, i); 1193 } 1194 1195 dst.ClearForWrite(vform_dst); 1196 dst.SetUint(vform_dst, 0, dst_val); 1197 return dst; 1198 } 1199 1200 1201 LogicVRegister Simulator::sminmaxv(VectorFormat vform, 1202 LogicVRegister dst, 1203 const LogicVRegister& src, 1204 bool max) { 1205 dst.ClearForWrite(vform); 1206 int64_t dst_val = max ? INT64_MIN : INT64_MAX; 1207 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1208 int64_t src_val = src.Int(vform, i); 1209 if (max == true) { 1210 dst_val = (src_val > dst_val) ? src_val : dst_val; 1211 } else { 1212 dst_val = (src_val < dst_val) ? src_val : dst_val; 1213 } 1214 } 1215 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1216 dst.SetInt(vform, i, 0); 1217 } 1218 dst.SetInt(vform, 0, dst_val); 1219 return dst; 1220 } 1221 1222 1223 LogicVRegister Simulator::smaxv(VectorFormat vform, 1224 LogicVRegister dst, 1225 const LogicVRegister& src) { 1226 sminmaxv(vform, dst, src, true); 1227 return dst; 1228 } 1229 1230 1231 LogicVRegister Simulator::sminv(VectorFormat vform, 1232 LogicVRegister dst, 1233 const LogicVRegister& src) { 1234 sminmaxv(vform, dst, src, false); 1235 return dst; 1236 } 1237 1238 1239 LogicVRegister Simulator::uminmax(VectorFormat vform, 1240 LogicVRegister dst, 1241 const LogicVRegister& src1, 1242 const LogicVRegister& src2, 1243 bool max) { 1244 dst.ClearForWrite(vform); 1245 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1246 uint64_t src1_val = src1.Uint(vform, i); 1247 uint64_t src2_val = src2.Uint(vform, i); 1248 uint64_t dst_val; 1249 if (max == true) { 1250 dst_val = (src1_val > src2_val) ? 
src1_val : src2_val; 1251 } else { 1252 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1253 } 1254 dst.SetUint(vform, i, dst_val); 1255 } 1256 return dst; 1257 } 1258 1259 1260 LogicVRegister Simulator::umax(VectorFormat vform, 1261 LogicVRegister dst, 1262 const LogicVRegister& src1, 1263 const LogicVRegister& src2) { 1264 return uminmax(vform, dst, src1, src2, true); 1265 } 1266 1267 1268 LogicVRegister Simulator::umin(VectorFormat vform, 1269 LogicVRegister dst, 1270 const LogicVRegister& src1, 1271 const LogicVRegister& src2) { 1272 return uminmax(vform, dst, src1, src2, false); 1273 } 1274 1275 1276 LogicVRegister Simulator::uminmaxp(VectorFormat vform, 1277 LogicVRegister dst, 1278 int dst_index, 1279 const LogicVRegister& src, 1280 bool max) { 1281 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1282 uint64_t src1_val = src.Uint(vform, i); 1283 uint64_t src2_val = src.Uint(vform, i + 1); 1284 uint64_t dst_val; 1285 if (max == true) { 1286 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1287 } else { 1288 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1289 } 1290 dst.SetUint(vform, dst_index + (i >> 1), dst_val); 1291 } 1292 return dst; 1293 } 1294 1295 1296 LogicVRegister Simulator::umaxp(VectorFormat vform, 1297 LogicVRegister dst, 1298 const LogicVRegister& src1, 1299 const LogicVRegister& src2) { 1300 dst.ClearForWrite(vform); 1301 uminmaxp(vform, dst, 0, src1, true); 1302 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true); 1303 return dst; 1304 } 1305 1306 1307 LogicVRegister Simulator::uminp(VectorFormat vform, 1308 LogicVRegister dst, 1309 const LogicVRegister& src1, 1310 const LogicVRegister& src2) { 1311 dst.ClearForWrite(vform); 1312 uminmaxp(vform, dst, 0, src1, false); 1313 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false); 1314 return dst; 1315 } 1316 1317 1318 LogicVRegister Simulator::uminmaxv(VectorFormat vform, 1319 LogicVRegister dst, 1320 const LogicVRegister& src, 1321 bool max) { 1322 dst.ClearForWrite(vform); 1323 uint64_t dst_val = max ? 0 : UINT64_MAX; 1324 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1325 uint64_t src_val = src.Uint(vform, i); 1326 if (max == true) { 1327 dst_val = (src_val > dst_val) ? src_val : dst_val; 1328 } else { 1329 dst_val = (src_val < dst_val) ? 
src_val : dst_val; 1330 } 1331 } 1332 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1333 dst.SetUint(vform, i, 0); 1334 } 1335 dst.SetUint(vform, 0, dst_val); 1336 return dst; 1337 } 1338 1339 1340 LogicVRegister Simulator::umaxv(VectorFormat vform, 1341 LogicVRegister dst, 1342 const LogicVRegister& src) { 1343 uminmaxv(vform, dst, src, true); 1344 return dst; 1345 } 1346 1347 1348 LogicVRegister Simulator::uminv(VectorFormat vform, 1349 LogicVRegister dst, 1350 const LogicVRegister& src) { 1351 uminmaxv(vform, dst, src, false); 1352 return dst; 1353 } 1354 1355 1356 LogicVRegister Simulator::shl(VectorFormat vform, 1357 LogicVRegister dst, 1358 const LogicVRegister& src, 1359 int shift) { 1360 VIXL_ASSERT(shift >= 0); 1361 SimVRegister temp; 1362 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1363 return ushl(vform, dst, src, shiftreg); 1364 } 1365 1366 1367 LogicVRegister Simulator::sshll(VectorFormat vform, 1368 LogicVRegister dst, 1369 const LogicVRegister& src, 1370 int shift) { 1371 VIXL_ASSERT(shift >= 0); 1372 SimVRegister temp1, temp2; 1373 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1374 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1375 return sshl(vform, dst, extendedreg, shiftreg); 1376 } 1377 1378 1379 LogicVRegister Simulator::sshll2(VectorFormat vform, 1380 LogicVRegister dst, 1381 const LogicVRegister& src, 1382 int shift) { 1383 VIXL_ASSERT(shift >= 0); 1384 SimVRegister temp1, temp2; 1385 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1386 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1387 return sshl(vform, dst, extendedreg, shiftreg); 1388 } 1389 1390 1391 LogicVRegister Simulator::shll(VectorFormat vform, 1392 LogicVRegister dst, 1393 const LogicVRegister& src) { 1394 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1395 return sshll(vform, dst, src, shift); 1396 } 1397 1398 1399 LogicVRegister Simulator::shll2(VectorFormat vform, 1400 LogicVRegister dst, 1401 const 
LogicVRegister& src) { 1402 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1403 return sshll2(vform, dst, src, shift); 1404 } 1405 1406 1407 LogicVRegister Simulator::ushll(VectorFormat vform, 1408 LogicVRegister dst, 1409 const LogicVRegister& src, 1410 int shift) { 1411 VIXL_ASSERT(shift >= 0); 1412 SimVRegister temp1, temp2; 1413 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1414 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1415 return ushl(vform, dst, extendedreg, shiftreg); 1416 } 1417 1418 1419 LogicVRegister Simulator::ushll2(VectorFormat vform, 1420 LogicVRegister dst, 1421 const LogicVRegister& src, 1422 int shift) { 1423 VIXL_ASSERT(shift >= 0); 1424 SimVRegister temp1, temp2; 1425 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1426 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1427 return ushl(vform, dst, extendedreg, shiftreg); 1428 } 1429 1430 1431 LogicVRegister Simulator::sli(VectorFormat vform, 1432 LogicVRegister dst, 1433 const LogicVRegister& src, 1434 int shift) { 1435 dst.ClearForWrite(vform); 1436 int laneCount = LaneCountFromFormat(vform); 1437 for (int i = 0; i < laneCount; i++) { 1438 uint64_t src_lane = src.Uint(vform, i); 1439 uint64_t dst_lane = dst.Uint(vform, i); 1440 uint64_t shifted = src_lane << shift; 1441 uint64_t mask = MaxUintFromFormat(vform) << shift; 1442 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1443 } 1444 return dst; 1445 } 1446 1447 1448 LogicVRegister Simulator::sqshl(VectorFormat vform, 1449 LogicVRegister dst, 1450 const LogicVRegister& src, 1451 int shift) { 1452 VIXL_ASSERT(shift >= 0); 1453 SimVRegister temp; 1454 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1455 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1456 } 1457 1458 1459 LogicVRegister Simulator::uqshl(VectorFormat vform, 1460 LogicVRegister dst, 1461 const LogicVRegister& src, 1462 int shift) { 1463 VIXL_ASSERT(shift >= 0); 1464 SimVRegister temp; 1465 
LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1466 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1467 } 1468 1469 1470 LogicVRegister Simulator::sqshlu(VectorFormat vform, 1471 LogicVRegister dst, 1472 const LogicVRegister& src, 1473 int shift) { 1474 VIXL_ASSERT(shift >= 0); 1475 SimVRegister temp; 1476 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1477 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1478 } 1479 1480 1481 LogicVRegister Simulator::sri(VectorFormat vform, 1482 LogicVRegister dst, 1483 const LogicVRegister& src, 1484 int shift) { 1485 dst.ClearForWrite(vform); 1486 int laneCount = LaneCountFromFormat(vform); 1487 VIXL_ASSERT((shift > 0) && 1488 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1489 for (int i = 0; i < laneCount; i++) { 1490 uint64_t src_lane = src.Uint(vform, i); 1491 uint64_t dst_lane = dst.Uint(vform, i); 1492 uint64_t shifted; 1493 uint64_t mask; 1494 if (shift == 64) { 1495 shifted = 0; 1496 mask = 0; 1497 } else { 1498 shifted = src_lane >> shift; 1499 mask = MaxUintFromFormat(vform) >> shift; 1500 } 1501 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1502 } 1503 return dst; 1504 } 1505 1506 1507 LogicVRegister Simulator::ushr(VectorFormat vform, 1508 LogicVRegister dst, 1509 const LogicVRegister& src, 1510 int shift) { 1511 VIXL_ASSERT(shift >= 0); 1512 SimVRegister temp; 1513 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1514 return ushl(vform, dst, src, shiftreg); 1515 } 1516 1517 1518 LogicVRegister Simulator::sshr(VectorFormat vform, 1519 LogicVRegister dst, 1520 const LogicVRegister& src, 1521 int shift) { 1522 VIXL_ASSERT(shift >= 0); 1523 SimVRegister temp; 1524 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1525 return sshl(vform, dst, src, shiftreg); 1526 } 1527 1528 1529 LogicVRegister Simulator::ssra(VectorFormat vform, 1530 LogicVRegister dst, 1531 const LogicVRegister& src, 1532 int shift) { 1533 
SimVRegister temp; 1534 LogicVRegister shifted_reg = sshr(vform, temp, src, shift); 1535 return add(vform, dst, dst, shifted_reg); 1536 } 1537 1538 1539 LogicVRegister Simulator::usra(VectorFormat vform, 1540 LogicVRegister dst, 1541 const LogicVRegister& src, 1542 int shift) { 1543 SimVRegister temp; 1544 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1545 return add(vform, dst, dst, shifted_reg); 1546 } 1547 1548 1549 LogicVRegister Simulator::srsra(VectorFormat vform, 1550 LogicVRegister dst, 1551 const LogicVRegister& src, 1552 int shift) { 1553 SimVRegister temp; 1554 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1555 return add(vform, dst, dst, shifted_reg); 1556 } 1557 1558 1559 LogicVRegister Simulator::ursra(VectorFormat vform, 1560 LogicVRegister dst, 1561 const LogicVRegister& src, 1562 int shift) { 1563 SimVRegister temp; 1564 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1565 return add(vform, dst, dst, shifted_reg); 1566 } 1567 1568 1569 LogicVRegister Simulator::cls(VectorFormat vform, 1570 LogicVRegister dst, 1571 const LogicVRegister& src) { 1572 uint64_t result[16]; 1573 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1574 int laneCount = LaneCountFromFormat(vform); 1575 for (int i = 0; i < laneCount; i++) { 1576 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); 1577 } 1578 1579 dst.ClearForWrite(vform); 1580 for (int i = 0; i < laneCount; ++i) { 1581 dst.SetUint(vform, i, result[i]); 1582 } 1583 return dst; 1584 } 1585 1586 1587 LogicVRegister Simulator::clz(VectorFormat vform, 1588 LogicVRegister dst, 1589 const LogicVRegister& src) { 1590 uint64_t result[16]; 1591 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1592 int laneCount = LaneCountFromFormat(vform); 1593 for (int i = 0; i < laneCount; i++) { 1594 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); 1595 } 1596 1597 dst.ClearForWrite(vform); 1598 for (int i = 0; i < 
laneCount; ++i) { 1599 dst.SetUint(vform, i, result[i]); 1600 } 1601 return dst; 1602 } 1603 1604 1605 LogicVRegister Simulator::cnt(VectorFormat vform, 1606 LogicVRegister dst, 1607 const LogicVRegister& src) { 1608 uint64_t result[16]; 1609 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1610 int laneCount = LaneCountFromFormat(vform); 1611 for (int i = 0; i < laneCount; i++) { 1612 uint64_t value = src.Uint(vform, i); 1613 result[i] = 0; 1614 for (int j = 0; j < laneSizeInBits; j++) { 1615 result[i] += (value & 1); 1616 value >>= 1; 1617 } 1618 } 1619 1620 dst.ClearForWrite(vform); 1621 for (int i = 0; i < laneCount; ++i) { 1622 dst.SetUint(vform, i, result[i]); 1623 } 1624 return dst; 1625 } 1626 1627 1628 LogicVRegister Simulator::sshl(VectorFormat vform, 1629 LogicVRegister dst, 1630 const LogicVRegister& src1, 1631 const LogicVRegister& src2) { 1632 dst.ClearForWrite(vform); 1633 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1634 int8_t shift_val = src2.Int(vform, i); 1635 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1636 1637 // Set signed saturation state. 1638 if ((shift_val > CountLeadingSignBits(lj_src_val)) && 1639 (lj_src_val != 0)) { 1640 dst.SetSignedSat(i, lj_src_val >= 0); 1641 } 1642 1643 // Set unsigned saturation state. 1644 if (lj_src_val < 0) { 1645 dst.SetUnsignedSat(i, false); 1646 } else if ((shift_val > CountLeadingZeros(lj_src_val)) && 1647 (lj_src_val != 0)) { 1648 dst.SetUnsignedSat(i, true); 1649 } 1650 1651 int64_t src_val = src1.Int(vform, i); 1652 if (shift_val > 63) { 1653 dst.SetInt(vform, i, 0); 1654 } else if (shift_val < -63) { 1655 dst.SetRounding(i, src_val < 0); 1656 dst.SetInt(vform, i, (src_val < 0) ? -1 : 0); 1657 } else { 1658 if (shift_val < 0) { 1659 // Set rounding state. Rounding only needed on right shifts. 
1660 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1661 dst.SetRounding(i, true); 1662 } 1663 src_val >>= -shift_val; 1664 } else { 1665 src_val <<= shift_val; 1666 } 1667 dst.SetInt(vform, i, src_val); 1668 } 1669 } 1670 return dst; 1671 } 1672 1673 1674 LogicVRegister Simulator::ushl(VectorFormat vform, 1675 LogicVRegister dst, 1676 const LogicVRegister& src1, 1677 const LogicVRegister& src2) { 1678 dst.ClearForWrite(vform); 1679 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1680 int8_t shift_val = src2.Int(vform, i); 1681 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1682 1683 // Set saturation state. 1684 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { 1685 dst.SetUnsignedSat(i, true); 1686 } 1687 1688 uint64_t src_val = src1.Uint(vform, i); 1689 if ((shift_val > 63) || (shift_val < -64)) { 1690 dst.SetUint(vform, i, 0); 1691 } else { 1692 if (shift_val < 0) { 1693 // Set rounding state. Rounding only needed on right shifts. 1694 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1695 dst.SetRounding(i, true); 1696 } 1697 1698 if (shift_val == -64) { 1699 src_val = 0; 1700 } else { 1701 src_val >>= -shift_val; 1702 } 1703 } else { 1704 src_val <<= shift_val; 1705 } 1706 dst.SetUint(vform, i, src_val); 1707 } 1708 } 1709 return dst; 1710 } 1711 1712 1713 LogicVRegister Simulator::neg(VectorFormat vform, 1714 LogicVRegister dst, 1715 const LogicVRegister& src) { 1716 dst.ClearForWrite(vform); 1717 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1718 // Test for signed saturation. 
1719 int64_t sa = src.Int(vform, i); 1720 if (sa == MinIntFromFormat(vform)) { 1721 dst.SetSignedSat(i, true); 1722 } 1723 dst.SetInt(vform, i, -sa); 1724 } 1725 return dst; 1726 } 1727 1728 1729 LogicVRegister Simulator::suqadd(VectorFormat vform, 1730 LogicVRegister dst, 1731 const LogicVRegister& src) { 1732 dst.ClearForWrite(vform); 1733 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1734 int64_t sa = dst.IntLeftJustified(vform, i); 1735 uint64_t ub = src.UintLeftJustified(vform, i); 1736 int64_t sr = sa + ub; 1737 1738 if (sr < sa) { // Test for signed positive saturation. 1739 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 1740 } else { 1741 dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i)); 1742 } 1743 } 1744 return dst; 1745 } 1746 1747 1748 LogicVRegister Simulator::usqadd(VectorFormat vform, 1749 LogicVRegister dst, 1750 const LogicVRegister& src) { 1751 dst.ClearForWrite(vform); 1752 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1753 uint64_t ua = dst.UintLeftJustified(vform, i); 1754 int64_t sb = src.IntLeftJustified(vform, i); 1755 uint64_t ur = ua + sb; 1756 1757 if ((sb > 0) && (ur <= ua)) { 1758 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 1759 } else if ((sb < 0) && (ur >= ua)) { 1760 dst.SetUint(vform, i, 0); // Negative saturation. 1761 } else { 1762 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); 1763 } 1764 } 1765 return dst; 1766 } 1767 1768 1769 LogicVRegister Simulator::abs(VectorFormat vform, 1770 LogicVRegister dst, 1771 const LogicVRegister& src) { 1772 dst.ClearForWrite(vform); 1773 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1774 // Test for signed saturation. 
1775 int64_t sa = src.Int(vform, i); 1776 if (sa == MinIntFromFormat(vform)) { 1777 dst.SetSignedSat(i, true); 1778 } 1779 if (sa < 0) { 1780 dst.SetInt(vform, i, -sa); 1781 } else { 1782 dst.SetInt(vform, i, sa); 1783 } 1784 } 1785 return dst; 1786 } 1787 1788 1789 LogicVRegister Simulator::extractnarrow(VectorFormat dstform, 1790 LogicVRegister dst, 1791 bool dstIsSigned, 1792 const LogicVRegister& src, 1793 bool srcIsSigned) { 1794 bool upperhalf = false; 1795 VectorFormat srcform = kFormatUndefined; 1796 int64_t ssrc[8]; 1797 uint64_t usrc[8]; 1798 1799 switch (dstform) { 1800 case kFormat8B : upperhalf = false; srcform = kFormat8H; break; 1801 case kFormat16B: upperhalf = true; srcform = kFormat8H; break; 1802 case kFormat4H : upperhalf = false; srcform = kFormat4S; break; 1803 case kFormat8H : upperhalf = true; srcform = kFormat4S; break; 1804 case kFormat2S : upperhalf = false; srcform = kFormat2D; break; 1805 case kFormat4S : upperhalf = true; srcform = kFormat2D; break; 1806 case kFormatB : upperhalf = false; srcform = kFormatH; break; 1807 case kFormatH : upperhalf = false; srcform = kFormatS; break; 1808 case kFormatS : upperhalf = false; srcform = kFormatD; break; 1809 default:VIXL_UNIMPLEMENTED(); 1810 } 1811 1812 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 1813 ssrc[i] = src.Int(srcform, i); 1814 usrc[i] = src.Uint(srcform, i); 1815 } 1816 1817 int offset; 1818 if (upperhalf) { 1819 offset = LaneCountFromFormat(dstform) / 2; 1820 } else { 1821 offset = 0; 1822 dst.ClearForWrite(dstform); 1823 } 1824 1825 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 1826 // Test for signed saturation 1827 if (ssrc[i] > MaxIntFromFormat(dstform)) { 1828 dst.SetSignedSat(offset + i, true); 1829 } else if (ssrc[i] < MinIntFromFormat(dstform)) { 1830 dst.SetSignedSat(offset + i, false); 1831 } 1832 1833 // Test for unsigned saturation 1834 if (srcIsSigned) { 1835 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 1836 
dst.SetUnsignedSat(offset + i, true); 1837 } else if (ssrc[i] < 0) { 1838 dst.SetUnsignedSat(offset + i, false); 1839 } 1840 } else { 1841 if (usrc[i] > MaxUintFromFormat(dstform)) { 1842 dst.SetUnsignedSat(offset + i, true); 1843 } 1844 } 1845 1846 int64_t result; 1847 if (srcIsSigned) { 1848 result = ssrc[i] & MaxUintFromFormat(dstform); 1849 } else { 1850 result = usrc[i] & MaxUintFromFormat(dstform); 1851 } 1852 1853 if (dstIsSigned) { 1854 dst.SetInt(dstform, offset + i, result); 1855 } else { 1856 dst.SetUint(dstform, offset + i, result); 1857 } 1858 } 1859 return dst; 1860 } 1861 1862 1863 LogicVRegister Simulator::xtn(VectorFormat vform, 1864 LogicVRegister dst, 1865 const LogicVRegister& src) { 1866 return extractnarrow(vform, dst, true, src, true); 1867 } 1868 1869 1870 LogicVRegister Simulator::sqxtn(VectorFormat vform, 1871 LogicVRegister dst, 1872 const LogicVRegister& src) { 1873 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); 1874 } 1875 1876 1877 LogicVRegister Simulator::sqxtun(VectorFormat vform, 1878 LogicVRegister dst, 1879 const LogicVRegister& src) { 1880 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 1881 } 1882 1883 1884 LogicVRegister Simulator::uqxtn(VectorFormat vform, 1885 LogicVRegister dst, 1886 const LogicVRegister& src) { 1887 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 1888 } 1889 1890 1891 LogicVRegister Simulator::absdiff(VectorFormat vform, 1892 LogicVRegister dst, 1893 const LogicVRegister& src1, 1894 const LogicVRegister& src2, 1895 bool issigned) { 1896 dst.ClearForWrite(vform); 1897 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1898 if (issigned) { 1899 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); 1900 sr = sr > 0 ? sr : -sr; 1901 dst.SetInt(vform, i, sr); 1902 } else { 1903 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); 1904 sr = sr > 0 ? 
sr : -sr; 1905 dst.SetUint(vform, i, sr); 1906 } 1907 } 1908 return dst; 1909 } 1910 1911 1912 LogicVRegister Simulator::saba(VectorFormat vform, 1913 LogicVRegister dst, 1914 const LogicVRegister& src1, 1915 const LogicVRegister& src2) { 1916 SimVRegister temp; 1917 dst.ClearForWrite(vform); 1918 absdiff(vform, temp, src1, src2, true); 1919 add(vform, dst, dst, temp); 1920 return dst; 1921 } 1922 1923 1924 LogicVRegister Simulator::uaba(VectorFormat vform, 1925 LogicVRegister dst, 1926 const LogicVRegister& src1, 1927 const LogicVRegister& src2) { 1928 SimVRegister temp; 1929 dst.ClearForWrite(vform); 1930 absdiff(vform, temp, src1, src2, false); 1931 add(vform, dst, dst, temp); 1932 return dst; 1933 } 1934 1935 1936 LogicVRegister Simulator::not_(VectorFormat vform, 1937 LogicVRegister dst, 1938 const LogicVRegister& src) { 1939 dst.ClearForWrite(vform); 1940 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1941 dst.SetUint(vform, i, ~src.Uint(vform, i)); 1942 } 1943 return dst; 1944 } 1945 1946 1947 LogicVRegister Simulator::rbit(VectorFormat vform, 1948 LogicVRegister dst, 1949 const LogicVRegister& src) { 1950 uint64_t result[16]; 1951 int laneCount = LaneCountFromFormat(vform); 1952 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1953 uint64_t reversed_value; 1954 uint64_t value; 1955 for (int i = 0; i < laneCount; i++) { 1956 value = src.Uint(vform, i); 1957 reversed_value = 0; 1958 for (int j = 0; j < laneSizeInBits; j++) { 1959 reversed_value = (reversed_value << 1) | (value & 1); 1960 value >>= 1; 1961 } 1962 result[i] = reversed_value; 1963 } 1964 1965 dst.ClearForWrite(vform); 1966 for (int i = 0; i < laneCount; ++i) { 1967 dst.SetUint(vform, i, result[i]); 1968 } 1969 return dst; 1970 } 1971 1972 1973 LogicVRegister Simulator::rev(VectorFormat vform, 1974 LogicVRegister dst, 1975 const LogicVRegister& src, 1976 int revSize) { 1977 uint64_t result[16]; 1978 int laneCount = LaneCountFromFormat(vform); 1979 int laneSize = 
LaneSizeInBytesFromFormat(vform); 1980 int lanesPerLoop = revSize / laneSize; 1981 for (int i = 0; i < laneCount; i += lanesPerLoop) { 1982 for (int j = 0; j < lanesPerLoop; j++) { 1983 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); 1984 } 1985 } 1986 dst.ClearForWrite(vform); 1987 for (int i = 0; i < laneCount; ++i) { 1988 dst.SetUint(vform, i, result[i]); 1989 } 1990 return dst; 1991 } 1992 1993 1994 LogicVRegister Simulator::rev16(VectorFormat vform, 1995 LogicVRegister dst, 1996 const LogicVRegister& src) { 1997 return rev(vform, dst, src, 2); 1998 } 1999 2000 2001 LogicVRegister Simulator::rev32(VectorFormat vform, 2002 LogicVRegister dst, 2003 const LogicVRegister& src) { 2004 return rev(vform, dst, src, 4); 2005 } 2006 2007 2008 LogicVRegister Simulator::rev64(VectorFormat vform, 2009 LogicVRegister dst, 2010 const LogicVRegister& src) { 2011 return rev(vform, dst, src, 8); 2012 } 2013 2014 2015 LogicVRegister Simulator::addlp(VectorFormat vform, 2016 LogicVRegister dst, 2017 const LogicVRegister& src, 2018 bool is_signed, 2019 bool do_accumulate) { 2020 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 2021 2022 int64_t sr[16]; 2023 uint64_t ur[16]; 2024 2025 int laneCount = LaneCountFromFormat(vform); 2026 for (int i = 0; i < laneCount; ++i) { 2027 if (is_signed) { 2028 sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1); 2029 } else { 2030 ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 2031 } 2032 } 2033 2034 dst.ClearForWrite(vform); 2035 for (int i = 0; i < laneCount; ++i) { 2036 if (do_accumulate) { 2037 if (is_signed) { 2038 dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]); 2039 } else { 2040 dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]); 2041 } 2042 } else { 2043 if (is_signed) { 2044 dst.SetInt(vform, i, sr[i]); 2045 } else { 2046 dst.SetUint(vform, i, ur[i]); 2047 } 2048 } 2049 } 2050 2051 return dst; 2052 } 2053 2054 2055 LogicVRegister Simulator::saddlp(VectorFormat vform, 2056 
LogicVRegister dst, 2057 const LogicVRegister& src) { 2058 return addlp(vform, dst, src, true, false); 2059 } 2060 2061 2062 LogicVRegister Simulator::uaddlp(VectorFormat vform, 2063 LogicVRegister dst, 2064 const LogicVRegister& src) { 2065 return addlp(vform, dst, src, false, false); 2066 } 2067 2068 2069 LogicVRegister Simulator::sadalp(VectorFormat vform, 2070 LogicVRegister dst, 2071 const LogicVRegister& src) { 2072 return addlp(vform, dst, src, true, true); 2073 } 2074 2075 2076 LogicVRegister Simulator::uadalp(VectorFormat vform, 2077 LogicVRegister dst, 2078 const LogicVRegister& src) { 2079 return addlp(vform, dst, src, false, true); 2080 } 2081 2082 2083 LogicVRegister Simulator::ext(VectorFormat vform, 2084 LogicVRegister dst, 2085 const LogicVRegister& src1, 2086 const LogicVRegister& src2, 2087 int index) { 2088 uint8_t result[16]; 2089 int laneCount = LaneCountFromFormat(vform); 2090 for (int i = 0; i < laneCount - index; ++i) { 2091 result[i] = src1.Uint(vform, i + index); 2092 } 2093 for (int i = 0; i < index; ++i) { 2094 result[laneCount - index + i] = src2.Uint(vform, i); 2095 } 2096 dst.ClearForWrite(vform); 2097 for (int i = 0; i < laneCount; ++i) { 2098 dst.SetUint(vform, i, result[i]); 2099 } 2100 return dst; 2101 } 2102 2103 2104 LogicVRegister Simulator::dup_element(VectorFormat vform, 2105 LogicVRegister dst, 2106 const LogicVRegister& src, 2107 int src_index) { 2108 int laneCount = LaneCountFromFormat(vform); 2109 uint64_t value = src.Uint(vform, src_index); 2110 dst.ClearForWrite(vform); 2111 for (int i = 0; i < laneCount; ++i) { 2112 dst.SetUint(vform, i, value); 2113 } 2114 return dst; 2115 } 2116 2117 2118 LogicVRegister Simulator::dup_immediate(VectorFormat vform, 2119 LogicVRegister dst, 2120 uint64_t imm) { 2121 int laneCount = LaneCountFromFormat(vform); 2122 uint64_t value = imm & MaxUintFromFormat(vform); 2123 dst.ClearForWrite(vform); 2124 for (int i = 0; i < laneCount; ++i) { 2125 dst.SetUint(vform, i, value); 2126 } 2127 
return dst; 2128 } 2129 2130 2131 LogicVRegister Simulator::ins_element(VectorFormat vform, 2132 LogicVRegister dst, 2133 int dst_index, 2134 const LogicVRegister& src, 2135 int src_index) { 2136 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2137 return dst; 2138 } 2139 2140 2141 LogicVRegister Simulator::ins_immediate(VectorFormat vform, 2142 LogicVRegister dst, 2143 int dst_index, 2144 uint64_t imm) { 2145 uint64_t value = imm & MaxUintFromFormat(vform); 2146 dst.SetUint(vform, dst_index, value); 2147 return dst; 2148 } 2149 2150 2151 LogicVRegister Simulator::mov(VectorFormat vform, 2152 LogicVRegister dst, 2153 const LogicVRegister& src) { 2154 dst.ClearForWrite(vform); 2155 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { 2156 dst.SetUint(vform, lane, src.Uint(vform, lane)); 2157 } 2158 return dst; 2159 } 2160 2161 2162 LogicVRegister Simulator::movi(VectorFormat vform, 2163 LogicVRegister dst, 2164 uint64_t imm) { 2165 int laneCount = LaneCountFromFormat(vform); 2166 dst.ClearForWrite(vform); 2167 for (int i = 0; i < laneCount; ++i) { 2168 dst.SetUint(vform, i, imm); 2169 } 2170 return dst; 2171 } 2172 2173 2174 LogicVRegister Simulator::mvni(VectorFormat vform, 2175 LogicVRegister dst, 2176 uint64_t imm) { 2177 int laneCount = LaneCountFromFormat(vform); 2178 dst.ClearForWrite(vform); 2179 for (int i = 0; i < laneCount; ++i) { 2180 dst.SetUint(vform, i, ~imm); 2181 } 2182 return dst; 2183 } 2184 2185 2186 LogicVRegister Simulator::orr(VectorFormat vform, 2187 LogicVRegister dst, 2188 const LogicVRegister& src, 2189 uint64_t imm) { 2190 uint64_t result[16]; 2191 int laneCount = LaneCountFromFormat(vform); 2192 for (int i = 0; i < laneCount; ++i) { 2193 result[i] = src.Uint(vform, i) | imm; 2194 } 2195 dst.ClearForWrite(vform); 2196 for (int i = 0; i < laneCount; ++i) { 2197 dst.SetUint(vform, i, result[i]); 2198 } 2199 return dst; 2200 } 2201 2202 2203 LogicVRegister Simulator::uxtl(VectorFormat vform, 2204 LogicVRegister dst, 2205 
const LogicVRegister& src) { 2206 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2207 2208 dst.ClearForWrite(vform); 2209 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2210 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2211 } 2212 return dst; 2213 } 2214 2215 2216 LogicVRegister Simulator::sxtl(VectorFormat vform, 2217 LogicVRegister dst, 2218 const LogicVRegister& src) { 2219 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2220 2221 dst.ClearForWrite(vform); 2222 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2223 dst.SetInt(vform, i, src.Int(vform_half, i)); 2224 } 2225 return dst; 2226 } 2227 2228 2229 LogicVRegister Simulator::uxtl2(VectorFormat vform, 2230 LogicVRegister dst, 2231 const LogicVRegister& src) { 2232 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2233 int lane_count = LaneCountFromFormat(vform); 2234 2235 dst.ClearForWrite(vform); 2236 for (int i = 0; i < lane_count; i++) { 2237 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); 2238 } 2239 return dst; 2240 } 2241 2242 2243 LogicVRegister Simulator::sxtl2(VectorFormat vform, 2244 LogicVRegister dst, 2245 const LogicVRegister& src) { 2246 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2247 int lane_count = LaneCountFromFormat(vform); 2248 2249 dst.ClearForWrite(vform); 2250 for (int i = 0; i < lane_count; i++) { 2251 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2252 } 2253 return dst; 2254 } 2255 2256 2257 LogicVRegister Simulator::shrn(VectorFormat vform, 2258 LogicVRegister dst, 2259 const LogicVRegister& src, 2260 int shift) { 2261 SimVRegister temp; 2262 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2263 VectorFormat vform_dst = vform; 2264 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2265 return extractnarrow(vform_dst, dst, false, shifted_src, false); 2266 } 2267 2268 2269 LogicVRegister Simulator::shrn2(VectorFormat vform, 2270 LogicVRegister dst, 2271 const LogicVRegister& src, 2272 
int shift) { 2273 SimVRegister temp; 2274 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2275 VectorFormat vformdst = vform; 2276 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2277 return extractnarrow(vformdst, dst, false, shifted_src, false); 2278 } 2279 2280 2281 LogicVRegister Simulator::rshrn(VectorFormat vform, 2282 LogicVRegister dst, 2283 const LogicVRegister& src, 2284 int shift) { 2285 SimVRegister temp; 2286 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2287 VectorFormat vformdst = vform; 2288 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2289 return extractnarrow(vformdst, dst, false, shifted_src, false); 2290 } 2291 2292 2293 LogicVRegister Simulator::rshrn2(VectorFormat vform, 2294 LogicVRegister dst, 2295 const LogicVRegister& src, 2296 int shift) { 2297 SimVRegister temp; 2298 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2299 VectorFormat vformdst = vform; 2300 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2301 return extractnarrow(vformdst, dst, false, shifted_src, false); 2302 } 2303 2304 2305 LogicVRegister Simulator::tbl(VectorFormat vform, 2306 LogicVRegister dst, 2307 const LogicVRegister& tab, 2308 const LogicVRegister& ind) { 2309 SimVRegister result; 2310 movi(vform, result, 0); 2311 tbx(vform, result, tab, ind); 2312 return orr(vform, dst, result, result); 2313 } 2314 2315 2316 LogicVRegister Simulator::tbl(VectorFormat vform, 2317 LogicVRegister dst, 2318 const LogicVRegister& tab, 2319 const LogicVRegister& tab2, 2320 const LogicVRegister& ind) { 2321 SimVRegister result; 2322 movi(vform, result, 0); 2323 tbx(vform, result, tab, tab2, ind); 2324 return orr(vform, dst, result, result); 2325 } 2326 2327 2328 LogicVRegister Simulator::tbl(VectorFormat vform, 2329 LogicVRegister dst, 2330 const LogicVRegister& tab, 2331 const LogicVRegister& tab2, 2332 const LogicVRegister& tab3, 2333 
const LogicVRegister& ind) { 2334 SimVRegister result; 2335 movi(vform, result, 0); 2336 tbx(vform, result, tab, tab2, tab3, ind); 2337 return orr(vform, dst, result, result); 2338 } 2339 2340 2341 LogicVRegister Simulator::tbl(VectorFormat vform, 2342 LogicVRegister dst, 2343 const LogicVRegister& tab, 2344 const LogicVRegister& tab2, 2345 const LogicVRegister& tab3, 2346 const LogicVRegister& tab4, 2347 const LogicVRegister& ind) { 2348 SimVRegister result; 2349 movi(vform, result, 0); 2350 tbx(vform, result, tab, tab2, tab3, tab4, ind); 2351 return orr(vform, dst, result, result); 2352 } 2353 2354 2355 LogicVRegister Simulator::tbx(VectorFormat vform, 2356 LogicVRegister dst, 2357 const LogicVRegister& tab, 2358 const LogicVRegister& ind) { 2359 dst.ClearForWrite(vform); 2360 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2361 uint64_t j = ind.Uint(vform, i); 2362 switch (j >> 4) { 2363 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2364 } 2365 } 2366 return dst; 2367 } 2368 2369 2370 LogicVRegister Simulator::tbx(VectorFormat vform, 2371 LogicVRegister dst, 2372 const LogicVRegister& tab, 2373 const LogicVRegister& tab2, 2374 const LogicVRegister& ind) { 2375 dst.ClearForWrite(vform); 2376 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2377 uint64_t j = ind.Uint(vform, i); 2378 switch (j >> 4) { 2379 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2380 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; 2381 } 2382 } 2383 return dst; 2384 } 2385 2386 2387 LogicVRegister Simulator::tbx(VectorFormat vform, 2388 LogicVRegister dst, 2389 const LogicVRegister& tab, 2390 const LogicVRegister& tab2, 2391 const LogicVRegister& tab3, 2392 const LogicVRegister& ind) { 2393 dst.ClearForWrite(vform); 2394 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2395 uint64_t j = ind.Uint(vform, i); 2396 switch (j >> 4) { 2397 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2398 case 
1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; 2399 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break; 2400 } 2401 } 2402 return dst; 2403 } 2404 2405 2406 LogicVRegister Simulator::tbx(VectorFormat vform, 2407 LogicVRegister dst, 2408 const LogicVRegister& tab, 2409 const LogicVRegister& tab2, 2410 const LogicVRegister& tab3, 2411 const LogicVRegister& tab4, 2412 const LogicVRegister& ind) { 2413 dst.ClearForWrite(vform); 2414 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2415 uint64_t j = ind.Uint(vform, i); 2416 switch (j >> 4) { 2417 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2418 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; 2419 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break; 2420 case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break; 2421 } 2422 } 2423 return dst; 2424 } 2425 2426 2427 LogicVRegister Simulator::uqshrn(VectorFormat vform, 2428 LogicVRegister dst, 2429 const LogicVRegister& src, 2430 int shift) { 2431 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2432 } 2433 2434 2435 LogicVRegister Simulator::uqshrn2(VectorFormat vform, 2436 LogicVRegister dst, 2437 const LogicVRegister& src, 2438 int shift) { 2439 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2440 } 2441 2442 2443 LogicVRegister Simulator::uqrshrn(VectorFormat vform, 2444 LogicVRegister dst, 2445 const LogicVRegister& src, 2446 int shift) { 2447 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2448 } 2449 2450 2451 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, 2452 LogicVRegister dst, 2453 const LogicVRegister& src, 2454 int shift) { 2455 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2456 } 2457 2458 2459 LogicVRegister Simulator::sqshrn(VectorFormat vform, 2460 LogicVRegister dst, 2461 const LogicVRegister& src, 2462 int shift) { 2463 SimVRegister temp; 2464 VectorFormat vformsrc = 
VectorFormatDoubleWidth(vform); 2465 VectorFormat vformdst = vform; 2466 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2467 return sqxtn(vformdst, dst, shifted_src); 2468 } 2469 2470 2471 LogicVRegister Simulator::sqshrn2(VectorFormat vform, 2472 LogicVRegister dst, 2473 const LogicVRegister& src, 2474 int shift) { 2475 SimVRegister temp; 2476 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2477 VectorFormat vformdst = vform; 2478 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2479 return sqxtn(vformdst, dst, shifted_src); 2480 } 2481 2482 2483 LogicVRegister Simulator::sqrshrn(VectorFormat vform, 2484 LogicVRegister dst, 2485 const LogicVRegister& src, 2486 int shift) { 2487 SimVRegister temp; 2488 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2489 VectorFormat vformdst = vform; 2490 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2491 return sqxtn(vformdst, dst, shifted_src); 2492 } 2493 2494 2495 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, 2496 LogicVRegister dst, 2497 const LogicVRegister& src, 2498 int shift) { 2499 SimVRegister temp; 2500 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2501 VectorFormat vformdst = vform; 2502 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2503 return sqxtn(vformdst, dst, shifted_src); 2504 } 2505 2506 2507 LogicVRegister Simulator::sqshrun(VectorFormat vform, 2508 LogicVRegister dst, 2509 const LogicVRegister& src, 2510 int shift) { 2511 SimVRegister temp; 2512 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2513 VectorFormat vformdst = vform; 2514 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2515 return sqxtun(vformdst, dst, shifted_src); 2516 } 2517 2518 2519 LogicVRegister Simulator::sqshrun2(VectorFormat vform, 2520 LogicVRegister dst, 2521 const LogicVRegister& src, 2522 int shift) { 2523 SimVRegister temp; 2524 
VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2525 VectorFormat vformdst = vform; 2526 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2527 return sqxtun(vformdst, dst, shifted_src); 2528 } 2529 2530 2531 LogicVRegister Simulator::sqrshrun(VectorFormat vform, 2532 LogicVRegister dst, 2533 const LogicVRegister& src, 2534 int shift) { 2535 SimVRegister temp; 2536 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2537 VectorFormat vformdst = vform; 2538 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2539 return sqxtun(vformdst, dst, shifted_src); 2540 } 2541 2542 2543 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, 2544 LogicVRegister dst, 2545 const LogicVRegister& src, 2546 int shift) { 2547 SimVRegister temp; 2548 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2549 VectorFormat vformdst = vform; 2550 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2551 return sqxtun(vformdst, dst, shifted_src); 2552 } 2553 2554 2555 LogicVRegister Simulator::uaddl(VectorFormat vform, 2556 LogicVRegister dst, 2557 const LogicVRegister& src1, 2558 const LogicVRegister& src2) { 2559 SimVRegister temp1, temp2; 2560 uxtl(vform, temp1, src1); 2561 uxtl(vform, temp2, src2); 2562 add(vform, dst, temp1, temp2); 2563 return dst; 2564 } 2565 2566 2567 LogicVRegister Simulator::uaddl2(VectorFormat vform, 2568 LogicVRegister dst, 2569 const LogicVRegister& src1, 2570 const LogicVRegister& src2) { 2571 SimVRegister temp1, temp2; 2572 uxtl2(vform, temp1, src1); 2573 uxtl2(vform, temp2, src2); 2574 add(vform, dst, temp1, temp2); 2575 return dst; 2576 } 2577 2578 2579 LogicVRegister Simulator::uaddw(VectorFormat vform, 2580 LogicVRegister dst, 2581 const LogicVRegister& src1, 2582 const LogicVRegister& src2) { 2583 SimVRegister temp; 2584 uxtl(vform, temp, src2); 2585 add(vform, dst, src1, temp); 2586 return dst; 2587 } 2588 2589 2590 
// Adds `src1` to the uxtl2-widened `src2`; `src1` is used as-is.
LogicVRegister Simulator::uaddw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister widened;
  uxtl2(vform, widened, src2);
  add(vform, dst, src1, widened);
  return dst;
}


// Widens both sources with sxtl, then adds them into `dst`.
LogicVRegister Simulator::saddl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  add(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl2, then adds them into `dst`.
LogicVRegister Simulator::saddl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  add(vform, dst, wide1, wide2);
  return dst;
}


// Adds `src1` to the sxtl-widened `src2`; `src1` is used as-is.
LogicVRegister Simulator::saddw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister widened;
  sxtl(vform, widened, src2);
  add(vform, dst, src1, widened);
  return dst;
}


// Adds `src1` to the sxtl2-widened `src2`; `src1` is used as-is.
LogicVRegister Simulator::saddw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister widened;
  sxtl2(vform, widened, src2);
  add(vform, dst, src1, widened);
  return dst;
}


// Widens both sources with uxtl, then subtracts the second from the first.
LogicVRegister Simulator::usubl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  sub(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with uxtl2, then subtracts the second from the first.
LogicVRegister Simulator::usubl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl2(vform, wide1, src1);
  uxtl2(vform, wide2, src2);
  sub(vform, dst, wide1, wide2);
  return dst;
}


// Subtracts the uxtl-widened `src2` from `src1`.
LogicVRegister Simulator::usubw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister widened;
  uxtl(vform, widened, src2);
  sub(vform, dst, src1, widened);
  return dst;
}


// Subtracts the uxtl2-widened `src2` from `src1`.
LogicVRegister Simulator::usubw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister widened;
  uxtl2(vform, widened, src2);
  sub(vform, dst, src1, widened);
  return dst;
}


// Widens both sources with sxtl, then subtracts the second from the first.
LogicVRegister Simulator::ssubl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  sub(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl2, then subtracts the second from the first.
LogicVRegister Simulator::ssubl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  sub(vform, dst, wide1, wide2);
  return dst;
}


// Subtracts the sxtl-widened `src2` from `src1`.
LogicVRegister Simulator::ssubw(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister widened;
  sxtl(vform, widened, src2);
  sub(vform, dst, src1, widened);
  return dst;
}


// Subtracts the sxtl2-widened `src2` from `src1`.
LogicVRegister Simulator::ssubw2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister widened;
  sxtl2(vform, widened, src2);
  sub(vform, dst, src1, widened);
  return dst;
}


// Widens both sources with uxtl and passes them to uaba with `dst`.
LogicVRegister Simulator::uabal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  uaba(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with uxtl2 and passes them to uaba with `dst`.
LogicVRegister Simulator::uabal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl2(vform, wide1, src1);
  uxtl2(vform, wide2, src2);
  uaba(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl and passes them to saba with `dst`.
LogicVRegister Simulator::sabal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  saba(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl2 and passes them to saba with `dst`.
LogicVRegister Simulator::sabal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  saba(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with uxtl, then calls absdiff with its final flag false.
LogicVRegister Simulator::uabdl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  absdiff(vform, dst, wide1, wide2, false);
  return dst;
}


// Widens both sources with uxtl2, then calls absdiff with its final flag
// false.
LogicVRegister Simulator::uabdl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl2(vform, wide1, src1);
  uxtl2(vform, wide2, src2);
  absdiff(vform, dst, wide1, wide2, false);
  return dst;
}


// Widens both sources with sxtl, then calls absdiff with its final flag true.
LogicVRegister Simulator::sabdl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  absdiff(vform, dst, wide1, wide2, true);
  return dst;
}


// Widens both sources with sxtl2, then calls absdiff with its final flag true.
LogicVRegister Simulator::sabdl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  absdiff(vform, dst, wide1, wide2, true);
  return dst;
}


// Widens both sources with uxtl, then multiplies them into `dst`.
LogicVRegister Simulator::umull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  mul(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with uxtl2, then multiplies them into `dst`.
LogicVRegister Simulator::umull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl2(vform, wide1, src1);
  uxtl2(vform, wide2, src2);
  mul(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl, then multiplies them into `dst`.
LogicVRegister Simulator::smull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  mul(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl2, then multiplies them into `dst`.
LogicVRegister Simulator::smull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  mul(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with uxtl and passes them to mls with `dst`.
LogicVRegister Simulator::umlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  mls(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with uxtl2 and passes them to mls with `dst`.
LogicVRegister Simulator::umlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl2(vform, wide1, src1);
  uxtl2(vform, wide2, src2);
  mls(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl and passes them to mls with `dst`.
LogicVRegister Simulator::smlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  mls(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl2 and passes them to mls with `dst`.
LogicVRegister Simulator::smlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  mls(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with uxtl and passes them to mla with `dst`.
LogicVRegister Simulator::umlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  mla(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with uxtl2 and passes them to mla with `dst`.
LogicVRegister Simulator::umlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  uxtl2(vform, wide1, src1);
  uxtl2(vform, wide2, src2);
  mla(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl and passes them to mla with `dst`.
LogicVRegister Simulator::smlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  mla(vform, dst, wide1, wide2);
  return dst;
}


// Widens both sources with sxtl2 and passes them to mla with `dst`.
LogicVRegister Simulator::smlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1, wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  mla(vform, dst, wide1, wide2);
  return dst;
}


// Adds the sqdmull result of the sources to `dst`, then applies
// SignedSaturate to the sum.
LogicVRegister Simulator::sqdmlal(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull(vform, scratch, src1, src2);
  return add(vform, dst, dst, doubled_product).SignedSaturate(vform);
}


// Adds the sqdmull2 result of the sources to `dst`, then applies
// SignedSaturate to the sum.
LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull2(vform, scratch, src1, src2);
  return add(vform, dst, dst, doubled_product).SignedSaturate(vform);
}


// Subtracts the sqdmull result of the sources from `dst`, then applies
// SignedSaturate to the difference.
LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull(vform, scratch, src1, src2);
  return sub(vform, dst, dst, doubled_product).SignedSaturate(vform);
}


// Subtracts the sqdmull2 result of the sources from `dst`, then applies
// SignedSaturate to the difference.
LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull2(vform, scratch, src1, src2);
  return sub(vform, dst, dst, doubled_product).SignedSaturate(vform);
}


// Computes smull of the sources, doubles it by adding it to itself, and
// applies SignedSaturate to the result.
LogicVRegister Simulator::sqdmull(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister widened_product = smull(vform, scratch, src1, src2);
  return add(vform, dst, widened_product, widened_product)
      .SignedSaturate(vform);
}


// Computes smull2 of the sources, doubles it by adding it to itself, and
// applies SignedSaturate to the result.
LogicVRegister Simulator::sqdmull2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister widened_product = smull2(vform, scratch, src1, src2);
  return add(vform, dst, widened_product, widened_product)
      .SignedSaturate(vform);
}


// Per-lane doubling multiply returning the high half, optionally rounded, and
// clamped to the signed range of the lane format.
LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  // 2 * INT_32_MIN * INT_32_MIN does not fit in an int64_t. Instead of
  //   (2 * src1 * src2 + (1 << (esize - 1))) >> esize
  // the equivalent, non-overflowing form
  //   (src1 * src2 + (1 << (esize - 2))) >> (esize - 1)
  // is evaluated.
  const int esize = LaneSizeInBitsFromFormat(vform);
  const int rounding_term = round ? (1 << (esize - 2)) : 0;
  const int lane_count = LaneCountFromFormat(vform);

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    int64_t value = src1.Int(vform, lane) * src2.Int(vform, lane);
    value += rounding_term;
    value >>= (esize - 1);

    // Clamp to the representable range of a destination lane.
    if (value > MaxIntFromFormat(vform)) {
      value = MaxIntFromFormat(vform);
    } else if (value < MinIntFromFormat(vform)) {
      value = MinIntFromFormat(vform);
    }
    dst.SetInt(vform, lane, value);
  }
  return dst;
}


// sqrdmulh with rounding disabled.
LogicVRegister Simulator::sqdmulh(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const bool kNoRounding = false;
  return sqrdmulh(vform, dst, src1, src2, kNoRounding);
}


// Adds the sources at the double-width format, then narrows with shrn using a
// shift equal to the lane size of `vform`.
LogicVRegister Simulator::addhn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister sum;
  const VectorFormat wide_format = VectorFormatDoubleWidth(vform);
  add(wide_format, sum, src1, src2);
  shrn(vform, dst, sum, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// Second-half variant of addhn; narrows with shrn2.
LogicVRegister Simulator::addhn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister sum;
  const VectorFormat wide_format =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  add(wide_format, sum, src1, src2);
  shrn2(vform, dst, sum, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// As addhn, but narrows with rshrn.
LogicVRegister Simulator::raddhn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister sum;
  const VectorFormat wide_format = VectorFormatDoubleWidth(vform);
  add(wide_format, sum, src1, src2);
  rshrn(vform, dst, sum, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// As addhn2, but narrows with rshrn2.
LogicVRegister Simulator::raddhn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister sum;
  const VectorFormat wide_format =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  add(wide_format, sum, src1, src2);
  rshrn2(vform, dst, sum, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// Subtracts the sources at the double-width format, then narrows with shrn
// using a shift equal to the lane size of `vform`.
LogicVRegister Simulator::subhn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister difference;
  const VectorFormat wide_format = VectorFormatDoubleWidth(vform);
  sub(wide_format, difference, src1, src2);
  shrn(vform, dst, difference, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// Second-half variant of subhn; narrows with shrn2.
LogicVRegister Simulator::subhn2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister difference;
  const VectorFormat wide_format =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  sub(wide_format, difference, src1, src2);
  shrn2(vform, dst, difference, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// As subhn, but narrows with rshrn.
LogicVRegister Simulator::rsubhn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister difference;
  const VectorFormat wide_format = VectorFormatDoubleWidth(vform);
  sub(wide_format, difference, src1, src2);
  rshrn(vform, dst, difference, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// As subhn2, but narrows with rshrn2.
LogicVRegister Simulator::rsubhn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister difference;
  const VectorFormat wide_format =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  sub(wide_format, difference, src1, src2);
  rshrn2(vform, dst, difference, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// Interleaves the even-indexed lanes of `src1` and `src2`: output lanes 2k and
// 2k+1 take lane 2k of `src1` and `src2` respectively.
LogicVRegister Simulator::trn1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t transposed[16];
  const int lane_count = LaneCountFromFormat(vform);
  const int pair_count = lane_count / 2;
  for (int pair = 0; pair < pair_count; ++pair) {
    const int even = 2 * pair;
    transposed[even] = src1.Uint(vform, even);
    transposed[even + 1] = src2.Uint(vform, even);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, transposed[lane]);
  }
  return dst;
}


// Interleaves the odd-indexed lanes of `src1` and `src2`: output lanes 2k and
// 2k+1 take lane 2k+1 of `src1` and `src2` respectively.
LogicVRegister Simulator::trn2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t transposed[16];
  const int lane_count = LaneCountFromFormat(vform);
  const int pair_count = lane_count / 2;
  for (int pair = 0; pair < pair_count; ++pair) {
    const int even = 2 * pair;
    const int odd = even + 1;
    transposed[even] = src1.Uint(vform, odd);
    transposed[odd] = src2.Uint(vform, odd);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, transposed[lane]);
  }
  return dst;
}


// Interleaves the first half of the lanes of `src1` with the first half of
// the lanes of `src2`.
LogicVRegister Simulator::zip1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t zipped[16];
  const int lane_count = LaneCountFromFormat(vform);
  const int half = lane_count / 2;
  for (int k = 0; k < half; ++k) {
    zipped[2 * k] = src1.Uint(vform, k);
    zipped[(2 * k) + 1] = src2.Uint(vform, k);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, zipped[lane]);
  }
  return dst;
}


// Interleaves the second half of the lanes of `src1` with the second half of
// the lanes of `src2`.
LogicVRegister Simulator::zip2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t zipped[16];
  const int lane_count = LaneCountFromFormat(vform);
  const int half = lane_count / 2;
  for (int k = 0; k < half; ++k) {
    zipped[2 * k] = src1.Uint(vform, half + k);
    zipped[(2 * k) + 1] = src2.Uint(vform, half + k);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, zipped[lane]);
  }
  return dst;
}


// Concatenates the lanes of `src1` and `src2` and keeps the even-indexed
// elements of the concatenation.
LogicVRegister Simulator::uzp1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t combined[32];
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    combined[lane] = src1.Uint(vform, lane);
    combined[lane_count + lane] = src2.Uint(vform, lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, combined[2 * lane]);
  }
  return dst;
}


// Concatenates the lanes of `src1` and `src2` and keeps the odd-indexed
// elements of the concatenation.
LogicVRegister Simulator::uzp2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t combined[32];
  const int lane_count = LaneCountFromFormat(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    combined[lane] = src1.Uint(vform, lane);
    combined[lane_count + lane] = src2.Uint(vform, lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, combined[(2 * lane) + 1]);
  }
  return dst;
}


// Floating-point add. A NaN produced by FPProcessNaNs is returned directly;
// adding infinities of opposite sign yields the default NaN.
template <typename T>
T Simulator::FPAdd(T op1, T op2) {
  T propagated = FPProcessNaNs(op1, op2);
  if (std::isnan(propagated)) return propagated;

  const bool opposite_infinities =
      std::isinf(op1) && std::isinf(op2) && (op1 != op2);
  if (opposite_infinities) {
    // inf + -inf returns the default NaN.
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Everything else is left to the host's arithmetic.
  return op1 + op2;
}


// Floating-point subtract. Subtracting equal infinities yields the default
// NaN.
template <typename T>
T Simulator::FPSub(T op1, T op2) {
  // Callers are expected to have dealt with NaN operands already.
  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));

  const bool same_infinities =
      std::isinf(op1) && std::isinf(op2) && (op1 == op2);
  if (same_infinities) {
    // inf - inf returns the default NaN.
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Everything else is left to the host's arithmetic.
  return op1 - op2;
}


// Floating-point multiply. Infinity times zero yields the default NaN.
template <typename T>
T Simulator::FPMul(T op1, T op2) {
  // Callers are expected to have dealt with NaN operands already.
  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));

  const bool inf_times_zero =
      (std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0));
  if (inf_times_zero) {
    // inf * 0.0 returns the default NaN.
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Everything else is left to the host's arithmetic.
  return op1 * op2;
}


// Like FPMul, except that infinity times zero gives 2.0 carrying the sign of
// the product instead of a NaN.
template <typename T>
T Simulator::FPMulx(T op1, T op2) {
  const bool inf_times_zero =
      (std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0));
  if (!inf_times_zero) {
    return FPMul(op1, op2);
  }

  // inf * 0.0 returns +/-2.0.
  T magnitude = 2.0;
  return copysign(1.0, op1) * copysign(1.0, op2) * magnitude;
}


// Fused multiply-add: a + (op1 * op2), with explicit handling of NaN
// propagation, NaN-generating operand combinations and the sign of zero
// results.
template <typename T>
T Simulator::FPMulAdd(T a, T op1, T op2) {
  T result = FPProcessNaNs3(a, op1, op2);

  T addend_sign = copysign(1.0, a);
  T product_sign = copysign(1.0, op1) * copysign(1.0, op2);
  bool product_is_inf = std::isinf(op1) || std::isinf(op2);
  bool generates_nan =
      (std::isinf(op1) && (op2 == 0.0)) ||  // inf * 0.0
      (std::isinf(op2) && (op1 == 0.0)) ||  // 0.0 * inf
      (std::isinf(a) && product_is_inf &&
       (addend_sign != product_sign));      // inf - inf

  if (std::isnan(result)) {
    // A NaN generated by the operation takes priority over a quiet NaN that
    // would otherwise be propagated from a.
    if (generates_nan && IsQuietNaN(a)) {
      FPProcessException();
      return FPDefaultNaN<T>();
    }
    return result;
  }

  // No NaN operands, but the operation itself is invalid.
  if (generates_nan) {
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Some fma implementations get the sign of an exact zero wrong: it is
  // positive unless both a and op1 * op2 are negative.
  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
    return ((addend_sign < 0) && (product_sign < 0)) ? -0.0 : 0.0;
  }

  result = FusedMultiplyAdd(op1, op2, a);
  VIXL_ASSERT(!std::isnan(result));

  // Likewise for results that round to zero: when a is 0.0 the sign is that
  // of op1 * op2 before rounding.
  if ((a == 0.0) && (result == 0.0)) {
    return copysign(0.0, product_sign);
  }

  return result;
}


// Floating-point divide. inf / inf and 0.0 / 0.0 yield the default NaN; any
// other division by zero raises an exception before dividing.
template <typename T>
T Simulator::FPDiv(T op1, T op2) {
  // Callers are expected to have dealt with NaN operands already.
  VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));

  const bool invalid_operation =
      (std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0));
  if (invalid_operation) {
    // inf / inf and 0.0 / 0.0 return the default NaN.
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  if (op2 == 0.0) FPProcessException();

  // Everything else is left to the host's arithmetic.
  return op1 / op2;
}


// Floating-point square root. NaN inputs go through FPProcessNaN; inputs that
// compare less than zero yield the default NaN.
template <typename T>
T Simulator::FPSqrt(T op) {
  if (std::isnan(op)) {
    return FPProcessNaN(op);
  }
  if (op < 0.0) {
    FPProcessException();
    return FPDefaultNaN<T>();
  }
  return sqrt(op);
}


// Floating-point maximum. A NaN produced by FPProcessNaNs is returned
// directly; for zeros of opposite sign the result is +0.0.
template <typename T>
T Simulator::FPMax(T a, T b) {
  T propagated = FPProcessNaNs(a, b);
  if (std::isnan(propagated)) return propagated;

  const bool opposite_zeros =
      (a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b));
  if (opposite_zeros) {
    // a and b are zero, and the sign differs: return +0.0.
    return 0.0;
  }
  return (a > b) ? a : b;
}


// Maximum in which a single quiet NaN operand is replaced by negative
// infinity before NaN processing, so the other operand can be returned.
template <typename T>
T Simulator::FPMaxNM(T a, T b) {
  const bool a_is_quiet = IsQuietNaN(a);
  const bool b_is_quiet = IsQuietNaN(b);
  if (a_is_quiet && !b_is_quiet) {
    a = kFP64NegativeInfinity;
  } else if (!a_is_quiet && b_is_quiet) {
    b = kFP64NegativeInfinity;
  }

  T propagated = FPProcessNaNs(a, b);
  if (std::isnan(propagated)) return propagated;
  return FPMax(a, b);
}


// Floating-point minimum. A NaN produced by FPProcessNaNs is returned
// directly; for zeros of opposite sign the result is -0.0.
template <typename T>
T Simulator::FPMin(T a, T b) {
  T propagated = FPProcessNaNs(a, b);
  if (std::isnan(propagated)) return propagated;

  const bool opposite_zeros =
      (a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b));
  if (opposite_zeros) {
    // a and b are zero, and the sign differs: return -0.0.
    return -0.0;
  }
  return (a < b) ? a : b;
}


template <typename T>
T Simulator::FPMinNM(T a, T b) {
  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
    a = kFP64PositiveInfinity;
  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
    b = kFP64PositiveInfinity;
  }

  T result = FPProcessNaNs(a, b);
  return std::isnan(result) ?
result : FPMin(a, b); 3473 } 3474 3475 3476 template <typename T> 3477 T Simulator::FPRecipStepFused(T op1, T op2) { 3478 const T two = 2.0; 3479 if ((std::isinf(op1) && (op2 == 0.0)) 3480 || ((op1 == 0.0) && (std::isinf(op2)))) { 3481 return two; 3482 } else if (std::isinf(op1) || std::isinf(op2)) { 3483 // Return +inf if signs match, otherwise -inf. 3484 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3485 : kFP64NegativeInfinity; 3486 } else { 3487 return FusedMultiplyAdd(op1, op2, two); 3488 } 3489 } 3490 3491 3492 template <typename T> 3493 T Simulator::FPRSqrtStepFused(T op1, T op2) { 3494 const T one_point_five = 1.5; 3495 const T two = 2.0; 3496 3497 if ((std::isinf(op1) && (op2 == 0.0)) 3498 || ((op1 == 0.0) && (std::isinf(op2)))) { 3499 return one_point_five; 3500 } else if (std::isinf(op1) || std::isinf(op2)) { 3501 // Return +inf if signs match, otherwise -inf. 3502 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3503 : kFP64NegativeInfinity; 3504 } else { 3505 // The multiply-add-halve operation must be fully fused, so avoid interim 3506 // rounding by checking which operand can be losslessly divided by two 3507 // before doing the multiply-add. 3508 if (std::isnormal(op1 / two)) { 3509 return FusedMultiplyAdd(op1 / two, op2, one_point_five); 3510 } else if (std::isnormal(op2 / two)) { 3511 return FusedMultiplyAdd(op1, op2 / two, one_point_five); 3512 } else { 3513 // Neither operand is normal after halving: the result is dominated by 3514 // the addition term, so just return that. 3515 return one_point_five; 3516 } 3517 } 3518 } 3519 3520 int32_t Simulator::FPToFixedJS(double value) { 3521 // The Z-flag is set when the conversion from double precision floating-point 3522 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN, 3523 // outside the bounds of a 32-bit integer, or isn't an exact integer then the 3524 // Z-flag is unset. 
3525 int Z = 1; 3526 int32_t result; 3527 3528 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 3529 (value == kFP64NegativeInfinity)) { 3530 // +/- zero and infinity all return zero, however -0 and +/- Infinity also 3531 // unset the Z-flag. 3532 result = 0.0; 3533 if ((value != 0.0) || std::signbit(value)) { 3534 Z = 0; 3535 } 3536 } else if (std::isnan(value)) { 3537 // NaN values unset the Z-flag and set the result to 0. 3538 FPProcessNaN(value); 3539 result = 0; 3540 Z = 0; 3541 } else { 3542 // All other values are converted to an integer representation, rounded 3543 // toward zero. 3544 double int_result = std::floor(value); 3545 double error = value - int_result; 3546 3547 if ((error != 0.0) && (int_result < 0.0)) { 3548 int_result++; 3549 } 3550 3551 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost 3552 // write a one-liner with std::round, but the behaviour on ties is incorrect 3553 // for our purposes. 3554 double mod_const = static_cast<double>(UINT64_C(1) << 32); 3555 double mod_error = 3556 (int_result / mod_const) - std::floor(int_result / mod_const); 3557 double constrained; 3558 if (mod_error == 0.5) { 3559 constrained = INT32_MIN; 3560 } else { 3561 constrained = int_result - mod_const * round(int_result / mod_const); 3562 } 3563 3564 VIXL_ASSERT(std::floor(constrained) == constrained); 3565 VIXL_ASSERT(constrained >= INT32_MIN); 3566 VIXL_ASSERT(constrained <= INT32_MAX); 3567 3568 // Take the bottom 32 bits of the result as a 32-bit integer. 3569 result = static_cast<int32_t>(constrained); 3570 3571 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) || 3572 (error != 0.0)) { 3573 // If the integer result is out of range or the conversion isn't exact, 3574 // take exception and unset the Z-flag. 
3575 FPProcessException(); 3576 Z = 0; 3577 } 3578 } 3579 3580 ReadNzcv().SetN(0); 3581 ReadNzcv().SetZ(Z); 3582 ReadNzcv().SetC(0); 3583 ReadNzcv().SetV(0); 3584 3585 return result; 3586 } 3587 3588 3589 double Simulator::FPRoundInt(double value, FPRounding round_mode) { 3590 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 3591 (value == kFP64NegativeInfinity)) { 3592 return value; 3593 } else if (std::isnan(value)) { 3594 return FPProcessNaN(value); 3595 } 3596 3597 double int_result = std::floor(value); 3598 double error = value - int_result; 3599 switch (round_mode) { 3600 case FPTieAway: { 3601 // Take care of correctly handling the range ]-0.5, -0.0], which must 3602 // yield -0.0. 3603 if ((-0.5 < value) && (value < 0.0)) { 3604 int_result = -0.0; 3605 3606 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { 3607 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3608 // result is positive, round up. 3609 int_result++; 3610 } 3611 break; 3612 } 3613 case FPTieEven: { 3614 // Take care of correctly handling the range [-0.5, -0.0], which must 3615 // yield -0.0. 3616 if ((-0.5 <= value) && (value < 0.0)) { 3617 int_result = -0.0; 3618 3619 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3620 // result is odd, round up. 3621 } else if ((error > 0.5) || 3622 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { 3623 int_result++; 3624 } 3625 break; 3626 } 3627 case FPZero: { 3628 // If value>0 then we take floor(value) 3629 // otherwise, ceil(value). 3630 if (value < 0) { 3631 int_result = ceil(value); 3632 } 3633 break; 3634 } 3635 case FPNegativeInfinity: { 3636 // We always use floor(value). 3637 break; 3638 } 3639 case FPPositiveInfinity: { 3640 // Take care of correctly handling the range ]-1.0, -0.0], which must 3641 // yield -0.0. 3642 if ((-1.0 < value) && (value < 0.0)) { 3643 int_result = -0.0; 3644 3645 // If the error is non-zero, round up. 
3646 } else if (error > 0.0) { 3647 int_result++; 3648 } 3649 break; 3650 } 3651 default: VIXL_UNIMPLEMENTED(); 3652 } 3653 return int_result; 3654 } 3655 3656 3657 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 3658 value = FPRoundInt(value, rmode); 3659 if (value >= kWMaxInt) { 3660 return kWMaxInt; 3661 } else if (value < kWMinInt) { 3662 return kWMinInt; 3663 } 3664 return std::isnan(value) ? 0 : static_cast<int32_t>(value); 3665 } 3666 3667 3668 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 3669 value = FPRoundInt(value, rmode); 3670 // The compiler would have to round kXMaxInt, triggering a warning. Compare 3671 // against the largest int64_t that is exactly representable as a double. 3672 if (value > kXMaxExactInt) { 3673 return kXMaxInt; 3674 } else if (value < kXMinInt) { 3675 return kXMinInt; 3676 } 3677 return std::isnan(value) ? 0 : static_cast<int64_t>(value); 3678 } 3679 3680 3681 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 3682 value = FPRoundInt(value, rmode); 3683 if (value >= kWMaxUInt) { 3684 return kWMaxUInt; 3685 } else if (value < 0.0) { 3686 return 0; 3687 } 3688 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); 3689 } 3690 3691 3692 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 3693 value = FPRoundInt(value, rmode); 3694 // The compiler would have to round kXMaxUInt, triggering a warning. Compare 3695 // against the largest uint64_t that is exactly representable as a double. 3696 if (value > kXMaxExactUInt) { 3697 return kXMaxUInt; 3698 } else if (value < 0.0) { 3699 return 0; 3700 } 3701 return std::isnan(value) ? 
0 : static_cast<uint64_t>(value); 3702 } 3703 3704 3705 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ 3706 template <typename T> \ 3707 LogicVRegister Simulator::FN(VectorFormat vform, \ 3708 LogicVRegister dst, \ 3709 const LogicVRegister& src1, \ 3710 const LogicVRegister& src2) { \ 3711 dst.ClearForWrite(vform); \ 3712 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ 3713 T op1 = src1.Float<T>(i); \ 3714 T op2 = src2.Float<T>(i); \ 3715 T result; \ 3716 if (PROCNAN) { \ 3717 result = FPProcessNaNs(op1, op2); \ 3718 if (!std::isnan(result)) { \ 3719 result = OP(op1, op2); \ 3720 } \ 3721 } else { \ 3722 result = OP(op1, op2); \ 3723 } \ 3724 dst.SetFloat(i, result); \ 3725 } \ 3726 return dst; \ 3727 } \ 3728 \ 3729 LogicVRegister Simulator::FN(VectorFormat vform, \ 3730 LogicVRegister dst, \ 3731 const LogicVRegister& src1, \ 3732 const LogicVRegister& src2) { \ 3733 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ 3734 FN<float>(vform, dst, src1, src2); \ 3735 } else { \ 3736 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ 3737 FN<double>(vform, dst, src1, src2); \ 3738 } \ 3739 return dst; \ 3740 } 3741 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) 3742 #undef DEFINE_NEON_FP_VECTOR_OP 3743 3744 3745 LogicVRegister Simulator::fnmul(VectorFormat vform, 3746 LogicVRegister dst, 3747 const LogicVRegister& src1, 3748 const LogicVRegister& src2) { 3749 SimVRegister temp; 3750 LogicVRegister product = fmul(vform, temp, src1, src2); 3751 return fneg(vform, dst, product); 3752 } 3753 3754 3755 template <typename T> 3756 LogicVRegister Simulator::frecps(VectorFormat vform, 3757 LogicVRegister dst, 3758 const LogicVRegister& src1, 3759 const LogicVRegister& src2) { 3760 dst.ClearForWrite(vform); 3761 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3762 T op1 = -src1.Float<T>(i); 3763 T op2 = src2.Float<T>(i); 3764 T result = FPProcessNaNs(op1, op2); 3765 dst.SetFloat(i, std::isnan(result) ? 
result : FPRecipStepFused(op1, op2)); 3766 } 3767 return dst; 3768 } 3769 3770 3771 LogicVRegister Simulator::frecps(VectorFormat vform, 3772 LogicVRegister dst, 3773 const LogicVRegister& src1, 3774 const LogicVRegister& src2) { 3775 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3776 frecps<float>(vform, dst, src1, src2); 3777 } else { 3778 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3779 frecps<double>(vform, dst, src1, src2); 3780 } 3781 return dst; 3782 } 3783 3784 3785 template <typename T> 3786 LogicVRegister Simulator::frsqrts(VectorFormat vform, 3787 LogicVRegister dst, 3788 const LogicVRegister& src1, 3789 const LogicVRegister& src2) { 3790 dst.ClearForWrite(vform); 3791 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3792 T op1 = -src1.Float<T>(i); 3793 T op2 = src2.Float<T>(i); 3794 T result = FPProcessNaNs(op1, op2); 3795 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); 3796 } 3797 return dst; 3798 } 3799 3800 3801 LogicVRegister Simulator::frsqrts(VectorFormat vform, 3802 LogicVRegister dst, 3803 const LogicVRegister& src1, 3804 const LogicVRegister& src2) { 3805 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3806 frsqrts<float>(vform, dst, src1, src2); 3807 } else { 3808 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3809 frsqrts<double>(vform, dst, src1, src2); 3810 } 3811 return dst; 3812 } 3813 3814 3815 template <typename T> 3816 LogicVRegister Simulator::fcmp(VectorFormat vform, 3817 LogicVRegister dst, 3818 const LogicVRegister& src1, 3819 const LogicVRegister& src2, 3820 Condition cond) { 3821 dst.ClearForWrite(vform); 3822 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3823 bool result = false; 3824 T op1 = src1.Float<T>(i); 3825 T op2 = src2.Float<T>(i); 3826 T nan_result = FPProcessNaNs(op1, op2); 3827 if (!std::isnan(nan_result)) { 3828 switch (cond) { 3829 case eq: result = (op1 == op2); break; 3830 case ge: result = (op1 >= op2); break; 3831 case gt: result = 
(op1 > op2) ; break; 3832 case le: result = (op1 <= op2); break; 3833 case lt: result = (op1 < op2) ; break; 3834 default: VIXL_UNREACHABLE(); break; 3835 } 3836 } 3837 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 3838 } 3839 return dst; 3840 } 3841 3842 3843 LogicVRegister Simulator::fcmp(VectorFormat vform, 3844 LogicVRegister dst, 3845 const LogicVRegister& src1, 3846 const LogicVRegister& src2, 3847 Condition cond) { 3848 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3849 fcmp<float>(vform, dst, src1, src2, cond); 3850 } else { 3851 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3852 fcmp<double>(vform, dst, src1, src2, cond); 3853 } 3854 return dst; 3855 } 3856 3857 3858 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, 3859 LogicVRegister dst, 3860 const LogicVRegister& src, 3861 Condition cond) { 3862 SimVRegister temp; 3863 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3864 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); 3865 fcmp<float>(vform, dst, src, zero_reg, cond); 3866 } else { 3867 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3868 LogicVRegister zero_reg = dup_immediate(vform, temp, 3869 DoubleToRawbits(0.0)); 3870 fcmp<double>(vform, dst, src, zero_reg, cond); 3871 } 3872 return dst; 3873 } 3874 3875 3876 LogicVRegister Simulator::fabscmp(VectorFormat vform, 3877 LogicVRegister dst, 3878 const LogicVRegister& src1, 3879 const LogicVRegister& src2, 3880 Condition cond) { 3881 SimVRegister temp1, temp2; 3882 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3883 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 3884 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 3885 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 3886 } else { 3887 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3888 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); 3889 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); 3890 
fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 3891 } 3892 return dst; 3893 } 3894 3895 3896 template <typename T> 3897 LogicVRegister Simulator::fmla(VectorFormat vform, 3898 LogicVRegister dst, 3899 const LogicVRegister& src1, 3900 const LogicVRegister& src2) { 3901 dst.ClearForWrite(vform); 3902 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3903 T op1 = src1.Float<T>(i); 3904 T op2 = src2.Float<T>(i); 3905 T acc = dst.Float<T>(i); 3906 T result = FPMulAdd(acc, op1, op2); 3907 dst.SetFloat(i, result); 3908 } 3909 return dst; 3910 } 3911 3912 3913 LogicVRegister Simulator::fmla(VectorFormat vform, 3914 LogicVRegister dst, 3915 const LogicVRegister& src1, 3916 const LogicVRegister& src2) { 3917 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3918 fmla<float>(vform, dst, src1, src2); 3919 } else { 3920 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3921 fmla<double>(vform, dst, src1, src2); 3922 } 3923 return dst; 3924 } 3925 3926 3927 template <typename T> 3928 LogicVRegister Simulator::fmls(VectorFormat vform, 3929 LogicVRegister dst, 3930 const LogicVRegister& src1, 3931 const LogicVRegister& src2) { 3932 dst.ClearForWrite(vform); 3933 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3934 T op1 = -src1.Float<T>(i); 3935 T op2 = src2.Float<T>(i); 3936 T acc = dst.Float<T>(i); 3937 T result = FPMulAdd(acc, op1, op2); 3938 dst.SetFloat(i, result); 3939 } 3940 return dst; 3941 } 3942 3943 3944 LogicVRegister Simulator::fmls(VectorFormat vform, 3945 LogicVRegister dst, 3946 const LogicVRegister& src1, 3947 const LogicVRegister& src2) { 3948 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3949 fmls<float>(vform, dst, src1, src2); 3950 } else { 3951 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3952 fmls<double>(vform, dst, src1, src2); 3953 } 3954 return dst; 3955 } 3956 3957 3958 template <typename T> 3959 LogicVRegister Simulator::fneg(VectorFormat vform, 3960 LogicVRegister dst, 3961 const LogicVRegister& src) { 
3962 dst.ClearForWrite(vform); 3963 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3964 T op = src.Float<T>(i); 3965 op = -op; 3966 dst.SetFloat(i, op); 3967 } 3968 return dst; 3969 } 3970 3971 3972 LogicVRegister Simulator::fneg(VectorFormat vform, 3973 LogicVRegister dst, 3974 const LogicVRegister& src) { 3975 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3976 fneg<float>(vform, dst, src); 3977 } else { 3978 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3979 fneg<double>(vform, dst, src); 3980 } 3981 return dst; 3982 } 3983 3984 3985 template <typename T> 3986 LogicVRegister Simulator::fabs_(VectorFormat vform, 3987 LogicVRegister dst, 3988 const LogicVRegister& src) { 3989 dst.ClearForWrite(vform); 3990 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3991 T op = src.Float<T>(i); 3992 if (copysign(1.0, op) < 0.0) { 3993 op = -op; 3994 } 3995 dst.SetFloat(i, op); 3996 } 3997 return dst; 3998 } 3999 4000 4001 LogicVRegister Simulator::fabs_(VectorFormat vform, 4002 LogicVRegister dst, 4003 const LogicVRegister& src) { 4004 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4005 fabs_<float>(vform, dst, src); 4006 } else { 4007 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4008 fabs_<double>(vform, dst, src); 4009 } 4010 return dst; 4011 } 4012 4013 4014 LogicVRegister Simulator::fabd(VectorFormat vform, 4015 LogicVRegister dst, 4016 const LogicVRegister& src1, 4017 const LogicVRegister& src2) { 4018 SimVRegister temp; 4019 fsub(vform, temp, src1, src2); 4020 fabs_(vform, dst, temp); 4021 return dst; 4022 } 4023 4024 4025 LogicVRegister Simulator::fsqrt(VectorFormat vform, 4026 LogicVRegister dst, 4027 const LogicVRegister& src) { 4028 dst.ClearForWrite(vform); 4029 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4030 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4031 float result = FPSqrt(src.Float<float>(i)); 4032 dst.SetFloat(i, result); 4033 } 4034 } else { 4035 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4036 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4037 double result = FPSqrt(src.Float<double>(i)); 4038 dst.SetFloat(i, result); 4039 } 4040 } 4041 return dst; 4042 } 4043 4044 4045 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ 4046 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4047 LogicVRegister dst, \ 4048 const LogicVRegister& src1, \ 4049 const LogicVRegister& src2) { \ 4050 SimVRegister temp1, temp2; \ 4051 uzp1(vform, temp1, src1, src2); \ 4052 uzp2(vform, temp2, src1, src2); \ 4053 FN(vform, dst, temp1, temp2); \ 4054 return dst; \ 4055 } \ 4056 \ 4057 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4058 LogicVRegister dst, \ 4059 const LogicVRegister& src) { \ 4060 if (vform == kFormatS) { \ 4061 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ 4062 dst.SetFloat(0, result); \ 4063 } else { \ 4064 VIXL_ASSERT(vform == kFormatD); \ 4065 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ 4066 dst.SetFloat(0, result); \ 4067 } \ 4068 dst.ClearForWrite(vform); \ 4069 return dst; \ 4070 } 4071 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) 4072 #undef DEFINE_NEON_FP_PAIR_OP 4073 4074 4075 LogicVRegister Simulator::fminmaxv(VectorFormat vform, 4076 LogicVRegister dst, 4077 const LogicVRegister& src, 4078 FPMinMaxOp Op) { 4079 VIXL_ASSERT(vform == kFormat4S); 4080 USE(vform); 4081 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); 4082 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); 4083 float result = (this->*Op)(result1, result2); 4084 dst.ClearForWrite(kFormatS); 4085 dst.SetFloat<float>(0, result); 4086 return dst; 4087 } 4088 4089 4090 LogicVRegister Simulator::fmaxv(VectorFormat vform, 4091 LogicVRegister dst, 4092 const LogicVRegister& src) { 4093 return fminmaxv(vform, dst, src, &Simulator::FPMax); 4094 } 4095 4096 4097 LogicVRegister Simulator::fminv(VectorFormat vform, 4098 LogicVRegister dst, 4099 const 
LogicVRegister& src) { 4100 return fminmaxv(vform, dst, src, &Simulator::FPMin); 4101 } 4102 4103 4104 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, 4105 LogicVRegister dst, 4106 const LogicVRegister& src) { 4107 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM); 4108 } 4109 4110 4111 LogicVRegister Simulator::fminnmv(VectorFormat vform, 4112 LogicVRegister dst, 4113 const LogicVRegister& src) { 4114 return fminmaxv(vform, dst, src, &Simulator::FPMinNM); 4115 } 4116 4117 4118 LogicVRegister Simulator::fmul(VectorFormat vform, 4119 LogicVRegister dst, 4120 const LogicVRegister& src1, 4121 const LogicVRegister& src2, 4122 int index) { 4123 dst.ClearForWrite(vform); 4124 SimVRegister temp; 4125 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4126 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4127 fmul<float>(vform, dst, src1, index_reg); 4128 4129 } else { 4130 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4131 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4132 fmul<double>(vform, dst, src1, index_reg); 4133 } 4134 return dst; 4135 } 4136 4137 4138 LogicVRegister Simulator::fmla(VectorFormat vform, 4139 LogicVRegister dst, 4140 const LogicVRegister& src1, 4141 const LogicVRegister& src2, 4142 int index) { 4143 dst.ClearForWrite(vform); 4144 SimVRegister temp; 4145 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4146 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4147 fmla<float>(vform, dst, src1, index_reg); 4148 4149 } else { 4150 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4151 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4152 fmla<double>(vform, dst, src1, index_reg); 4153 } 4154 return dst; 4155 } 4156 4157 4158 LogicVRegister Simulator::fmls(VectorFormat vform, 4159 LogicVRegister dst, 4160 const LogicVRegister& src1, 4161 const LogicVRegister& src2, 4162 int index) { 4163 dst.ClearForWrite(vform); 4164 SimVRegister temp; 
4165 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4166 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4167 fmls<float>(vform, dst, src1, index_reg); 4168 4169 } else { 4170 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4171 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4172 fmls<double>(vform, dst, src1, index_reg); 4173 } 4174 return dst; 4175 } 4176 4177 4178 LogicVRegister Simulator::fmulx(VectorFormat vform, 4179 LogicVRegister dst, 4180 const LogicVRegister& src1, 4181 const LogicVRegister& src2, 4182 int index) { 4183 dst.ClearForWrite(vform); 4184 SimVRegister temp; 4185 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4186 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4187 fmulx<float>(vform, dst, src1, index_reg); 4188 4189 } else { 4190 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4191 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4192 fmulx<double>(vform, dst, src1, index_reg); 4193 } 4194 return dst; 4195 } 4196 4197 4198 LogicVRegister Simulator::frint(VectorFormat vform, 4199 LogicVRegister dst, 4200 const LogicVRegister& src, 4201 FPRounding rounding_mode, 4202 bool inexact_exception) { 4203 dst.ClearForWrite(vform); 4204 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4205 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4206 float input = src.Float<float>(i); 4207 float rounded = FPRoundInt(input, rounding_mode); 4208 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4209 FPProcessException(); 4210 } 4211 dst.SetFloat<float>(i, rounded); 4212 } 4213 } else { 4214 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4215 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4216 double input = src.Float<double>(i); 4217 double rounded = FPRoundInt(input, rounding_mode); 4218 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4219 FPProcessException(); 4220 } 4221 
dst.SetFloat<double>(i, rounded); 4222 } 4223 } 4224 return dst; 4225 } 4226 4227 4228 LogicVRegister Simulator::fcvts(VectorFormat vform, 4229 LogicVRegister dst, 4230 const LogicVRegister& src, 4231 FPRounding rounding_mode, 4232 int fbits) { 4233 dst.ClearForWrite(vform); 4234 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4235 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4236 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4237 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); 4238 } 4239 } else { 4240 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4241 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4242 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4243 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); 4244 } 4245 } 4246 return dst; 4247 } 4248 4249 4250 LogicVRegister Simulator::fcvtu(VectorFormat vform, 4251 LogicVRegister dst, 4252 const LogicVRegister& src, 4253 FPRounding rounding_mode, 4254 int fbits) { 4255 dst.ClearForWrite(vform); 4256 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4257 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4258 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4259 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); 4260 } 4261 } else { 4262 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4263 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4264 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4265 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); 4266 } 4267 } 4268 return dst; 4269 } 4270 4271 4272 LogicVRegister Simulator::fcvtl(VectorFormat vform, 4273 LogicVRegister dst, 4274 const LogicVRegister& src) { 4275 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4276 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4277 // TODO: Full support for SimFloat16 in SimRegister(s). 
4278 dst.SetFloat(i, 4279 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)), 4280 ReadDN())); 4281 } 4282 } else { 4283 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4284 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4285 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN())); 4286 } 4287 } 4288 return dst; 4289 } 4290 4291 4292 LogicVRegister Simulator::fcvtl2(VectorFormat vform, 4293 LogicVRegister dst, 4294 const LogicVRegister& src) { 4295 int lane_count = LaneCountFromFormat(vform); 4296 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4297 for (int i = 0; i < lane_count; i++) { 4298 // TODO: Full support for SimFloat16 in SimRegister(s). 4299 dst.SetFloat(i, 4300 FPToFloat(RawbitsToFloat16( 4301 src.Float<uint16_t>(i + lane_count)), 4302 ReadDN())); 4303 } 4304 } else { 4305 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4306 for (int i = 0; i < lane_count; i++) { 4307 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN())); 4308 } 4309 } 4310 return dst; 4311 } 4312 4313 4314 LogicVRegister Simulator::fcvtn(VectorFormat vform, 4315 LogicVRegister dst, 4316 const LogicVRegister& src) { 4317 SimVRegister tmp; 4318 LogicVRegister srctmp = mov(kFormat2D, tmp, src); 4319 dst.ClearForWrite(vform); 4320 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4321 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4322 dst.SetFloat(i, 4323 Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i), 4324 FPTieEven, 4325 ReadDN()))); 4326 } 4327 } else { 4328 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4329 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4330 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN())); 4331 } 4332 } 4333 return dst; 4334 } 4335 4336 4337 LogicVRegister Simulator::fcvtn2(VectorFormat vform, 4338 LogicVRegister dst, 4339 const LogicVRegister& src) { 4340 int lane_count = LaneCountFromFormat(vform) / 2; 4341 if (LaneSizeInBitsFromFormat(vform) == 
kHRegSize) { 4342 for (int i = lane_count - 1; i >= 0; i--) { 4343 dst.SetFloat(i + lane_count, 4344 Float16ToRawbits( 4345 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN()))); 4346 } 4347 } else { 4348 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4349 for (int i = lane_count - 1; i >= 0; i--) { 4350 dst.SetFloat(i + lane_count, 4351 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN())); 4352 } 4353 } 4354 return dst; 4355 } 4356 4357 4358 LogicVRegister Simulator::fcvtxn(VectorFormat vform, 4359 LogicVRegister dst, 4360 const LogicVRegister& src) { 4361 SimVRegister tmp; 4362 LogicVRegister srctmp = mov(kFormat2D, tmp, src); 4363 dst.ClearForWrite(vform); 4364 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4365 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4366 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN())); 4367 } 4368 return dst; 4369 } 4370 4371 4372 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, 4373 LogicVRegister dst, 4374 const LogicVRegister& src) { 4375 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4376 int lane_count = LaneCountFromFormat(vform) / 2; 4377 for (int i = lane_count - 1; i >= 0; i--) { 4378 dst.SetFloat(i + lane_count, 4379 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN())); 4380 } 4381 return dst; 4382 } 4383 4384 4385 // Based on reference C function recip_sqrt_estimate from ARM ARM. 
4386 double Simulator::recip_sqrt_estimate(double a) { 4387 int q0, q1, s; 4388 double r; 4389 if (a < 0.5) { 4390 q0 = static_cast<int>(a * 512.0); 4391 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); 4392 } else { 4393 q1 = static_cast<int>(a * 256.0); 4394 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); 4395 } 4396 s = static_cast<int>(256.0 * r + 0.5); 4397 return static_cast<double>(s) / 256.0; 4398 } 4399 4400 4401 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { 4402 return ExtractUnsignedBitfield64(start_bit, end_bit, val); 4403 } 4404 4405 4406 template <typename T> 4407 T Simulator::FPRecipSqrtEstimate(T op) { 4408 if (std::isnan(op)) { 4409 return FPProcessNaN(op); 4410 } else if (op == 0.0) { 4411 if (copysign(1.0, op) < 0.0) { 4412 return kFP64NegativeInfinity; 4413 } else { 4414 return kFP64PositiveInfinity; 4415 } 4416 } else if (copysign(1.0, op) < 0.0) { 4417 FPProcessException(); 4418 return FPDefaultNaN<T>(); 4419 } else if (std::isinf(op)) { 4420 return 0.0; 4421 } else { 4422 uint64_t fraction; 4423 int exp, result_exp; 4424 4425 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4426 exp = FloatExp(op); 4427 fraction = FloatMantissa(op); 4428 fraction <<= 29; 4429 } else { 4430 exp = DoubleExp(op); 4431 fraction = DoubleMantissa(op); 4432 } 4433 4434 if (exp == 0) { 4435 while (Bits(fraction, 51, 51) == 0) { 4436 fraction = Bits(fraction, 50, 0) << 1; 4437 exp -= 1; 4438 } 4439 fraction = Bits(fraction, 50, 0) << 1; 4440 } 4441 4442 double scaled; 4443 if (Bits(exp, 0, 0) == 0) { 4444 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4445 } else { 4446 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44); 4447 } 4448 4449 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4450 result_exp = (380 - exp) / 2; 4451 } else { 4452 result_exp = (3068 - exp) / 2; 4453 } 4454 4455 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled)); 4456 4457 if (sizeof(T) == 
sizeof(float)) { // NOLINT(runtime/sizeof) 4458 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4459 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); 4460 return FloatPack(0, exp_bits, est_bits); 4461 } else { 4462 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); 4463 } 4464 } 4465 } 4466 4467 4468 LogicVRegister Simulator::frsqrte(VectorFormat vform, 4469 LogicVRegister dst, 4470 const LogicVRegister& src) { 4471 dst.ClearForWrite(vform); 4472 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4473 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4474 float input = src.Float<float>(i); 4475 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); 4476 } 4477 } else { 4478 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4479 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4480 double input = src.Float<double>(i); 4481 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); 4482 } 4483 } 4484 return dst; 4485 } 4486 4487 template <typename T> 4488 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 4489 uint32_t sign; 4490 4491 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4492 sign = FloatSign(op); 4493 } else { 4494 sign = DoubleSign(op); 4495 } 4496 4497 if (std::isnan(op)) { 4498 return FPProcessNaN(op); 4499 } else if (std::isinf(op)) { 4500 return (sign == 1) ? -0.0 : 0.0; 4501 } else if (op == 0.0) { 4502 FPProcessException(); // FPExc_DivideByZero exception. 4503 return (sign == 1) ? 
kFP64NegativeInfinity : kFP64PositiveInfinity; 4504 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof) 4505 (std::fabs(op) < std::pow(2.0, -128.0))) || 4506 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof) 4507 (std::fabs(op) < std::pow(2.0, -1024.0)))) { 4508 bool overflow_to_inf = false; 4509 switch (rounding) { 4510 case FPTieEven: overflow_to_inf = true; break; 4511 case FPPositiveInfinity: overflow_to_inf = (sign == 0); break; 4512 case FPNegativeInfinity: overflow_to_inf = (sign == 1); break; 4513 case FPZero: overflow_to_inf = false; break; 4514 default: break; 4515 } 4516 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 4517 if (overflow_to_inf) { 4518 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4519 } else { 4520 // Return FPMaxNormal(sign). 4521 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4522 return FloatPack(sign, 0xfe, 0x07fffff); 4523 } else { 4524 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); 4525 } 4526 } 4527 } else { 4528 uint64_t fraction; 4529 int exp, result_exp; 4530 uint32_t sign; 4531 4532 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4533 sign = FloatSign(op); 4534 exp = FloatExp(op); 4535 fraction = FloatMantissa(op); 4536 fraction <<= 29; 4537 } else { 4538 sign = DoubleSign(op); 4539 exp = DoubleExp(op); 4540 fraction = DoubleMantissa(op); 4541 } 4542 4543 if (exp == 0) { 4544 if (Bits(fraction, 51, 51) == 0) { 4545 exp -= 1; 4546 fraction = Bits(fraction, 49, 0) << 2; 4547 } else { 4548 fraction = Bits(fraction, 50, 0) << 1; 4549 } 4550 } 4551 4552 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4553 4554 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4555 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. 4556 } else { 4557 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 
4558 } 4559 4560 double estimate = recip_estimate(scaled); 4561 4562 fraction = DoubleMantissa(estimate); 4563 if (result_exp == 0) { 4564 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); 4565 } else if (result_exp == -1) { 4566 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 4567 result_exp = 0; 4568 } 4569 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4570 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4571 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 4572 return FloatPack(sign, exp_bits, frac_bits); 4573 } else { 4574 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 4575 } 4576 } 4577 } 4578 4579 4580 LogicVRegister Simulator::frecpe(VectorFormat vform, 4581 LogicVRegister dst, 4582 const LogicVRegister& src, 4583 FPRounding round) { 4584 dst.ClearForWrite(vform); 4585 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4586 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4587 float input = src.Float<float>(i); 4588 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 4589 } 4590 } else { 4591 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4592 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4593 double input = src.Float<double>(i); 4594 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 4595 } 4596 } 4597 return dst; 4598 } 4599 4600 4601 LogicVRegister Simulator::ursqrte(VectorFormat vform, 4602 LogicVRegister dst, 4603 const LogicVRegister& src) { 4604 dst.ClearForWrite(vform); 4605 uint64_t operand; 4606 uint32_t result; 4607 double dp_operand, dp_result; 4608 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4609 operand = src.Uint(vform, i); 4610 if (operand <= 0x3FFFFFFF) { 4611 result = 0xFFFFFFFF; 4612 } else { 4613 dp_operand = operand * std::pow(2.0, -32); 4614 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 4615 result = static_cast<uint32_t>(dp_result); 4616 } 4617 dst.SetUint(vform, i, result); 4618 
} 4619 return dst; 4620 } 4621 4622 4623 // Based on reference C function recip_estimate from ARM ARM. 4624 double Simulator::recip_estimate(double a) { 4625 int q, s; 4626 double r; 4627 q = static_cast<int>(a * 512.0); 4628 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 4629 s = static_cast<int>(256.0 * r + 0.5); 4630 return static_cast<double>(s) / 256.0; 4631 } 4632 4633 4634 LogicVRegister Simulator::urecpe(VectorFormat vform, 4635 LogicVRegister dst, 4636 const LogicVRegister& src) { 4637 dst.ClearForWrite(vform); 4638 uint64_t operand; 4639 uint32_t result; 4640 double dp_operand, dp_result; 4641 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4642 operand = src.Uint(vform, i); 4643 if (operand <= 0x7FFFFFFF) { 4644 result = 0xFFFFFFFF; 4645 } else { 4646 dp_operand = operand * std::pow(2.0, -32); 4647 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 4648 result = static_cast<uint32_t>(dp_result); 4649 } 4650 dst.SetUint(vform, i, result); 4651 } 4652 return dst; 4653 } 4654 4655 template <typename T> 4656 LogicVRegister Simulator::frecpx(VectorFormat vform, 4657 LogicVRegister dst, 4658 const LogicVRegister& src) { 4659 dst.ClearForWrite(vform); 4660 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4661 T op = src.Float<T>(i); 4662 T result; 4663 if (std::isnan(op)) { 4664 result = FPProcessNaN(op); 4665 } else { 4666 int exp; 4667 uint32_t sign; 4668 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4669 sign = FloatSign(op); 4670 exp = FloatExp(op); 4671 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 4672 result = FloatPack(sign, exp, 0); 4673 } else { 4674 sign = DoubleSign(op); 4675 exp = DoubleExp(op); 4676 exp = (exp == 0) ? 
(0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 4677 result = DoublePack(sign, exp, 0); 4678 } 4679 } 4680 dst.SetFloat(i, result); 4681 } 4682 return dst; 4683 } 4684 4685 4686 LogicVRegister Simulator::frecpx(VectorFormat vform, 4687 LogicVRegister dst, 4688 const LogicVRegister& src) { 4689 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4690 frecpx<float>(vform, dst, src); 4691 } else { 4692 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4693 frecpx<double>(vform, dst, src); 4694 } 4695 return dst; 4696 } 4697 4698 LogicVRegister Simulator::scvtf(VectorFormat vform, 4699 LogicVRegister dst, 4700 const LogicVRegister& src, 4701 int fbits, 4702 FPRounding round) { 4703 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4704 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4705 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 4706 dst.SetFloat<float>(i, result); 4707 } else { 4708 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4709 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 4710 dst.SetFloat<double>(i, result); 4711 } 4712 } 4713 return dst; 4714 } 4715 4716 4717 LogicVRegister Simulator::ucvtf(VectorFormat vform, 4718 LogicVRegister dst, 4719 const LogicVRegister& src, 4720 int fbits, 4721 FPRounding round) { 4722 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4723 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4724 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 4725 dst.SetFloat<float>(i, result); 4726 } else { 4727 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4728 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 4729 dst.SetFloat<double>(i, result); 4730 } 4731 } 4732 return dst; 4733 } 4734 4735 4736 } // namespace vixl 4737 4738 #endif // JS_SIMULATOR_ARM64