ShuffleAnalysis.cpp (28239B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "jit/ShuffleAnalysis.h" 7 #include "mozilla/MathAlgorithms.h" 8 #include "jit/MIR-wasm.h" 9 #include "jit/MIR.h" 10 #include "wasm/WasmFeatures.h" 11 12 using namespace js; 13 using namespace jit; 14 15 using mozilla::Maybe; 16 using mozilla::Nothing; 17 using mozilla::Some; 18 19 #ifdef ENABLE_WASM_SIMD 20 21 // Specialization analysis for SIMD operations. This is still x86-centric but 22 // generalizes fairly easily to other architectures. 23 24 // Optimization of v8x16.shuffle. The general byte shuffle+blend is very 25 // expensive (equivalent to at least a dozen instructions), and we want to avoid 26 // that if we can. So look for special cases - there are many. 27 // 28 // The strategy is to sort the operation into one of three buckets depending 29 // on the shuffle pattern and inputs: 30 // 31 // - single operand; shuffles on these values are rotations, reversals, 32 // transpositions, and general permutations 33 // - single-operand-with-interesting-constant (especially zero); shuffles on 34 // these values are often byte shift or scatter operations 35 // - dual operand; shuffles on these operations are blends, catenated 36 // shifts, and (in the worst case) general shuffle+blends 37 // 38 // We're not trying to solve the general problem, only to lower reasonably 39 // expressed patterns that express common operations. Producers that produce 40 // dense and convoluted patterns will end up with the general byte shuffle. 41 // Producers that produce simpler patterns that easily map to hardware will 42 // get faster code. 43 // 44 // In particular, these matchers do not try to combine transformations, so a 45 // shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is 46 // usually going to end up as a general byte shuffle. 47 48 // Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return 49 // true, updating *control. 50 static bool ByteMaskToWordMask(SimdConstant* control) { 51 const SimdConstant::I8x16& lanes = control->asInt8x16(); 52 int16_t controlWords[8]; 53 for (int i = 0; i < 16; i += 2) { 54 if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) { 55 return false; 56 } 57 controlWords[i / 2] = int16_t(lanes[i] / 2); 58 } 59 *control = SimdConstant::CreateX8(controlWords); 60 return true; 61 } 62 63 // Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return 64 // true, updating *control. 65 static bool ByteMaskToDWordMask(SimdConstant* control) { 66 const SimdConstant::I8x16& lanes = control->asInt8x16(); 67 int32_t controlDWords[4]; 68 for (int i = 0; i < 16; i += 4) { 69 if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 && 70 lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) { 71 return false; 72 } 73 controlDWords[i / 4] = lanes[i] / 4; 74 } 75 *control = SimdConstant::CreateX4(controlDWords); 76 return true; 77 } 78 79 // Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return 80 // true, updating *control. 81 static bool ByteMaskToQWordMask(SimdConstant* control) { 82 const SimdConstant::I8x16& lanes = control->asInt8x16(); 83 int64_t controlQWords[2]; 84 for (int i = 0; i < 16; i += 8) { 85 if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 && 86 lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 && 87 lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 && 88 lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) { 89 return false; 90 } 91 controlQWords[i / 8] = lanes[i] / 8; 92 } 93 *control = SimdConstant::CreateX2(controlQWords); 94 return true; 95 } 96 97 // Skip across consecutive values in lanes starting at i, returning the index 98 // after the last element. Lane values must be <= len-1 ("masked"). 99 // 100 // Since every element is a 1-element run, the return value is never the same as 101 // the starting i. 102 template <typename T> 103 static int ScanIncreasingMasked(const T* lanes, int i) { 104 int len = int(16 / sizeof(T)); 105 MOZ_ASSERT(i < len); 106 MOZ_ASSERT(lanes[i] <= len - 1); 107 i++; 108 while (i < len && lanes[i] == lanes[i - 1] + 1) { 109 MOZ_ASSERT(lanes[i] <= len - 1); 110 i++; 111 } 112 return i; 113 } 114 115 // Skip across consecutive values in lanes starting at i, returning the index 116 // after the last element. Lane values must be <= len*2-1 ("unmasked"); the 117 // values len-1 and len are not considered consecutive. 118 // 119 // Since every element is a 1-element run, the return value is never the same as 120 // the starting i. 121 template <typename T> 122 static int ScanIncreasingUnmasked(const T* lanes, int i) { 123 int len = int(16 / sizeof(T)); 124 MOZ_ASSERT(i < len); 125 if (lanes[i] < len) { 126 i++; 127 while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) { 128 i++; 129 } 130 } else { 131 i++; 132 while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) { 133 i++; 134 } 135 } 136 return i; 137 } 138 139 // Skip lanes that equal v starting at i, returning the index just beyond the 140 // last of those. There is no requirement that the initial lanes[i] == v. 141 template <typename T> 142 static int ScanConstant(const T* lanes, int v, int i) { 143 int len = int(16 / sizeof(T)); 144 MOZ_ASSERT(i <= len); 145 while (i < len && lanes[i] == v) { 146 i++; 147 } 148 return i; 149 } 150 151 // Mask lane values denoting rhs elements into lhs elements. 152 template <typename T> 153 static void MaskLanes(T* result, const T* input) { 154 int len = int(16 / sizeof(T)); 155 for (int i = 0; i < len; i++) { 156 result[i] = input[i] & (len - 1); 157 } 158 } 159 160 // Apply a transformation to each lane value. 161 template <typename T> 162 static void MapLanes(T* result, const T* input, int (*f)(int)) { 163 // Hazard analysis trips on "IndirectCall: f" error. 164 // Suppress the check -- `f` is expected to be trivial here. 165 JS::AutoSuppressGCAnalysis nogc; 166 167 int len = int(16 / sizeof(T)); 168 for (int i = 0; i < len; i++) { 169 result[i] = f(input[i]); 170 } 171 } 172 173 // Recognize an identity permutation, assuming lanes is masked. 174 template <typename T> 175 static bool IsIdentity(const T* lanes) { 176 return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T)); 177 } 178 179 // Recognize part of an identity permutation starting at start, with 180 // the first value of the permutation expected to be bias. 181 template <typename T> 182 static bool IsIdentity(const T* lanes, int start, int len, int bias) { 183 if (lanes[start] != bias) { 184 return false; 185 } 186 for (int i = start + 1; i < start + len; i++) { 187 if (lanes[i] != lanes[i - 1] + 1) { 188 return false; 189 } 190 } 191 return true; 192 } 193 194 // We can permute by dwords if the mask is reducible to a dword mask, and in 195 // this case a single PSHUFD is enough. 196 static bool TryPermute32x4(SimdConstant* control) { 197 SimdConstant tmp = *control; 198 if (!ByteMaskToDWordMask(&tmp)) { 199 return false; 200 } 201 *control = tmp; 202 return true; 203 } 204 205 // Can we perform a byte rotate right? We can use PALIGNR. The shift count is 206 // just lanes[0], and *control is unchanged. 207 static bool TryRotateRight8x16(SimdConstant* control) { 208 const SimdConstant::I8x16& lanes = control->asInt8x16(); 209 // Look for the end of the first run of consecutive bytes. 210 int i = ScanIncreasingMasked(lanes, 0); 211 212 // First run must start at a value s.t. we have a rotate if all remaining 213 // bytes are a run. 214 if (lanes[0] != 16 - i) { 215 return false; 216 } 217 218 // If we reached the end of the vector, we're done. 219 if (i == 16) { 220 return true; 221 } 222 223 // Second run must start at source lane zero. 224 if (lanes[i] != 0) { 225 return false; 226 } 227 228 // Second run must end at the end of the lane vector. 229 return ScanIncreasingMasked(lanes, i) == 16; 230 } 231 232 // We can permute by words if the mask is reducible to a word mask. 233 static bool TryPermute16x8(SimdConstant* control) { 234 SimdConstant tmp = *control; 235 if (!ByteMaskToWordMask(&tmp)) { 236 return false; 237 } 238 *control = tmp; 239 return true; 240 } 241 242 // A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD. 243 static bool TryBroadcast16x8(SimdConstant* control) { 244 SimdConstant tmp = *control; 245 if (!ByteMaskToWordMask(&tmp)) { 246 return false; 247 } 248 const SimdConstant::I16x8& lanes = tmp.asInt16x8(); 249 if (ScanConstant(lanes, lanes[0], 0) < 8) { 250 return false; 251 } 252 *control = tmp; 253 return true; 254 } 255 256 // A single byte lane is copied int all the other lanes: PUNPCK*BW + PSHUF*W + 257 // PSHUFD. 258 static bool TryBroadcast8x16(SimdConstant* control) { 259 const SimdConstant::I8x16& lanes = control->asInt8x16(); 260 return ScanConstant(lanes, lanes[0], 0) >= 16; 261 } 262 263 template <int N> 264 static bool TryReverse(SimdConstant* control) { 265 const SimdConstant::I8x16& lanes = control->asInt8x16(); 266 for (int i = 0; i < 16; i++) { 267 if (lanes[i] != (i ^ (N - 1))) { 268 return false; 269 } 270 } 271 return true; 272 } 273 274 // Look for permutations of a single operand. 275 static SimdPermuteOp AnalyzePermute(SimdConstant* control) { 276 // Lane indices are input-agnostic for single-operand permutations. 277 SimdConstant::I8x16 controlBytes; 278 MaskLanes(controlBytes, control->asInt8x16()); 279 280 // Get rid of no-ops immediately, so nobody else needs to check. 281 if (IsIdentity(controlBytes)) { 282 return SimdPermuteOp::MOVE; 283 } 284 285 // Default control is the masked bytes. 286 *control = SimdConstant::CreateX16(controlBytes); 287 288 // Analysis order matters here and is architecture-dependent or even 289 // microarchitecture-dependent: ideally the cheapest implementation first. 290 // The Intel manual says that the cost of a PSHUFB is about five other 291 // operations, so make that our cutoff. 292 // 293 // Word, dword, and qword reversals are handled optimally by general permutes. 294 // 295 // Byte reversals are probably best left to PSHUFB, no alternative rendition 296 // seems to reliably go below five instructions. (Discuss.) 297 // 298 // Word swaps within doublewords and dword swaps within quadwords are handled 299 // optimally by general permutes. 300 // 301 // Dword and qword broadcasts are handled by dword permute. 302 303 if (TryPermute32x4(control)) { 304 return SimdPermuteOp::PERMUTE_32x4; 305 } 306 if (TryRotateRight8x16(control)) { 307 return SimdPermuteOp::ROTATE_RIGHT_8x16; 308 } 309 if (TryBroadcast16x8(control)) { 310 return SimdPermuteOp::BROADCAST_16x8; 311 } 312 if (TryPermute16x8(control)) { 313 return SimdPermuteOp::PERMUTE_16x8; 314 } 315 if (TryBroadcast8x16(control)) { 316 return SimdPermuteOp::BROADCAST_8x16; 317 } 318 if (TryReverse<2>(control)) { 319 return SimdPermuteOp::REVERSE_16x8; 320 } 321 if (TryReverse<4>(control)) { 322 return SimdPermuteOp::REVERSE_32x4; 323 } 324 if (TryReverse<8>(control)) { 325 return SimdPermuteOp::REVERSE_64x2; 326 } 327 328 // TODO: (From v8) Unzip and transpose generally have renditions that slightly 329 // beat a general permute (three or four instructions) 330 // 331 // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be 332 // used when merging two values. 333 334 // The default operation is to permute bytes with the default control. 335 return SimdPermuteOp::PERMUTE_8x16; 336 } 337 338 // Can we shift the bytes left or right by a constant? A shift is a run of 339 // lanes from the rhs (which is zero) on one end and a run of values from the 340 // lhs on the other end. 341 static Maybe<SimdPermuteOp> TryShift8x16(SimdConstant* control) { 342 const SimdConstant::I8x16& lanes = control->asInt8x16(); 343 344 // Represent all zero lanes by 16 345 SimdConstant::I8x16 zeroesMasked; 346 MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; }); 347 348 int i = ScanConstant(zeroesMasked, 16, 0); 349 int shiftLeft = i; 350 if (shiftLeft > 0 && lanes[shiftLeft] != 0) { 351 return Nothing(); 352 } 353 354 i = ScanIncreasingUnmasked(zeroesMasked, i); 355 int shiftRight = 16 - i; 356 if (shiftRight > 0 && lanes[i - 1] != 15) { 357 return Nothing(); 358 } 359 360 i = ScanConstant(zeroesMasked, 16, i); 361 if (i < 16 || (shiftRight > 0 && shiftLeft > 0) || 362 (shiftRight == 0 && shiftLeft == 0)) { 363 return Nothing(); 364 } 365 366 if (shiftRight) { 367 *control = SimdConstant::SplatX16((int8_t)shiftRight); 368 return Some(SimdPermuteOp::SHIFT_RIGHT_8x16); 369 } 370 *control = SimdConstant::SplatX16((int8_t)shiftLeft); 371 return Some(SimdPermuteOp::SHIFT_LEFT_8x16); 372 } 373 374 // Check if it is unsigned integer extend operation. 375 static Maybe<SimdPermuteOp> TryZeroExtend(SimdConstant* control) { 376 const SimdConstant::I8x16& lanes = control->asInt8x16(); 377 378 // Find fragment of sequantial lanes indices that starts from 0. 379 uint32_t i = 0; 380 for (; i <= 4 && lanes[i] == int8_t(i); i++) { 381 } 382 // The length of the fragment has to be a power of 2, and next item is zero. 383 if (!mozilla::IsPowerOfTwo(i) || lanes[i] < 16) { 384 return Nothing(); 385 } 386 MOZ_ASSERT(i > 0 && i <= 4); 387 uint32_t fromLen = i; 388 // Skip items that will be zero'ed. 389 for (; i <= 8 && lanes[i] >= 16; i++) { 390 } 391 // The length of the entire fragment of zero and non-zero items 392 // needs to be power of 2. 393 if (!mozilla::IsPowerOfTwo(i)) { 394 return Nothing(); 395 } 396 MOZ_ASSERT(i > fromLen && i <= 8); 397 uint32_t toLen = i; 398 399 // The sequence will repeat every toLen elements: in which first 400 // fromLen items are sequential lane indices, and the rest are zeros. 401 int8_t current = int8_t(fromLen); 402 for (; i < 16; i++) { 403 if ((i % toLen) >= fromLen) { 404 // Expect the item be a zero. 405 if (lanes[i] < 16) { 406 return Nothing(); 407 } 408 } else { 409 // Check the item is in ascending sequence. 410 if (lanes[i] != current) { 411 return Nothing(); 412 } 413 current++; 414 } 415 } 416 417 switch (fromLen) { 418 case 1: 419 switch (toLen) { 420 case 2: 421 return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8); 422 case 4: 423 return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4); 424 case 8: 425 return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2); 426 } 427 break; 428 case 2: 429 switch (toLen) { 430 case 4: 431 return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4); 432 case 8: 433 return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2); 434 } 435 break; 436 case 4: 437 switch (toLen) { 438 case 8: 439 return Some(SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2); 440 } 441 break; 442 } 443 MOZ_CRASH("Invalid TryZeroExtend match"); 444 } 445 446 static Maybe<SimdPermuteOp> AnalyzeShuffleWithZero(SimdConstant* control) { 447 Maybe<SimdPermuteOp> op; 448 op = TryShift8x16(control); 449 if (op) { 450 return op; 451 } 452 453 op = TryZeroExtend(control); 454 if (op) { 455 return op; 456 } 457 458 // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST; 459 // PAND. This may beat the general byte blend code below. 460 return Nothing(); 461 } 462 463 // Concat: if the result is the suffix (high bytes) of the rhs in front of a 464 // prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are 465 // swapped. 466 static Maybe<SimdShuffleOp> TryConcatRightShift8x16(SimdConstant* control, 467 bool* swapOperands) { 468 const SimdConstant::I8x16& lanes = control->asInt8x16(); 469 int i = ScanIncreasingUnmasked(lanes, 0); 470 MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere"); 471 // First run must end with 15 % 16 472 if ((lanes[i - 1] & 15) != 15) { 473 return Nothing(); 474 } 475 // Second run must start with 0 % 16 476 if ((lanes[i] & 15) != 0) { 477 return Nothing(); 478 } 479 // The two runs must come from different inputs 480 if ((lanes[i] & 16) == (lanes[i - 1] & 16)) { 481 return Nothing(); 482 } 483 int suffixLength = i; 484 485 i = ScanIncreasingUnmasked(lanes, i); 486 // Must end at the left end 487 if (i != 16) { 488 return Nothing(); 489 } 490 491 // If the suffix is from the lhs then swap the operands 492 if (lanes[0] < 16) { 493 *swapOperands = !*swapOperands; 494 } 495 *control = SimdConstant::SplatX16((int8_t)suffixLength); 496 return Some(SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16); 497 } 498 499 // Blend words: if we pick words from both operands without a pattern but all 500 // the input words stay in their position then this is PBLENDW (immediate mask); 501 // this also handles all larger sizes on x64. 502 static Maybe<SimdShuffleOp> TryBlendInt16x8(SimdConstant* control) { 503 SimdConstant tmp(*control); 504 if (!ByteMaskToWordMask(&tmp)) { 505 return Nothing(); 506 } 507 SimdConstant::I16x8 masked; 508 MaskLanes(masked, tmp.asInt16x8()); 509 if (!IsIdentity(masked)) { 510 return Nothing(); 511 } 512 SimdConstant::I16x8 mapped; 513 MapLanes(mapped, tmp.asInt16x8(), 514 [](int x) -> int { return x < 8 ? 0 : -1; }); 515 *control = SimdConstant::CreateX8(mapped); 516 return Some(SimdShuffleOp::BLEND_16x8); 517 } 518 519 // Blend bytes: if we pick bytes ditto then this is a byte blend, which can be 520 // handled with a CONST, PAND, PANDNOT, and POR. 521 // 522 // TODO: Optimization opportunity? If we pick all but one lanes from one with at 523 // most one from the other then it could be a MOV + PEXRB + PINSRB (also if this 524 // element is not in its source location). 525 static Maybe<SimdShuffleOp> TryBlendInt8x16(SimdConstant* control) { 526 SimdConstant::I8x16 masked; 527 MaskLanes(masked, control->asInt8x16()); 528 if (!IsIdentity(masked)) { 529 return Nothing(); 530 } 531 SimdConstant::I8x16 mapped; 532 MapLanes(mapped, control->asInt8x16(), 533 [](int x) -> int { return x < 16 ? 0 : -1; }); 534 *control = SimdConstant::CreateX16(mapped); 535 return Some(SimdShuffleOp::BLEND_8x16); 536 } 537 538 template <typename T> 539 static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) { 540 for (int i = 0; i < len; i++) { 541 if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) { 542 return false; 543 } 544 } 545 return true; 546 } 547 548 // Unpack/interleave: 549 // - if we interleave the low (bytes/words/doublewords) of the inputs into 550 // the output then this is UNPCKL*W (possibly with a swap of operands). 551 // - if we interleave the high ditto then it is UNPCKH*W (ditto) 552 template <typename T> 553 static Maybe<SimdShuffleOp> TryInterleave(const T* lanes, int lhs, int rhs, 554 bool* swapOperands, 555 SimdShuffleOp lowOp, 556 SimdShuffleOp highOp) { 557 int len = int(32 / (sizeof(T) * 4)); 558 if (MatchInterleave(lanes, lhs, rhs, len)) { 559 return Some(lowOp); 560 } 561 if (MatchInterleave(lanes, rhs, lhs, len)) { 562 *swapOperands = !*swapOperands; 563 return Some(lowOp); 564 } 565 if (MatchInterleave(lanes, lhs + len, rhs + len, len)) { 566 return Some(highOp); 567 } 568 if (MatchInterleave(lanes, rhs + len, lhs + len, len)) { 569 *swapOperands = !*swapOperands; 570 return Some(highOp); 571 } 572 return Nothing(); 573 } 574 575 static Maybe<SimdShuffleOp> TryInterleave64x2(SimdConstant* control, 576 bool* swapOperands) { 577 SimdConstant tmp = *control; 578 if (!ByteMaskToQWordMask(&tmp)) { 579 return Nothing(); 580 } 581 const SimdConstant::I64x2& lanes = tmp.asInt64x2(); 582 return TryInterleave(lanes, 0, 2, swapOperands, 583 SimdShuffleOp::INTERLEAVE_LOW_64x2, 584 SimdShuffleOp::INTERLEAVE_HIGH_64x2); 585 } 586 587 static Maybe<SimdShuffleOp> TryInterleave32x4(SimdConstant* control, 588 bool* swapOperands) { 589 SimdConstant tmp = *control; 590 if (!ByteMaskToDWordMask(&tmp)) { 591 return Nothing(); 592 } 593 const SimdConstant::I32x4& lanes = tmp.asInt32x4(); 594 return TryInterleave(lanes, 0, 4, swapOperands, 595 SimdShuffleOp::INTERLEAVE_LOW_32x4, 596 SimdShuffleOp::INTERLEAVE_HIGH_32x4); 597 } 598 599 static Maybe<SimdShuffleOp> TryInterleave16x8(SimdConstant* control, 600 bool* swapOperands) { 601 SimdConstant tmp = *control; 602 if (!ByteMaskToWordMask(&tmp)) { 603 return Nothing(); 604 } 605 const SimdConstant::I16x8& lanes = tmp.asInt16x8(); 606 return TryInterleave(lanes, 0, 8, swapOperands, 607 SimdShuffleOp::INTERLEAVE_LOW_16x8, 608 SimdShuffleOp::INTERLEAVE_HIGH_16x8); 609 } 610 611 static Maybe<SimdShuffleOp> TryInterleave8x16(SimdConstant* control, 612 bool* swapOperands) { 613 const SimdConstant::I8x16& lanes = control->asInt8x16(); 614 return TryInterleave(lanes, 0, 16, swapOperands, 615 SimdShuffleOp::INTERLEAVE_LOW_8x16, 616 SimdShuffleOp::INTERLEAVE_HIGH_8x16); 617 } 618 619 static SimdShuffleOp AnalyzeTwoArgShuffle(SimdConstant* control, 620 bool* swapOperands) { 621 Maybe<SimdShuffleOp> op; 622 op = TryConcatRightShift8x16(control, swapOperands); 623 if (!op) { 624 op = TryBlendInt16x8(control); 625 } 626 if (!op) { 627 op = TryBlendInt8x16(control); 628 } 629 if (!op) { 630 op = TryInterleave64x2(control, swapOperands); 631 } 632 if (!op) { 633 op = TryInterleave32x4(control, swapOperands); 634 } 635 if (!op) { 636 op = TryInterleave16x8(control, swapOperands); 637 } 638 if (!op) { 639 op = TryInterleave8x16(control, swapOperands); 640 } 641 if (!op) { 642 op = Some(SimdShuffleOp::SHUFFLE_BLEND_8x16); 643 } 644 return *op; 645 } 646 647 // Reorder the operands if that seems useful, notably, move a constant to the 648 // right hand side. Rewrites the control to account for any move. 649 static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs, 650 SimdConstant* control) { 651 if ((*lhs)->isWasmFloatConstant()) { 652 MDefinition* tmp = *lhs; 653 *lhs = *rhs; 654 *rhs = tmp; 655 656 int8_t controlBytes[16]; 657 const SimdConstant::I8x16& lanes = control->asInt8x16(); 658 for (unsigned i = 0; i < 16; i++) { 659 controlBytes[i] = int8_t(lanes[i] ^ 16); 660 } 661 *control = SimdConstant::CreateX16(controlBytes); 662 663 return true; 664 } 665 return false; 666 } 667 668 # ifdef DEBUG 669 static const SimdShuffle& ReportShuffleSpecialization(const SimdShuffle& s) { 670 switch (s.opd) { 671 case SimdShuffle::Operand::BOTH: 672 case SimdShuffle::Operand::BOTH_SWAPPED: 673 switch (*s.shuffleOp) { 674 case SimdShuffleOp::SHUFFLE_BLEND_8x16: 675 js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16"); 676 break; 677 case SimdShuffleOp::BLEND_8x16: 678 js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16"); 679 break; 680 case SimdShuffleOp::BLEND_16x8: 681 js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8"); 682 break; 683 case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: 684 js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16"); 685 break; 686 case SimdShuffleOp::INTERLEAVE_HIGH_8x16: 687 js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16"); 688 break; 689 case SimdShuffleOp::INTERLEAVE_HIGH_16x8: 690 js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8"); 691 break; 692 case SimdShuffleOp::INTERLEAVE_HIGH_32x4: 693 js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4"); 694 break; 695 case SimdShuffleOp::INTERLEAVE_HIGH_64x2: 696 js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2"); 697 break; 698 case SimdShuffleOp::INTERLEAVE_LOW_8x16: 699 js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16"); 700 break; 701 case SimdShuffleOp::INTERLEAVE_LOW_16x8: 702 js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8"); 703 break; 704 case SimdShuffleOp::INTERLEAVE_LOW_32x4: 705 js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4"); 706 break; 707 case SimdShuffleOp::INTERLEAVE_LOW_64x2: 708 js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2"); 709 break; 710 default: 711 MOZ_CRASH("Unexpected shuffle op"); 712 } 713 break; 714 case SimdShuffle::Operand::LEFT: 715 case SimdShuffle::Operand::RIGHT: 716 switch (*s.permuteOp) { 717 case SimdPermuteOp::BROADCAST_8x16: 718 js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16"); 719 break; 720 case SimdPermuteOp::BROADCAST_16x8: 721 js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8"); 722 break; 723 case SimdPermuteOp::MOVE: 724 js::wasm::ReportSimdAnalysis("shuffle -> move"); 725 break; 726 case SimdPermuteOp::REVERSE_16x8: 727 js::wasm::ReportSimdAnalysis( 728 "shuffle -> reverse bytes in 16-bit lanes"); 729 break; 730 case SimdPermuteOp::REVERSE_32x4: 731 js::wasm::ReportSimdAnalysis( 732 "shuffle -> reverse bytes in 32-bit lanes"); 733 break; 734 case SimdPermuteOp::REVERSE_64x2: 735 js::wasm::ReportSimdAnalysis( 736 "shuffle -> reverse bytes in 64-bit lanes"); 737 break; 738 case SimdPermuteOp::PERMUTE_8x16: 739 js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16"); 740 break; 741 case SimdPermuteOp::PERMUTE_16x8: 742 js::wasm::ReportSimdAnalysis("shuffle -> permute 16x8"); 743 break; 744 case SimdPermuteOp::PERMUTE_32x4: 745 js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4"); 746 break; 747 case SimdPermuteOp::ROTATE_RIGHT_8x16: 748 js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16"); 749 break; 750 case SimdPermuteOp::SHIFT_LEFT_8x16: 751 js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16"); 752 break; 753 case SimdPermuteOp::SHIFT_RIGHT_8x16: 754 js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16"); 755 break; 756 case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8: 757 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 16x8"); 758 break; 759 case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4: 760 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 32x4"); 761 break; 762 case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2: 763 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 64x2"); 764 break; 765 case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4: 766 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 32x4"); 767 break; 768 case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2: 769 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 64x2"); 770 break; 771 case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2: 772 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 32x4 to 64x2"); 773 break; 774 default: 775 MOZ_CRASH("Unexpected permute op"); 776 } 777 break; 778 } 779 return s; 780 } 781 # endif // DEBUG 782 783 SimdShuffle jit::AnalyzeSimdShuffle(SimdConstant control, MDefinition* lhs, 784 MDefinition* rhs) { 785 # ifdef DEBUG 786 # define R(s) ReportShuffleSpecialization(s) 787 # else 788 # define R(s) (s) 789 # endif 790 791 // If only one of the inputs is used, determine which. 792 bool useLeft = true; 793 bool useRight = true; 794 if (lhs == rhs) { 795 useRight = false; 796 } else { 797 bool allAbove = true; 798 bool allBelow = true; 799 const SimdConstant::I8x16& lanes = control.asInt8x16(); 800 for (int8_t i : lanes) { 801 allAbove = allAbove && i >= 16; 802 allBelow = allBelow && i < 16; 803 } 804 if (allAbove) { 805 useLeft = false; 806 } else if (allBelow) { 807 useRight = false; 808 } 809 } 810 811 // Deal with one-ignored-input. 812 if (!(useLeft && useRight)) { 813 SimdPermuteOp op = AnalyzePermute(&control); 814 return R(SimdShuffle::permute( 815 useLeft ? SimdShuffle::Operand::LEFT : SimdShuffle::Operand::RIGHT, 816 control, op)); 817 } 818 819 // Move constants to rhs. 820 bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control); 821 822 // Deal with constant rhs. 823 if (rhs->isWasmFloatConstant()) { 824 SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128(); 825 if (rhsConstant.isZeroBits()) { 826 Maybe<SimdPermuteOp> op = AnalyzeShuffleWithZero(&control); 827 if (op) { 828 return R(SimdShuffle::permute(swapOperands ? SimdShuffle::Operand::RIGHT 829 : SimdShuffle::Operand::LEFT, 830 control, *op)); 831 } 832 } 833 } 834 835 // Two operands both of which are used. If there's one constant operand it is 836 // now on the rhs. 837 SimdShuffleOp op = AnalyzeTwoArgShuffle(&control, &swapOperands); 838 return R(SimdShuffle::shuffle(swapOperands 839 ? SimdShuffle::Operand::BOTH_SWAPPED 840 : SimdShuffle::Operand::BOTH, 841 control, op)); 842 # undef R 843 } 844 845 #endif // ENABLE_WASM_SIMD