tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ShuffleAnalysis.cpp (28239B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "jit/ShuffleAnalysis.h"
      7 #include "mozilla/MathAlgorithms.h"
      8 #include "jit/MIR-wasm.h"
      9 #include "jit/MIR.h"
     10 #include "wasm/WasmFeatures.h"
     11 
     12 using namespace js;
     13 using namespace jit;
     14 
     15 using mozilla::Maybe;
     16 using mozilla::Nothing;
     17 using mozilla::Some;
     18 
     19 #ifdef ENABLE_WASM_SIMD
     20 
     21 // Specialization analysis for SIMD operations.  This is still x86-centric but
     22 // generalizes fairly easily to other architectures.
     23 
     24 // Optimization of v8x16.shuffle.  The general byte shuffle+blend is very
     25 // expensive (equivalent to at least a dozen instructions), and we want to avoid
     26 // that if we can.  So look for special cases - there are many.
     27 //
     28 // The strategy is to sort the operation into one of three buckets depending
     29 // on the shuffle pattern and inputs:
     30 //
     31 //  - single operand; shuffles on these values are rotations, reversals,
     32 //    transpositions, and general permutations
     33 //  - single-operand-with-interesting-constant (especially zero); shuffles on
     34 //    these values are often byte shift or scatter operations
     35 //  - dual operand; shuffles on these operations are blends, catenated
     36 //    shifts, and (in the worst case) general shuffle+blends
     37 //
     38 // We're not trying to solve the general problem, only to lower reasonably
     39 // expressed patterns that express common operations.  Producers that produce
     40 // dense and convoluted patterns will end up with the general byte shuffle.
     41 // Producers that produce simpler patterns that easily map to hardware will
     42 // get faster code.
     43 //
     44 // In particular, these matchers do not try to combine transformations, so a
     45 // shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is
     46 // usually going to end up as a general byte shuffle.
     47 
     48 // Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
     49 // true, updating *control.
     50 static bool ByteMaskToWordMask(SimdConstant* control) {
     51  const SimdConstant::I8x16& lanes = control->asInt8x16();
     52  int16_t controlWords[8];
     53  for (int i = 0; i < 16; i += 2) {
     54    if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) {
     55      return false;
     56    }
     57    controlWords[i / 2] = int16_t(lanes[i] / 2);
     58  }
     59  *control = SimdConstant::CreateX8(controlWords);
     60  return true;
     61 }
     62 
     63 // Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
     64 // true, updating *control.
     65 static bool ByteMaskToDWordMask(SimdConstant* control) {
     66  const SimdConstant::I8x16& lanes = control->asInt8x16();
     67  int32_t controlDWords[4];
     68  for (int i = 0; i < 16; i += 4) {
     69    if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 &&
     70          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) {
     71      return false;
     72    }
     73    controlDWords[i / 4] = lanes[i] / 4;
     74  }
     75  *control = SimdConstant::CreateX4(controlDWords);
     76  return true;
     77 }
     78 
     79 // Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return
     80 // true, updating *control.
     81 static bool ByteMaskToQWordMask(SimdConstant* control) {
     82  const SimdConstant::I8x16& lanes = control->asInt8x16();
     83  int64_t controlQWords[2];
     84  for (int i = 0; i < 16; i += 8) {
     85    if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 &&
     86          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 &&
     87          lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 &&
     88          lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) {
     89      return false;
     90    }
     91    controlQWords[i / 8] = lanes[i] / 8;
     92  }
     93  *control = SimdConstant::CreateX2(controlQWords);
     94  return true;
     95 }
     96 
     97 // Skip across consecutive values in lanes starting at i, returning the index
     98 // after the last element.  Lane values must be <= len-1 ("masked").
     99 //
    100 // Since every element is a 1-element run, the return value is never the same as
    101 // the starting i.
    102 template <typename T>
    103 static int ScanIncreasingMasked(const T* lanes, int i) {
    104  int len = int(16 / sizeof(T));
    105  MOZ_ASSERT(i < len);
    106  MOZ_ASSERT(lanes[i] <= len - 1);
    107  i++;
    108  while (i < len && lanes[i] == lanes[i - 1] + 1) {
    109    MOZ_ASSERT(lanes[i] <= len - 1);
    110    i++;
    111  }
    112  return i;
    113 }
    114 
    115 // Skip across consecutive values in lanes starting at i, returning the index
    116 // after the last element.  Lane values must be <= len*2-1 ("unmasked"); the
    117 // values len-1 and len are not considered consecutive.
    118 //
    119 // Since every element is a 1-element run, the return value is never the same as
    120 // the starting i.
    121 template <typename T>
    122 static int ScanIncreasingUnmasked(const T* lanes, int i) {
    123  int len = int(16 / sizeof(T));
    124  MOZ_ASSERT(i < len);
    125  if (lanes[i] < len) {
    126    i++;
    127    while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) {
    128      i++;
    129    }
    130  } else {
    131    i++;
    132    while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) {
    133      i++;
    134    }
    135  }
    136  return i;
    137 }
    138 
    139 // Skip lanes that equal v starting at i, returning the index just beyond the
    140 // last of those.  There is no requirement that the initial lanes[i] == v.
    141 template <typename T>
    142 static int ScanConstant(const T* lanes, int v, int i) {
    143  int len = int(16 / sizeof(T));
    144  MOZ_ASSERT(i <= len);
    145  while (i < len && lanes[i] == v) {
    146    i++;
    147  }
    148  return i;
    149 }
    150 
    151 // Mask lane values denoting rhs elements into lhs elements.
    152 template <typename T>
    153 static void MaskLanes(T* result, const T* input) {
    154  int len = int(16 / sizeof(T));
    155  for (int i = 0; i < len; i++) {
    156    result[i] = input[i] & (len - 1);
    157  }
    158 }
    159 
    160 // Apply a transformation to each lane value.
    161 template <typename T>
    162 static void MapLanes(T* result, const T* input, int (*f)(int)) {
    163  // Hazard analysis trips on "IndirectCall: f" error.
    164  // Suppress the check -- `f` is expected to be trivial here.
    165  JS::AutoSuppressGCAnalysis nogc;
    166 
    167  int len = int(16 / sizeof(T));
    168  for (int i = 0; i < len; i++) {
    169    result[i] = f(input[i]);
    170  }
    171 }
    172 
    173 // Recognize an identity permutation, assuming lanes is masked.
    174 template <typename T>
    175 static bool IsIdentity(const T* lanes) {
    176  return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T));
    177 }
    178 
    179 // Recognize part of an identity permutation starting at start, with
    180 // the first value of the permutation expected to be bias.
    181 template <typename T>
    182 static bool IsIdentity(const T* lanes, int start, int len, int bias) {
    183  if (lanes[start] != bias) {
    184    return false;
    185  }
    186  for (int i = start + 1; i < start + len; i++) {
    187    if (lanes[i] != lanes[i - 1] + 1) {
    188      return false;
    189    }
    190  }
    191  return true;
    192 }
    193 
    194 // We can permute by dwords if the mask is reducible to a dword mask, and in
    195 // this case a single PSHUFD is enough.
    196 static bool TryPermute32x4(SimdConstant* control) {
    197  SimdConstant tmp = *control;
    198  if (!ByteMaskToDWordMask(&tmp)) {
    199    return false;
    200  }
    201  *control = tmp;
    202  return true;
    203 }
    204 
    205 // Can we perform a byte rotate right?  We can use PALIGNR.  The shift count is
    206 // just lanes[0], and *control is unchanged.
    207 static bool TryRotateRight8x16(SimdConstant* control) {
    208  const SimdConstant::I8x16& lanes = control->asInt8x16();
    209  // Look for the end of the first run of consecutive bytes.
    210  int i = ScanIncreasingMasked(lanes, 0);
    211 
    212  // First run must start at a value s.t. we have a rotate if all remaining
    213  // bytes are a run.
    214  if (lanes[0] != 16 - i) {
    215    return false;
    216  }
    217 
    218  // If we reached the end of the vector, we're done.
    219  if (i == 16) {
    220    return true;
    221  }
    222 
    223  // Second run must start at source lane zero.
    224  if (lanes[i] != 0) {
    225    return false;
    226  }
    227 
    228  // Second run must end at the end of the lane vector.
    229  return ScanIncreasingMasked(lanes, i) == 16;
    230 }
    231 
    232 // We can permute by words if the mask is reducible to a word mask.
    233 static bool TryPermute16x8(SimdConstant* control) {
    234  SimdConstant tmp = *control;
    235  if (!ByteMaskToWordMask(&tmp)) {
    236    return false;
    237  }
    238  *control = tmp;
    239  return true;
    240 }
    241 
    242 // A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
    243 static bool TryBroadcast16x8(SimdConstant* control) {
    244  SimdConstant tmp = *control;
    245  if (!ByteMaskToWordMask(&tmp)) {
    246    return false;
    247  }
    248  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
    249  if (ScanConstant(lanes, lanes[0], 0) < 8) {
    250    return false;
    251  }
    252  *control = tmp;
    253  return true;
    254 }
    255 
    256 // A single byte lane is copied int all the other lanes: PUNPCK*BW + PSHUF*W +
    257 // PSHUFD.
    258 static bool TryBroadcast8x16(SimdConstant* control) {
    259  const SimdConstant::I8x16& lanes = control->asInt8x16();
    260  return ScanConstant(lanes, lanes[0], 0) >= 16;
    261 }
    262 
    263 template <int N>
    264 static bool TryReverse(SimdConstant* control) {
    265  const SimdConstant::I8x16& lanes = control->asInt8x16();
    266  for (int i = 0; i < 16; i++) {
    267    if (lanes[i] != (i ^ (N - 1))) {
    268      return false;
    269    }
    270  }
    271  return true;
    272 }
    273 
    274 // Look for permutations of a single operand.
    275 static SimdPermuteOp AnalyzePermute(SimdConstant* control) {
    276  // Lane indices are input-agnostic for single-operand permutations.
    277  SimdConstant::I8x16 controlBytes;
    278  MaskLanes(controlBytes, control->asInt8x16());
    279 
    280  // Get rid of no-ops immediately, so nobody else needs to check.
    281  if (IsIdentity(controlBytes)) {
    282    return SimdPermuteOp::MOVE;
    283  }
    284 
    285  // Default control is the masked bytes.
    286  *control = SimdConstant::CreateX16(controlBytes);
    287 
    288  // Analysis order matters here and is architecture-dependent or even
    289  // microarchitecture-dependent: ideally the cheapest implementation first.
    290  // The Intel manual says that the cost of a PSHUFB is about five other
    291  // operations, so make that our cutoff.
    292  //
    293  // Word, dword, and qword reversals are handled optimally by general permutes.
    294  //
    295  // Byte reversals are probably best left to PSHUFB, no alternative rendition
    296  // seems to reliably go below five instructions.  (Discuss.)
    297  //
    298  // Word swaps within doublewords and dword swaps within quadwords are handled
    299  // optimally by general permutes.
    300  //
    301  // Dword and qword broadcasts are handled by dword permute.
    302 
    303  if (TryPermute32x4(control)) {
    304    return SimdPermuteOp::PERMUTE_32x4;
    305  }
    306  if (TryRotateRight8x16(control)) {
    307    return SimdPermuteOp::ROTATE_RIGHT_8x16;
    308  }
    309  if (TryBroadcast16x8(control)) {
    310    return SimdPermuteOp::BROADCAST_16x8;
    311  }
    312  if (TryPermute16x8(control)) {
    313    return SimdPermuteOp::PERMUTE_16x8;
    314  }
    315  if (TryBroadcast8x16(control)) {
    316    return SimdPermuteOp::BROADCAST_8x16;
    317  }
    318  if (TryReverse<2>(control)) {
    319    return SimdPermuteOp::REVERSE_16x8;
    320  }
    321  if (TryReverse<4>(control)) {
    322    return SimdPermuteOp::REVERSE_32x4;
    323  }
    324  if (TryReverse<8>(control)) {
    325    return SimdPermuteOp::REVERSE_64x2;
    326  }
    327 
    328  // TODO: (From v8) Unzip and transpose generally have renditions that slightly
    329  // beat a general permute (three or four instructions)
    330  //
    331  // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be
    332  // used when merging two values.
    333 
    334  // The default operation is to permute bytes with the default control.
    335  return SimdPermuteOp::PERMUTE_8x16;
    336 }
    337 
    338 // Can we shift the bytes left or right by a constant?  A shift is a run of
    339 // lanes from the rhs (which is zero) on one end and a run of values from the
    340 // lhs on the other end.
    341 static Maybe<SimdPermuteOp> TryShift8x16(SimdConstant* control) {
    342  const SimdConstant::I8x16& lanes = control->asInt8x16();
    343 
    344  // Represent all zero lanes by 16
    345  SimdConstant::I8x16 zeroesMasked;
    346  MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; });
    347 
    348  int i = ScanConstant(zeroesMasked, 16, 0);
    349  int shiftLeft = i;
    350  if (shiftLeft > 0 && lanes[shiftLeft] != 0) {
    351    return Nothing();
    352  }
    353 
    354  i = ScanIncreasingUnmasked(zeroesMasked, i);
    355  int shiftRight = 16 - i;
    356  if (shiftRight > 0 && lanes[i - 1] != 15) {
    357    return Nothing();
    358  }
    359 
    360  i = ScanConstant(zeroesMasked, 16, i);
    361  if (i < 16 || (shiftRight > 0 && shiftLeft > 0) ||
    362      (shiftRight == 0 && shiftLeft == 0)) {
    363    return Nothing();
    364  }
    365 
    366  if (shiftRight) {
    367    *control = SimdConstant::SplatX16((int8_t)shiftRight);
    368    return Some(SimdPermuteOp::SHIFT_RIGHT_8x16);
    369  }
    370  *control = SimdConstant::SplatX16((int8_t)shiftLeft);
    371  return Some(SimdPermuteOp::SHIFT_LEFT_8x16);
    372 }
    373 
    374 // Check if it is unsigned integer extend operation.
    375 static Maybe<SimdPermuteOp> TryZeroExtend(SimdConstant* control) {
    376  const SimdConstant::I8x16& lanes = control->asInt8x16();
    377 
    378  // Find fragment of sequantial lanes indices that starts from 0.
    379  uint32_t i = 0;
    380  for (; i <= 4 && lanes[i] == int8_t(i); i++) {
    381  }
    382  // The length of the fragment has to be a power of 2, and next item is zero.
    383  if (!mozilla::IsPowerOfTwo(i) || lanes[i] < 16) {
    384    return Nothing();
    385  }
    386  MOZ_ASSERT(i > 0 && i <= 4);
    387  uint32_t fromLen = i;
    388  // Skip items that will be zero'ed.
    389  for (; i <= 8 && lanes[i] >= 16; i++) {
    390  }
    391  // The length of the entire fragment of zero and non-zero items
    392  // needs to be power of 2.
    393  if (!mozilla::IsPowerOfTwo(i)) {
    394    return Nothing();
    395  }
    396  MOZ_ASSERT(i > fromLen && i <= 8);
    397  uint32_t toLen = i;
    398 
    399  // The sequence will repeat every toLen elements: in which first
    400  // fromLen items are sequential lane indices, and the rest are zeros.
    401  int8_t current = int8_t(fromLen);
    402  for (; i < 16; i++) {
    403    if ((i % toLen) >= fromLen) {
    404      // Expect the item be a zero.
    405      if (lanes[i] < 16) {
    406        return Nothing();
    407      }
    408    } else {
    409      // Check the item is in ascending sequence.
    410      if (lanes[i] != current) {
    411        return Nothing();
    412      }
    413      current++;
    414    }
    415  }
    416 
    417  switch (fromLen) {
    418    case 1:
    419      switch (toLen) {
    420        case 2:
    421          return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8);
    422        case 4:
    423          return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4);
    424        case 8:
    425          return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2);
    426      }
    427      break;
    428    case 2:
    429      switch (toLen) {
    430        case 4:
    431          return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4);
    432        case 8:
    433          return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2);
    434      }
    435      break;
    436    case 4:
    437      switch (toLen) {
    438        case 8:
    439          return Some(SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2);
    440      }
    441      break;
    442  }
    443  MOZ_CRASH("Invalid TryZeroExtend match");
    444 }
    445 
    446 static Maybe<SimdPermuteOp> AnalyzeShuffleWithZero(SimdConstant* control) {
    447  Maybe<SimdPermuteOp> op;
    448  op = TryShift8x16(control);
    449  if (op) {
    450    return op;
    451  }
    452 
    453  op = TryZeroExtend(control);
    454  if (op) {
    455    return op;
    456  }
    457 
    458  // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
    459  // PAND.  This may beat the general byte blend code below.
    460  return Nothing();
    461 }
    462 
    463 // Concat: if the result is the suffix (high bytes) of the rhs in front of a
    464 // prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are
    465 // swapped.
    466 static Maybe<SimdShuffleOp> TryConcatRightShift8x16(SimdConstant* control,
    467                                                    bool* swapOperands) {
    468  const SimdConstant::I8x16& lanes = control->asInt8x16();
    469  int i = ScanIncreasingUnmasked(lanes, 0);
    470  MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere");
    471  // First run must end with 15 % 16
    472  if ((lanes[i - 1] & 15) != 15) {
    473    return Nothing();
    474  }
    475  // Second run must start with 0 % 16
    476  if ((lanes[i] & 15) != 0) {
    477    return Nothing();
    478  }
    479  // The two runs must come from different inputs
    480  if ((lanes[i] & 16) == (lanes[i - 1] & 16)) {
    481    return Nothing();
    482  }
    483  int suffixLength = i;
    484 
    485  i = ScanIncreasingUnmasked(lanes, i);
    486  // Must end at the left end
    487  if (i != 16) {
    488    return Nothing();
    489  }
    490 
    491  // If the suffix is from the lhs then swap the operands
    492  if (lanes[0] < 16) {
    493    *swapOperands = !*swapOperands;
    494  }
    495  *control = SimdConstant::SplatX16((int8_t)suffixLength);
    496  return Some(SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16);
    497 }
    498 
    499 // Blend words: if we pick words from both operands without a pattern but all
    500 // the input words stay in their position then this is PBLENDW (immediate mask);
    501 // this also handles all larger sizes on x64.
    502 static Maybe<SimdShuffleOp> TryBlendInt16x8(SimdConstant* control) {
    503  SimdConstant tmp(*control);
    504  if (!ByteMaskToWordMask(&tmp)) {
    505    return Nothing();
    506  }
    507  SimdConstant::I16x8 masked;
    508  MaskLanes(masked, tmp.asInt16x8());
    509  if (!IsIdentity(masked)) {
    510    return Nothing();
    511  }
    512  SimdConstant::I16x8 mapped;
    513  MapLanes(mapped, tmp.asInt16x8(),
    514           [](int x) -> int { return x < 8 ? 0 : -1; });
    515  *control = SimdConstant::CreateX8(mapped);
    516  return Some(SimdShuffleOp::BLEND_16x8);
    517 }
    518 
    519 // Blend bytes: if we pick bytes ditto then this is a byte blend, which can be
    520 // handled with a CONST, PAND, PANDNOT, and POR.
    521 //
    522 // TODO: Optimization opportunity? If we pick all but one lanes from one with at
    523 // most one from the other then it could be a MOV + PEXRB + PINSRB (also if this
    524 // element is not in its source location).
    525 static Maybe<SimdShuffleOp> TryBlendInt8x16(SimdConstant* control) {
    526  SimdConstant::I8x16 masked;
    527  MaskLanes(masked, control->asInt8x16());
    528  if (!IsIdentity(masked)) {
    529    return Nothing();
    530  }
    531  SimdConstant::I8x16 mapped;
    532  MapLanes(mapped, control->asInt8x16(),
    533           [](int x) -> int { return x < 16 ? 0 : -1; });
    534  *control = SimdConstant::CreateX16(mapped);
    535  return Some(SimdShuffleOp::BLEND_8x16);
    536 }
    537 
    538 template <typename T>
    539 static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
    540  for (int i = 0; i < len; i++) {
    541    if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) {
    542      return false;
    543    }
    544  }
    545  return true;
    546 }
    547 
    548 // Unpack/interleave:
    549 //  - if we interleave the low (bytes/words/doublewords) of the inputs into
    550 //    the output then this is UNPCKL*W (possibly with a swap of operands).
    551 //  - if we interleave the high ditto then it is UNPCKH*W (ditto)
    552 template <typename T>
    553 static Maybe<SimdShuffleOp> TryInterleave(const T* lanes, int lhs, int rhs,
    554                                          bool* swapOperands,
    555                                          SimdShuffleOp lowOp,
    556                                          SimdShuffleOp highOp) {
    557  int len = int(32 / (sizeof(T) * 4));
    558  if (MatchInterleave(lanes, lhs, rhs, len)) {
    559    return Some(lowOp);
    560  }
    561  if (MatchInterleave(lanes, rhs, lhs, len)) {
    562    *swapOperands = !*swapOperands;
    563    return Some(lowOp);
    564  }
    565  if (MatchInterleave(lanes, lhs + len, rhs + len, len)) {
    566    return Some(highOp);
    567  }
    568  if (MatchInterleave(lanes, rhs + len, lhs + len, len)) {
    569    *swapOperands = !*swapOperands;
    570    return Some(highOp);
    571  }
    572  return Nothing();
    573 }
    574 
    575 static Maybe<SimdShuffleOp> TryInterleave64x2(SimdConstant* control,
    576                                              bool* swapOperands) {
    577  SimdConstant tmp = *control;
    578  if (!ByteMaskToQWordMask(&tmp)) {
    579    return Nothing();
    580  }
    581  const SimdConstant::I64x2& lanes = tmp.asInt64x2();
    582  return TryInterleave(lanes, 0, 2, swapOperands,
    583                       SimdShuffleOp::INTERLEAVE_LOW_64x2,
    584                       SimdShuffleOp::INTERLEAVE_HIGH_64x2);
    585 }
    586 
    587 static Maybe<SimdShuffleOp> TryInterleave32x4(SimdConstant* control,
    588                                              bool* swapOperands) {
    589  SimdConstant tmp = *control;
    590  if (!ByteMaskToDWordMask(&tmp)) {
    591    return Nothing();
    592  }
    593  const SimdConstant::I32x4& lanes = tmp.asInt32x4();
    594  return TryInterleave(lanes, 0, 4, swapOperands,
    595                       SimdShuffleOp::INTERLEAVE_LOW_32x4,
    596                       SimdShuffleOp::INTERLEAVE_HIGH_32x4);
    597 }
    598 
    599 static Maybe<SimdShuffleOp> TryInterleave16x8(SimdConstant* control,
    600                                              bool* swapOperands) {
    601  SimdConstant tmp = *control;
    602  if (!ByteMaskToWordMask(&tmp)) {
    603    return Nothing();
    604  }
    605  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
    606  return TryInterleave(lanes, 0, 8, swapOperands,
    607                       SimdShuffleOp::INTERLEAVE_LOW_16x8,
    608                       SimdShuffleOp::INTERLEAVE_HIGH_16x8);
    609 }
    610 
    611 static Maybe<SimdShuffleOp> TryInterleave8x16(SimdConstant* control,
    612                                              bool* swapOperands) {
    613  const SimdConstant::I8x16& lanes = control->asInt8x16();
    614  return TryInterleave(lanes, 0, 16, swapOperands,
    615                       SimdShuffleOp::INTERLEAVE_LOW_8x16,
    616                       SimdShuffleOp::INTERLEAVE_HIGH_8x16);
    617 }
    618 
    619 static SimdShuffleOp AnalyzeTwoArgShuffle(SimdConstant* control,
    620                                          bool* swapOperands) {
    621  Maybe<SimdShuffleOp> op;
    622  op = TryConcatRightShift8x16(control, swapOperands);
    623  if (!op) {
    624    op = TryBlendInt16x8(control);
    625  }
    626  if (!op) {
    627    op = TryBlendInt8x16(control);
    628  }
    629  if (!op) {
    630    op = TryInterleave64x2(control, swapOperands);
    631  }
    632  if (!op) {
    633    op = TryInterleave32x4(control, swapOperands);
    634  }
    635  if (!op) {
    636    op = TryInterleave16x8(control, swapOperands);
    637  }
    638  if (!op) {
    639    op = TryInterleave8x16(control, swapOperands);
    640  }
    641  if (!op) {
    642    op = Some(SimdShuffleOp::SHUFFLE_BLEND_8x16);
    643  }
    644  return *op;
    645 }
    646 
    647 // Reorder the operands if that seems useful, notably, move a constant to the
    648 // right hand side.  Rewrites the control to account for any move.
    649 static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs,
    650                                        SimdConstant* control) {
    651  if ((*lhs)->isWasmFloatConstant()) {
    652    MDefinition* tmp = *lhs;
    653    *lhs = *rhs;
    654    *rhs = tmp;
    655 
    656    int8_t controlBytes[16];
    657    const SimdConstant::I8x16& lanes = control->asInt8x16();
    658    for (unsigned i = 0; i < 16; i++) {
    659      controlBytes[i] = int8_t(lanes[i] ^ 16);
    660    }
    661    *control = SimdConstant::CreateX16(controlBytes);
    662 
    663    return true;
    664  }
    665  return false;
    666 }
    667 
    668 #  ifdef DEBUG
    669 static const SimdShuffle& ReportShuffleSpecialization(const SimdShuffle& s) {
    670  switch (s.opd) {
    671    case SimdShuffle::Operand::BOTH:
    672    case SimdShuffle::Operand::BOTH_SWAPPED:
    673      switch (*s.shuffleOp) {
    674        case SimdShuffleOp::SHUFFLE_BLEND_8x16:
    675          js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16");
    676          break;
    677        case SimdShuffleOp::BLEND_8x16:
    678          js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16");
    679          break;
    680        case SimdShuffleOp::BLEND_16x8:
    681          js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8");
    682          break;
    683        case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16:
    684          js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16");
    685          break;
    686        case SimdShuffleOp::INTERLEAVE_HIGH_8x16:
    687          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16");
    688          break;
    689        case SimdShuffleOp::INTERLEAVE_HIGH_16x8:
    690          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8");
    691          break;
    692        case SimdShuffleOp::INTERLEAVE_HIGH_32x4:
    693          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4");
    694          break;
    695        case SimdShuffleOp::INTERLEAVE_HIGH_64x2:
    696          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2");
    697          break;
    698        case SimdShuffleOp::INTERLEAVE_LOW_8x16:
    699          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16");
    700          break;
    701        case SimdShuffleOp::INTERLEAVE_LOW_16x8:
    702          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8");
    703          break;
    704        case SimdShuffleOp::INTERLEAVE_LOW_32x4:
    705          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4");
    706          break;
    707        case SimdShuffleOp::INTERLEAVE_LOW_64x2:
    708          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2");
    709          break;
    710        default:
    711          MOZ_CRASH("Unexpected shuffle op");
    712      }
    713      break;
    714    case SimdShuffle::Operand::LEFT:
    715    case SimdShuffle::Operand::RIGHT:
    716      switch (*s.permuteOp) {
    717        case SimdPermuteOp::BROADCAST_8x16:
    718          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16");
    719          break;
    720        case SimdPermuteOp::BROADCAST_16x8:
    721          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8");
    722          break;
    723        case SimdPermuteOp::MOVE:
    724          js::wasm::ReportSimdAnalysis("shuffle -> move");
    725          break;
    726        case SimdPermuteOp::REVERSE_16x8:
    727          js::wasm::ReportSimdAnalysis(
    728              "shuffle -> reverse bytes in 16-bit lanes");
    729          break;
    730        case SimdPermuteOp::REVERSE_32x4:
    731          js::wasm::ReportSimdAnalysis(
    732              "shuffle -> reverse bytes in 32-bit lanes");
    733          break;
    734        case SimdPermuteOp::REVERSE_64x2:
    735          js::wasm::ReportSimdAnalysis(
    736              "shuffle -> reverse bytes in 64-bit lanes");
    737          break;
    738        case SimdPermuteOp::PERMUTE_8x16:
    739          js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16");
    740          break;
    741        case SimdPermuteOp::PERMUTE_16x8:
    742          js::wasm::ReportSimdAnalysis("shuffle -> permute 16x8");
    743          break;
    744        case SimdPermuteOp::PERMUTE_32x4:
    745          js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4");
    746          break;
    747        case SimdPermuteOp::ROTATE_RIGHT_8x16:
    748          js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16");
    749          break;
    750        case SimdPermuteOp::SHIFT_LEFT_8x16:
    751          js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16");
    752          break;
    753        case SimdPermuteOp::SHIFT_RIGHT_8x16:
    754          js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16");
    755          break;
    756        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
    757          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 16x8");
    758          break;
    759        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
    760          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 32x4");
    761          break;
    762        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
    763          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 64x2");
    764          break;
    765        case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
    766          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 32x4");
    767          break;
    768        case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
    769          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 64x2");
    770          break;
    771        case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2:
    772          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 32x4 to 64x2");
    773          break;
    774        default:
    775          MOZ_CRASH("Unexpected permute op");
    776      }
    777      break;
    778  }
    779  return s;
    780 }
    781 #  endif  // DEBUG
    782 
    783 SimdShuffle jit::AnalyzeSimdShuffle(SimdConstant control, MDefinition* lhs,
    784                                    MDefinition* rhs) {
    785 #  ifdef DEBUG
    786 #    define R(s) ReportShuffleSpecialization(s)
    787 #  else
    788 #    define R(s) (s)
    789 #  endif
    790 
    791  // If only one of the inputs is used, determine which.
    792  bool useLeft = true;
    793  bool useRight = true;
    794  if (lhs == rhs) {
    795    useRight = false;
    796  } else {
    797    bool allAbove = true;
    798    bool allBelow = true;
    799    const SimdConstant::I8x16& lanes = control.asInt8x16();
    800    for (int8_t i : lanes) {
    801      allAbove = allAbove && i >= 16;
    802      allBelow = allBelow && i < 16;
    803    }
    804    if (allAbove) {
    805      useLeft = false;
    806    } else if (allBelow) {
    807      useRight = false;
    808    }
    809  }
    810 
    811  // Deal with one-ignored-input.
    812  if (!(useLeft && useRight)) {
    813    SimdPermuteOp op = AnalyzePermute(&control);
    814    return R(SimdShuffle::permute(
    815        useLeft ? SimdShuffle::Operand::LEFT : SimdShuffle::Operand::RIGHT,
    816        control, op));
    817  }
    818 
    819  // Move constants to rhs.
    820  bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control);
    821 
    822  // Deal with constant rhs.
    823  if (rhs->isWasmFloatConstant()) {
    824    SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128();
    825    if (rhsConstant.isZeroBits()) {
    826      Maybe<SimdPermuteOp> op = AnalyzeShuffleWithZero(&control);
    827      if (op) {
    828        return R(SimdShuffle::permute(swapOperands ? SimdShuffle::Operand::RIGHT
    829                                                   : SimdShuffle::Operand::LEFT,
    830                                      control, *op));
    831      }
    832    }
    833  }
    834 
    835  // Two operands both of which are used.  If there's one constant operand it is
    836  // now on the rhs.
    837  SimdShuffleOp op = AnalyzeTwoArgShuffle(&control, &swapOperands);
    838  return R(SimdShuffle::shuffle(swapOperands
    839                                    ? SimdShuffle::Operand::BOTH_SWAPPED
    840                                    : SimdShuffle::Operand::BOTH,
    841                                control, op));
    842 #  undef R
    843 }
    844 
    845 #endif  // ENABLE_WASM_SIMD