tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

avx2-x64-ion-codegen.js (24223B)


      1 // |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration("x64") || getBuildConfiguration("simulator") || !isAvxPresent(); include:codegen-x64-test.js
      2 
      3 // Test that there are no extraneous moves for various SIMD conversion
      4 // operations. See README-codegen.md for general information about this type of
      5 // test case.
      6 
      7 // Note, these tests test the beginning of the output but not the end.
      8 
// Currently AVX2 exhibits a defect when a function uses its first v128 arg
// and returns v128: the register allocator adds unneeded extra moves from
// xmm0 into a temporary, then into a second temporary, and the latter
// temporary is then used as the operand.
// To simplify things, the tests below do not use the first v128 argument.
     13 // v128 OP v128 -> v128
     14 // inputs: [[complete-opname, expected-pattern], ...]
     15 function codegenTestX64_v128xv128_v128_avxhack(inputs, options = {}) {
     16     for ( let [op, expected] of inputs ) {
     17         codegenTestX64_adhoc(wrap(options, `
     18         (func (export "f") (param v128 v128 v128) (result v128)
     19           (${op} (local.get 1) (local.get 2)))`),
     20                              'f',
     21                              expected,
     22                              options);
     23     }
     24 }
     25 // (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect)
     26 // v128 OP const -> v128
     27 // inputs: [[complete-opname, const, expected-pattern], ...]
     28 function codegenTestX64_v128xLITERAL_v128_avxhack(inputs, options = {}) {
     29     for ( let [op, const_, expected] of inputs ) {
     30         codegenTestX64_adhoc(wrap(options, `
     31         (func (export "f") (param v128 v128) (result v128)
     32           (${op} (local.get 1) ${const_}))`),
     33                              'f',
     34                              expected,
     35                              options);
     36     }
     37 }
     38 // (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect)
     39 // const OP v128 -> v128
     40 // inputs: [[complete-opname, const, expected-pattern], ...]
     41 function codegenTestX64_LITERALxv128_v128_avxhack(inputs, options = {}) {
     42     for ( let [op, const_, expected] of inputs ) {
     43         codegenTestX64_adhoc(wrap(options, `
     44         (func (export "f") (param v128 v128) (result v128)
     45           (${op} ${const_} (local.get 1)))`),
     46                              'f',
     47                              expected,
     48                              options);
     49     }
     50 }
     51 
     52 // Utility function to test SIMD operations encoding, where the input argument
     53 // has the specified type (T).
     54 // inputs: [[type, complete-opname, expected-pattern], ...]
     55 function codegenTestX64_T_v128_avxhack(inputs, options = {}) {
     56     for ( let [ty, op, expected] of inputs ) {
     57         codegenTestX64_adhoc(wrap(options, `
     58         (func (export "f") (param ${ty}) (result v128)
     59           (${op} (local.get 0)))`),
     60                              'f',
     61                              expected,
     62                              options);
     63     }
     64 }
     65 
     66 // Machers for any 64- and 32-bit registers.
     67 var GPR_I64 = "%r\\w+";
     68 var GPR_I32 = "%(?:e\\w+|r\\d+d)";
     69 
// Simple binary ops: e.g. add, sub, mul
// Each row pairs a wasm SIMD opcode with the expected AVX (three-operand,
// non-destructive) instruction pattern over xmm1 (lhs) and xmm2 (rhs) into
// xmm0; regex metacharacters in the patterns are escaped.
codegenTestX64_v128xv128_v128_avxhack(
    [['i8x16.avgr_u',    `vpavgb %xmm2, %xmm1, %xmm0`],
     ['i16x8.avgr_u',    `vpavgw %xmm2, %xmm1, %xmm0`],
     ['i8x16.add',       `vpaddb %xmm2, %xmm1, %xmm0`],
     ['i8x16.add_sat_s', `vpaddsb %xmm2, %xmm1, %xmm0`],
     ['i8x16.add_sat_u', `vpaddusb %xmm2, %xmm1, %xmm0`],
     ['i8x16.sub',       `vpsubb %xmm2, %xmm1, %xmm0`],
     ['i8x16.sub_sat_s', `vpsubsb %xmm2, %xmm1, %xmm0`],
     ['i8x16.sub_sat_u', `vpsubusb %xmm2, %xmm1, %xmm0`],
     ['i16x8.mul',       `vpmullw %xmm2, %xmm1, %xmm0`],
     ['i16x8.min_s',     `vpminsw %xmm2, %xmm1, %xmm0`],
     ['i16x8.min_u',     `vpminuw %xmm2, %xmm1, %xmm0`],
     ['i16x8.max_s',     `vpmaxsw %xmm2, %xmm1, %xmm0`],
     ['i16x8.max_u',     `vpmaxuw %xmm2, %xmm1, %xmm0`],
     ['i32x4.add',       `vpaddd %xmm2, %xmm1, %xmm0`],
     ['i32x4.sub',       `vpsubd %xmm2, %xmm1, %xmm0`],
     ['i32x4.mul',       `vpmulld %xmm2, %xmm1, %xmm0`],
     ['i32x4.min_s',     `vpminsd %xmm2, %xmm1, %xmm0`],
     ['i32x4.min_u',     `vpminud %xmm2, %xmm1, %xmm0`],
     ['i32x4.max_s',     `vpmaxsd %xmm2, %xmm1, %xmm0`],
     ['i32x4.max_u',     `vpmaxud %xmm2, %xmm1, %xmm0`],
     ['i64x2.add',       `vpaddq %xmm2, %xmm1, %xmm0`],
     ['i64x2.sub',       `vpsubq %xmm2, %xmm1, %xmm0`],
     // i64x2.mul has no single instruction; it is matched as a 32x32->64
     // partial-product sequence.
     ['i64x2.mul', `
vpsrlq \\$0x20, %xmm1, %xmm3
pmuludq %xmm2, %xmm3
vpsrlq \\$0x20, %xmm2, %xmm15
pmuludq %xmm1, %xmm15
paddq %xmm3, %xmm15
psllq \\$0x20, %xmm15
vpmuludq %xmm2, %xmm1, %xmm0
paddq %xmm15, %xmm0`],
     ['f32x4.add',            `vaddps %xmm2, %xmm1, %xmm0`],
     ['f32x4.sub',            `vsubps %xmm2, %xmm1, %xmm0`],
     ['f32x4.mul',            `vmulps %xmm2, %xmm1, %xmm0`],
     ['f32x4.div',            `vdivps %xmm2, %xmm1, %xmm0`],
     ['f64x2.add',            `vaddpd %xmm2, %xmm1, %xmm0`],
     ['f64x2.sub',            `vsubpd %xmm2, %xmm1, %xmm0`],
     ['f64x2.mul',            `vmulpd %xmm2, %xmm1, %xmm0`],
     ['f64x2.div',            `vdivpd %xmm2, %xmm1, %xmm0`],
     ['i8x16.narrow_i16x8_s', `vpacksswb %xmm2, %xmm1, %xmm0`],
     ['i8x16.narrow_i16x8_u', `vpackuswb %xmm2, %xmm1, %xmm0`],
     ['i16x8.narrow_i32x4_s', `vpackssdw %xmm2, %xmm1, %xmm0`],
     ['i16x8.narrow_i32x4_u', `vpackusdw %xmm2, %xmm1, %xmm0`],
     ['i32x4.dot_i16x8_s',    `vpmaddwd %xmm2, %xmm1, %xmm0`]]);
    116 
// Simple comparison ops
// Predicates with a direct instruction expect a single VEX op; the others
// (ne, unsigned gt/ge, signed le/ge) are matched as multi-instruction
// sequences that build an all-ones mask (pcmpeqw %xmm15, %xmm15) and negate
// with pxor, or go через min/max + equality.
codegenTestX64_v128xv128_v128_avxhack(
    [['i8x16.eq', `vpcmpeqb %xmm2, %xmm1, %xmm0`],
     ['i8x16.ne', `
vpcmpeqb %xmm2, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     // lt_s is gt_s with the operands swapped.
     ['i8x16.lt_s', `vpcmpgtb %xmm1, %xmm2, %xmm0`],
     ['i8x16.gt_u', `
vpmaxub %xmm2, %xmm1, %xmm0
pcmpeqb %xmm2, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i16x8.eq', `vpcmpeqw %xmm2, %xmm1, %xmm0`],
     ['i16x8.ne', `
vpcmpeqw %xmm2, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i16x8.le_s', `
vpcmpgtw %xmm2, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i16x8.ge_u', `
vpminuw %xmm2, %xmm1, %xmm0
pcmpeqw %xmm2, %xmm0`],
     ['i32x4.eq', `vpcmpeqd %xmm2, %xmm1, %xmm0`],
     ['i32x4.ne', `
vpcmpeqd %xmm2, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i32x4.lt_s', `vpcmpgtd %xmm1, %xmm2, %xmm0`],
     ['i32x4.gt_u', `
vpmaxud %xmm2, %xmm1, %xmm0
pcmpeqd %xmm2, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i64x2.eq', `vpcmpeqq %xmm2, %xmm1, %xmm0`],
     ['i64x2.ne', `
vpcmpeqq %xmm2, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i64x2.lt_s', `vpcmpgtq %xmm1, %xmm2, %xmm0`],
     ['i64x2.ge_s', `
vpcmpgtq %xmm1, %xmm2, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     // Float compares encode the predicate as an immediate (0x00 eq,
     // 0x01 lt, 0x02 le); ge/gt reuse le/lt with swapped operands.
     ['f32x4.eq', `vcmpps \\$0x00, %xmm2, %xmm1, %xmm0`],
     ['f32x4.lt', `vcmpps \\$0x01, %xmm2, %xmm1, %xmm0`],
     ['f32x4.ge', `vcmpps \\$0x02, %xmm1, %xmm2, %xmm0`],
     ['f64x2.eq', `vcmppd \\$0x00, %xmm2, %xmm1, %xmm0`],
     ['f64x2.lt', `vcmppd \\$0x01, %xmm2, %xmm1, %xmm0`],
     ['f64x2.ge', `vcmppd \\$0x02, %xmm1, %xmm2, %xmm0`],
     ['f32x4.pmin', `vminps %xmm1, %xmm2, %xmm0`],
     ['f32x4.pmax', `vmaxps %xmm1, %xmm2, %xmm0`],
     ['f64x2.pmin', `vminpd %xmm1, %xmm2, %xmm0`],
     ['f64x2.pmax', `vmaxpd %xmm1, %xmm2, %xmm0`],
     // swizzle clamps out-of-range indices via a RIP-relative saturating add.
     ['i8x16.swizzle', `
vpaddusbx ${RIPR}, %xmm2, %xmm15
vpshufb %xmm15, %xmm1, %xmm0`],
     ['i16x8.extmul_high_i8x16_s', `
palignr \\$0x08, %xmm2, %xmm15
vpmovsxbw %xmm15, %xmm15
palignr \\$0x08, %xmm1, %xmm0
vpmovsxbw %xmm0, %xmm0
pmullw %xmm15, %xmm0`],
     ['i32x4.extmul_low_i16x8_u', `
vpmulhuw %xmm2, %xmm1, %xmm15
vpmullw %xmm2, %xmm1, %xmm0
punpcklwd %xmm15, %xmm0`],
     ['i64x2.extmul_low_i32x4_s', `
vpshufd \\$0x10, %xmm1, %xmm15
vpshufd \\$0x10, %xmm2, %xmm0
pmuldq %xmm15, %xmm0`],
     ['i16x8.q15mulr_sat_s', `
vpmulhrsw %xmm2, %xmm1, %xmm0
vpcmpeqwx ${RIPR}, %xmm0, %xmm15
pxor %xmm15, %xmm0`],
]);
    195 
// Bitwise binary ops
// Note the swapped register order for andnot: VPANDN negates its first
// source, so wasm's `a & ~b` places b (%xmm2) in the negated slot.
codegenTestX64_v128xv128_v128_avxhack(
    [['v128.and', `vpand %xmm2, %xmm1, %xmm0`],
     ['v128.andnot', `vpandn %xmm1, %xmm2, %xmm0`],
     ['v128.or', `vpor %xmm2, %xmm1, %xmm0`],
     ['v128.xor', `vpxor %xmm2, %xmm1, %xmm0`]]);
    202 
    203 
    204 // Replace lane ops.
    205 codegenTestX64_adhoc(`(module
    206     (func (export "f") (param v128 v128 i32) (result v128)
    207          (i8x16.replace_lane 7 (local.get 1) (local.get 2))))`, 'f', `
    208 vpinsrb \\$0x07, ${GPR_I32}, %xmm1, %xmm0`);
    209 codegenTestX64_adhoc(`(module
    210     (func (export "f") (param v128 v128 i32) (result v128)
    211          (i16x8.replace_lane 3 (local.get 1) (local.get 2))))`, 'f', `
    212 vpinsrw \\$0x03, ${GPR_I32}, %xmm1, %xmm0`);
    213 codegenTestX64_adhoc(`(module
    214     (func (export "f") (param v128 v128 i32) (result v128)
    215          (i32x4.replace_lane 2 (local.get 1) (local.get 2))))`, 'f', `
    216 vpinsrd \\$0x02, ${GPR_I32}, %xmm1, %xmm0`);
    217 codegenTestX64_adhoc(`(module
    218     (func (export "f") (param v128 v128 i64) (result v128)
    219          (i64x2.replace_lane 1 (local.get 1) (local.get 2))))`, 'f', `
    220 vpinsrq \\$0x01, ${GPR_I64}, %xmm1, %xmm0`);
    221 
    222 
// Splat lowerings that need AVX2's vpbroadcast* family.
if (isAvxPresent(2)) {
    // Integer splats move the scalar into a vector register first;
    // f32x4.splat broadcasts directly since the value is already in xmm0.
    codegenTestX64_T_v128_avxhack(
         [['i32', 'i8x16.splat', `
vmovd ${GPR_I32}, %xmm0
vpbroadcastb %xmm0, %xmm0`],
          ['i32', 'i16x8.splat', `
vmovd ${GPR_I32}, %xmm0
vpbroadcastw %xmm0, %xmm0`],
          ['i32', 'i32x4.splat', `
vmovd ${GPR_I32}, %xmm0
vpbroadcastd %xmm0, %xmm0`],
          ['i64', 'i64x2.splat', `
vmovq ${GPR_I64}, %xmm0
vpbroadcastq %xmm0, %xmm0`],
          // NOTE(review): log:true dumps output; possibly leftover from
          // debugging — confirm it is intentional.
          ['f32', 'f32x4.splat', `vbroadcastss %xmm0, %xmm0`]], {log:true});

    // Load-splat from memory; the (%r15,%r..,1) operand is a base+index
    // address — presumably %r15 is the wasm heap base register; verify
    // against the harness.
    codegenTestX64_T_v128_avxhack(
         [['i32', 'v128.load8_splat',
           'vpbroadcastbb \\(%r15,%r\\w+,1\\), %xmm0'],
          ['i32', 'v128.load16_splat',
           'vpbroadcastww \\(%r15,%r\\w+,1\\), %xmm0'],
          ['i32', 'v128.load32_splat',
           'vbroadcastssl \\(%r15,%r\\w+,1\\), %xmm0']], {memory: 1});
}
    247 
// Using VEX during shuffle ops
// Shuffle indices 0-15 select bytes from the first operand (xmm1), 16-31
// from the second (xmm2); each pattern below is the specialized lowering
// Ion picks for the given index vector.
codegenTestX64_v128xv128_v128_avxhack([
    // Identity op on second argument should generate a move
   ['i8x16.shuffle 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15',
    'vmovdqa %xmm1, %xmm0'],

    // Broadcast a byte from first argument
   ['i8x16.shuffle 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5',
    `
vpunpcklbw %xmm1, %xmm1, %xmm0
vpshufhw \\$0x55, %xmm0, %xmm0
vpshufd \\$0xAA, %xmm0, %xmm0`],

    // Broadcast a word from first argument
   ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5',
    `
vpshuflw \\$0xAA, %xmm1, %xmm0
vpshufd \\$0x00, %xmm0, %xmm0`],

    // Permute words
    ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13',
`
vpshuflw \\$0xB1, %xmm1, %xmm0
vpshufhw \\$0xB1, %xmm0, %xmm0`],

    // Permute doublewords
    ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11',
     'vpshufd \\$0xB1, %xmm1, %xmm0'],

    // Interleave doublewords
    ['i8x16.shuffle 0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23',
     'vpunpckldq %xmm2, %xmm1, %xmm0'],

    // Interleave quadwords
    ['i8x16.shuffle 24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15',
     'vpunpckhqdq %xmm1, %xmm2, %xmm0'],

    // Rotate right
   ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12',
    `vpalignr \\$0x0D, %xmm1, %xmm1, %xmm0`],
   ['i8x16.shuffle 28 29 30 31 0 1 2 3 4 5 6 7 8 9 10 11',
    `vpalignr \\$0x0C, %xmm2, %xmm1, %xmm0`]]);
    290 
// Low-lane broadcast shuffles lower to a single vpbroadcast* under AVX2.
// ("second"/"third argument" refer to the function params; the first v128
// param is ignored, see the helper's comment.)
if (isAvxPresent(2)) {
    codegenTestX64_v128xv128_v128_avxhack([
         // Broadcast low byte from second argument
         ['i8x16.shuffle 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0',
          'vpbroadcastb %xmm1, %xmm0'],

         // Broadcast low word from third argument
         ['i8x16.shuffle 16 17 16 17 16 17 16 17 16 17 16 17 16 17 16 17',
         'vpbroadcastw %xmm2, %xmm0'],

         // Broadcast low doubleword from second argument
         ['i8x16.shuffle 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3',
          'vpbroadcastd %xmm1, %xmm0']]);
}
    305 
// Testing AVX optimization where VPBLENDVB accepts four XMM registers as args.
// The mixed-lane shuffle mask is materialized as a RIP-relative constant and
// used directly as the blend selector.
codegenTestX64_adhoc(
    `(func (export "f") (param v128 v128 v128 v128) (result v128)
       (i8x16.shuffle 0 17 2 3 4 5 6 7 24 25 26 11 12 13 30 15
         (local.get 2)(local.get 3)))`,
    'f',
`
movdqax ${RIPR}, %xmm1
vpblendvb %xmm1, %xmm3, %xmm2, %xmm0`);
    315 
// Constant arguments that are folded into the instruction
// The v128.const rhs becomes a RIP-relative memory operand; the harness's
// trailing-'x' mnemonic suffix (vpaddbx, vpandx, ...) appears with such
// ${RIPR} memory operands in its disassembly matching.
codegenTestX64_v128xLITERAL_v128_avxhack(
    [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpsubbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddusbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpsubsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpsubusbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpminsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpminubx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpmaxsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpmaxubx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpcmpeqbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
vpcmpeqbx ${RIPR}, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpcmpgtbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
vpcmpgtbx ${RIPR}, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpacksswbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpackuswbx ${RIPR}, %xmm1, %xmm0`],

     ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpaddwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpsubwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmullwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpaddswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpadduswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpsubswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpsubuswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpminswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpminuwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmaxswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmaxuwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpcmpeqwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
vpcmpeqwx ${RIPR}, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpcmpgtwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
vpcmpgtwx ${RIPR}, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpackssdwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpackusdwx ${RIPR}, %xmm1, %xmm0`],

     ['i32x4.add', '(v128.const i32x4 1 2 1 2)',
      `vpadddx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.sub', '(v128.const i32x4 1 2 1 2)',
      `vpsubdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.mul', '(v128.const i32x4 1 2 1 2)',
      `vpmulldx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)',
      `vpminsdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)',
      `vpminudx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)',
      `vpmaxsdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)',
      `vpmaxudx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.eq', '(v128.const i32x4 1 2 1 2)',
      `vpcmpeqdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', `
vpcmpeqdx ${RIPR}, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)',
      `vpcmpgtdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', `
vpcmpgtdx ${RIPR}, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)',
      `vpmaddwdx ${RIPR}, %xmm1, %xmm0`],

     ['i64x2.add', '(v128.const i64x2 1 2)',
      `vpaddqx ${RIPR}, %xmm1, %xmm0`],
     ['i64x2.sub', '(v128.const i64x2 1 2)',
      `vpsubqx ${RIPR}, %xmm1, %xmm0`],

     ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpandx ${RIPR}, %xmm1, %xmm0`],
     ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vporx ${RIPR}, %xmm1, %xmm0`],
     ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpxorx ${RIPR}, %xmm1, %xmm0`],

     ['f32x4.add', '(v128.const f32x4 1 2 3 4)',
      `vaddpsx ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.sub', '(v128.const f32x4 1 2 3 4)',
      `vsubpsx ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.mul', '(v128.const f32x4 1 2 3 4)',
      `vmulpsx ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.div', '(v128.const f32x4 1 2 3 4)',
      `vdivpsx ${RIPR}, %xmm1, %xmm0`],

     ['f64x2.add', '(v128.const f64x2 1 2)',
      `vaddpdx ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.sub', '(v128.const f64x2 1 2)',
      `vsubpdx ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.mul', '(v128.const f64x2 1 2)',
      `vmulpdx ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.div', '(v128.const f64x2 1 2)',
      `vdivpdx ${RIPR}, %xmm1, %xmm0`],

     ['f32x4.eq', '(v128.const f32x4 1 2 3 4)',
      `vcmppsx \\$0x00, ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.ne', '(v128.const f32x4 1 2 3 4)',
      `vcmppsx \\$0x04, ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.lt', '(v128.const f32x4 1 2 3 4)',
      `vcmppsx \\$0x01, ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.le', '(v128.const f32x4 1 2 3 4)',
      `vcmppsx \\$0x02, ${RIPR}, %xmm1, %xmm0`],

     ['f64x2.eq', '(v128.const f64x2 1 2)',
      `vcmppdx \\$0x00, ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.ne', '(v128.const f64x2 1 2)',
      `vcmppdx \\$0x04, ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.lt', '(v128.const f64x2 1 2)',
      `vcmppdx \\$0x01, ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.le', '(v128.const f64x2 1 2)',
      `vcmppdx \\$0x02, ${RIPR}, %xmm1, %xmm0`]]);
    470 
// Commutative operations with constants on the lhs should generate the same
// code as with the constant on the rhs.
// (i.e. Ion canonicalizes `const OP x` into `x OP const` and still folds the
// constant into a RIP-relative memory operand.)
codegenTestX64_LITERALxv128_v128_avxhack(
    [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddusbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpminsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpminubx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpmaxsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpmaxubx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpcmpeqbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
vpcmpeqbx ${RIPR}, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],

     ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpaddwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmullwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpaddswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpadduswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpminswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpminuwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmaxswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmaxuwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpcmpeqwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
vpcmpeqwx ${RIPR}, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],

     ['i32x4.add', '(v128.const i32x4 1 2 1 2)',
      `vpadddx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.mul', '(v128.const i32x4 1 2 1 2)',
      `vpmulldx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)',
      `vpminsdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)',
      `vpminudx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)',
      `vpmaxsdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)',
      `vpmaxudx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.eq', '(v128.const i32x4 1 2 1 2)',
      `vpcmpeqdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', `
vpcmpeqdx ${RIPR}, %xmm1, %xmm0
pcmpeqw %xmm15, %xmm15
pxor %xmm15, %xmm0`],
     ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)',
      `vpmaddwdx ${RIPR}, %xmm1, %xmm0`],

     ['i64x2.add', '(v128.const i64x2 1 2)',
      `vpaddqx ${RIPR}, %xmm1, %xmm0`],

     ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpandx ${RIPR}, %xmm1, %xmm0`],
     ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vporx ${RIPR}, %xmm1, %xmm0`],
     ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpxorx ${RIPR}, %xmm1, %xmm0`]]);
    548 
// Shift by constant encodings
// x86 has no 8-lane byte shift: i8x16.shl by 2 is matched as two doubling
// adds, and shl by 4 as mask (RIP-relative constant) + 16-bit shift. The
// word/dword shifts use the immediate forms directly; i64x2.shr_s needs a
// sign-propagation sequence since there is no 64-bit arithmetic right shift.
codegenTestX64_v128xLITERAL_v128_avxhack(
    [['i8x16.shl', '(i32.const 2)', `
vpaddb %xmm1, %xmm1, %xmm0
paddb %xmm0, %xmm0`],
     ['i8x16.shl', '(i32.const 4)', `
vpandx ${RIPR}, %xmm1, %xmm0
psllw \\$0x04, %xmm0`],
     ['i16x8.shl', '(i32.const 1)',
      'vpsllw \\$0x01, %xmm1, %xmm0'],
     ['i16x8.shr_s', '(i32.const 3)',
      'vpsraw \\$0x03, %xmm1, %xmm0'],
     ['i16x8.shr_u', '(i32.const 2)',
      'vpsrlw \\$0x02, %xmm1, %xmm0'],
     ['i32x4.shl', '(i32.const 5)',
      'vpslld \\$0x05, %xmm1, %xmm0'],
     ['i32x4.shr_s', '(i32.const 2)',
      'vpsrad \\$0x02, %xmm1, %xmm0'],
     ['i32x4.shr_u', '(i32.const 5)',
      'vpsrld \\$0x05, %xmm1, %xmm0'],
     ['i64x2.shr_s', '(i32.const 7)', `
vpshufd \\$0xF5, %xmm1, %xmm15
psrad \\$0x1F, %xmm15
vpxor %xmm15, %xmm1, %xmm0
psrlq \\$0x07, %xmm0
pxor %xmm15, %xmm0`]]);
    575 
// vpblendvb optimization when bitselect follows comparison: the comparison
// result is used directly as the blend mask instead of materializing the
// generic and/andnot/or bitselect sequence.
codegenTestX64_adhoc(
    `(module
        (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128)
          (v128.bitselect (local.get 2) (local.get 3)
             (i32x4.eq (local.get 0) (local.get 1)))))`,
        'f', `
pcmpeqd %xmm1, %xmm0
vpblendvb %xmm0, %xmm2, %xmm3, %xmm0`);