avx2-x64-ion-codegen.js (24223B)
// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration("x64") || getBuildConfiguration("simulator") || !isAvxPresent(); include:codegen-x64-test.js

// Test that there are no extraneous moves for various SIMD conversion
// operations. See README-codegen.md for general information about this type of
// test case.

// Note, these tests test the beginning of the output but not the end.

// Currently AVX2 exhibits a defect when function uses its first v128 arg and
// returns v128: the register allocator adds unneeded extra moves from xmm0,
// then into different temporary, and then the latter temporary is used as arg.
// In the tests below, to simplify things, don't use/ignore the first arg.

// v128 OP v128 -> v128
// inputs: [[complete-opname, expected-pattern], ...]
function codegenTestX64_v128xv128_v128_avxhack(inputs, options = {}) {
    for ( let [op, expected] of inputs ) {
        // Params 1 and 2 are used (not param 0) to dodge the AVX2 defect
        // described above.
        codegenTestX64_adhoc(wrap(options, `
    (func (export "f") (param v128 v128 v128) (result v128)
      (${op} (local.get 1) (local.get 2)))`),
                             'f',
                             expected,
                             options);
    }
}

// (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect)
// v128 OP const -> v128
// inputs: [[complete-opname, const, expected-pattern], ...]
function codegenTestX64_v128xLITERAL_v128_avxhack(inputs, options = {}) {
    for ( let [op, const_, expected] of inputs ) {
        codegenTestX64_adhoc(wrap(options, `
    (func (export "f") (param v128 v128) (result v128)
      (${op} (local.get 1) ${const_}))`),
                             'f',
                             expected,
                             options);
    }
}

// (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect)
// const OP v128 -> v128
// inputs: [[complete-opname, const, expected-pattern], ...]
function codegenTestX64_LITERALxv128_v128_avxhack(inputs, options = {}) {
    for ( let [op, const_, expected] of inputs ) {
        codegenTestX64_adhoc(wrap(options, `
    (func (export "f") (param v128 v128) (result v128)
      (${op} ${const_} (local.get 1)))`),
                             'f',
                             expected,
                             options);
    }
}

// Utility function to test SIMD operations encoding, where the input argument
// has the specified type (T).
// inputs: [[type, complete-opname, expected-pattern], ...]
function codegenTestX64_T_v128_avxhack(inputs, options = {}) {
    for ( let [ty, op, expected] of inputs ) {
        codegenTestX64_adhoc(wrap(options, `
    (func (export "f") (param ${ty}) (result v128)
      (${op} (local.get 0)))`),
                             'f',
                             expected,
                             options);
    }
}

// Matchers for any 64- and 32-bit registers.
var GPR_I64 = "%r\\w+";
var GPR_I32 = "%(?:e\\w+|r\\d+d)";

// Simple binary ops: e.g. add, sub, mul
codegenTestX64_v128xv128_v128_avxhack(
    [['i8x16.avgr_u', `vpavgb %xmm2, %xmm1, %xmm0`],
     ['i16x8.avgr_u', `vpavgw %xmm2, %xmm1, %xmm0`],
     ['i8x16.add', `vpaddb %xmm2, %xmm1, %xmm0`],
     ['i8x16.add_sat_s', `vpaddsb %xmm2, %xmm1, %xmm0`],
     ['i8x16.add_sat_u', `vpaddusb %xmm2, %xmm1, %xmm0`],
     ['i8x16.sub', `vpsubb %xmm2, %xmm1, %xmm0`],
     ['i8x16.sub_sat_s', `vpsubsb %xmm2, %xmm1, %xmm0`],
     ['i8x16.sub_sat_u', `vpsubusb %xmm2, %xmm1, %xmm0`],
     ['i16x8.mul', `vpmullw %xmm2, %xmm1, %xmm0`],
     ['i16x8.min_s', `vpminsw %xmm2, %xmm1, %xmm0`],
     ['i16x8.min_u', `vpminuw %xmm2, %xmm1, %xmm0`],
     ['i16x8.max_s', `vpmaxsw %xmm2, %xmm1, %xmm0`],
     ['i16x8.max_u', `vpmaxuw %xmm2, %xmm1, %xmm0`],
     ['i32x4.add', `vpaddd %xmm2, %xmm1, %xmm0`],
     ['i32x4.sub', `vpsubd %xmm2, %xmm1, %xmm0`],
     ['i32x4.mul', `vpmulld %xmm2, %xmm1, %xmm0`],
     ['i32x4.min_s', `vpminsd %xmm2, %xmm1, %xmm0`],
     ['i32x4.min_u', `vpminud %xmm2, %xmm1, %xmm0`],
     ['i32x4.max_s', `vpmaxsd %xmm2, %xmm1, %xmm0`],
     ['i32x4.max_u', `vpmaxud %xmm2, %xmm1, %xmm0`],
     ['i64x2.add', `vpaddq %xmm2, %xmm1, %xmm0`],
     ['i64x2.sub', `vpsubq %xmm2, %xmm1, %xmm0`],
     ['i64x2.mul', `
      vpsrlq \\$0x20, %xmm1, %xmm3
      pmuludq %xmm2, %xmm3
      vpsrlq \\$0x20, %xmm2, %xmm15
      pmuludq %xmm1, %xmm15
      paddq %xmm3, %xmm15
      psllq \\$0x20, %xmm15
      vpmuludq %xmm2, %xmm1, %xmm0
      paddq %xmm15, %xmm0`],
     ['f32x4.add', `vaddps %xmm2, %xmm1, %xmm0`],
     ['f32x4.sub', `vsubps %xmm2, %xmm1, %xmm0`],
     ['f32x4.mul', `vmulps %xmm2, %xmm1, %xmm0`],
     ['f32x4.div', `vdivps %xmm2, %xmm1, %xmm0`],
     ['f64x2.add', `vaddpd %xmm2, %xmm1, %xmm0`],
     ['f64x2.sub', `vsubpd %xmm2, %xmm1, %xmm0`],
     ['f64x2.mul', `vmulpd %xmm2, %xmm1, %xmm0`],
     ['f64x2.div', `vdivpd %xmm2, %xmm1, %xmm0`],
     ['i8x16.narrow_i16x8_s', `vpacksswb %xmm2, %xmm1, %xmm0`],
     ['i8x16.narrow_i16x8_u', `vpackuswb %xmm2, %xmm1, %xmm0`],
     ['i16x8.narrow_i32x4_s', `vpackssdw %xmm2, %xmm1, %xmm0`],
     ['i16x8.narrow_i32x4_u', `vpackusdw %xmm2, %xmm1, %xmm0`],
     ['i32x4.dot_i16x8_s', `vpmaddwd %xmm2, %xmm1, %xmm0`]]);

// Simple comparison ops
codegenTestX64_v128xv128_v128_avxhack(
    [['i8x16.eq', `vpcmpeqb %xmm2, %xmm1, %xmm0`],
     ['i8x16.ne', `
      vpcmpeqb %xmm2, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i8x16.lt_s', `vpcmpgtb %xmm1, %xmm2, %xmm0`],
     ['i8x16.gt_u', `
      vpmaxub %xmm2, %xmm1, %xmm0
      pcmpeqb %xmm2, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i16x8.eq', `vpcmpeqw %xmm2, %xmm1, %xmm0`],
     ['i16x8.ne', `
      vpcmpeqw %xmm2, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i16x8.le_s', `
      vpcmpgtw %xmm2, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i16x8.ge_u', `
      vpminuw %xmm2, %xmm1, %xmm0
      pcmpeqw %xmm2, %xmm0`],
     ['i32x4.eq', `vpcmpeqd %xmm2, %xmm1, %xmm0`],
     ['i32x4.ne', `
      vpcmpeqd %xmm2, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i32x4.lt_s', `vpcmpgtd %xmm1, %xmm2, %xmm0`],
     ['i32x4.gt_u', `
      vpmaxud %xmm2, %xmm1, %xmm0
      pcmpeqd %xmm2, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i64x2.eq', `vpcmpeqq %xmm2, %xmm1, %xmm0`],
     ['i64x2.ne', `
      vpcmpeqq %xmm2, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i64x2.lt_s', `vpcmpgtq %xmm1, %xmm2, %xmm0`],
     ['i64x2.ge_s', `
      vpcmpgtq %xmm1, %xmm2, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['f32x4.eq', `vcmpps \\$0x00, %xmm2, %xmm1, %xmm0`],
     ['f32x4.lt', `vcmpps \\$0x01, %xmm2, %xmm1, %xmm0`],
     ['f32x4.ge', `vcmpps \\$0x02, %xmm1, %xmm2, %xmm0`],
     ['f64x2.eq', `vcmppd \\$0x00, %xmm2, %xmm1, %xmm0`],
     ['f64x2.lt', `vcmppd \\$0x01, %xmm2, %xmm1, %xmm0`],
     ['f64x2.ge', `vcmppd \\$0x02, %xmm1, %xmm2, %xmm0`],
     ['f32x4.pmin', `vminps %xmm1, %xmm2, %xmm0`],
     ['f32x4.pmax', `vmaxps %xmm1, %xmm2, %xmm0`],
     ['f64x2.pmin', `vminpd %xmm1, %xmm2, %xmm0`],
     ['f64x2.pmax', `vmaxpd %xmm1, %xmm2, %xmm0`],
     ['i8x16.swizzle', `
      vpaddusbx ${RIPR}, %xmm2, %xmm15
      vpshufb %xmm15, %xmm1, %xmm0`],
     ['i16x8.extmul_high_i8x16_s', `
      palignr \\$0x08, %xmm2, %xmm15
      vpmovsxbw %xmm15, %xmm15
      palignr \\$0x08, %xmm1, %xmm0
      vpmovsxbw %xmm0, %xmm0
      pmullw %xmm15, %xmm0`],
     ['i32x4.extmul_low_i16x8_u', `
      vpmulhuw %xmm2, %xmm1, %xmm15
      vpmullw %xmm2, %xmm1, %xmm0
      punpcklwd %xmm15, %xmm0`],
     ['i64x2.extmul_low_i32x4_s', `
      vpshufd \\$0x10, %xmm1, %xmm15
      vpshufd \\$0x10, %xmm2, %xmm0
      pmuldq %xmm15, %xmm0`],
     ['i16x8.q15mulr_sat_s', `
      vpmulhrsw %xmm2, %xmm1, %xmm0
      vpcmpeqwx ${RIPR}, %xmm0, %xmm15
      pxor %xmm15, %xmm0`],
    ]);

// Bitwise binary ops
codegenTestX64_v128xv128_v128_avxhack(
    [['v128.and', `vpand %xmm2, %xmm1, %xmm0`],
     ['v128.andnot', `vpandn %xmm1, %xmm2, %xmm0`],
     ['v128.or', `vpor %xmm2, %xmm1, %xmm0`],
     ['v128.xor', `vpxor %xmm2, %xmm1, %xmm0`]]);


// Replace lane ops.
codegenTestX64_adhoc(`(module
    (func (export "f") (param v128 v128 i32) (result v128)
      (i8x16.replace_lane 7 (local.get 1) (local.get 2))))`, 'f', `
vpinsrb \\$0x07, ${GPR_I32}, %xmm1, %xmm0`);
codegenTestX64_adhoc(`(module
    (func (export "f") (param v128 v128 i32) (result v128)
      (i16x8.replace_lane 3 (local.get 1) (local.get 2))))`, 'f', `
vpinsrw \\$0x03, ${GPR_I32}, %xmm1, %xmm0`);
codegenTestX64_adhoc(`(module
    (func (export "f") (param v128 v128 i32) (result v128)
      (i32x4.replace_lane 2 (local.get 1) (local.get 2))))`, 'f', `
vpinsrd \\$0x02, ${GPR_I32}, %xmm1, %xmm0`);
codegenTestX64_adhoc(`(module
    (func (export "f") (param v128 v128 i64) (result v128)
      (i64x2.replace_lane 1 (local.get 1) (local.get 2))))`, 'f', `
vpinsrq \\$0x01, ${GPR_I64}, %xmm1, %xmm0`);


// Splat ops that use the AVX2 broadcast instructions; these are only
// emitted when AVX2 is available.
if (isAvxPresent(2)) {
    codegenTestX64_T_v128_avxhack(
        [['i32', 'i8x16.splat', `
           vmovd ${GPR_I32}, %xmm0
           vpbroadcastb %xmm0, %xmm0`],
         ['i32', 'i16x8.splat', `
           vmovd ${GPR_I32}, %xmm0
           vpbroadcastw %xmm0, %xmm0`],
         ['i32', 'i32x4.splat', `
           vmovd ${GPR_I32}, %xmm0
           vpbroadcastd %xmm0, %xmm0`],
         ['i64', 'i64x2.splat', `
           vmovq ${GPR_I64}, %xmm0
           vpbroadcastq %xmm0, %xmm0`],
         ['f32', 'f32x4.splat', `vbroadcastss %xmm0, %xmm0`]], {log:true});

    codegenTestX64_T_v128_avxhack(
        [['i32', 'v128.load8_splat',
          'vpbroadcastbb \\(%r15,%r\\w+,1\\), %xmm0'],
         ['i32', 'v128.load16_splat',
          'vpbroadcastww \\(%r15,%r\\w+,1\\), %xmm0'],
         ['i32', 'v128.load32_splat',
          'vbroadcastssl \\(%r15,%r\\w+,1\\), %xmm0']], {memory: 1});
}

// Using VEX during shuffle ops
codegenTestX64_v128xv128_v128_avxhack([
    // Identity op on second argument should generate a move
    ['i8x16.shuffle 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15',
     'vmovdqa %xmm1, %xmm0'],

    // Broadcast a byte from first argument
    ['i8x16.shuffle 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5',
     `
     vpunpcklbw %xmm1, %xmm1, %xmm0
     vpshufhw \\$0x55, %xmm0, %xmm0
     vpshufd \\$0xAA, %xmm0, %xmm0`],

    // Broadcast a word from first argument
    ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5',
     `
     vpshuflw \\$0xAA, %xmm1, %xmm0
     vpshufd \\$0x00, %xmm0, %xmm0`],

    // Permute words
    ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13',
     `
     vpshuflw \\$0xB1, %xmm1, %xmm0
     vpshufhw \\$0xB1, %xmm0, %xmm0`],

    // Permute doublewords
    ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11',
     'vpshufd \\$0xB1, %xmm1, %xmm0'],

    // Interleave doublewords
    ['i8x16.shuffle 0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23',
     'vpunpckldq %xmm2, %xmm1, %xmm0'],

    // Interleave quadwords
    ['i8x16.shuffle 24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15',
     'vpunpckhqdq %xmm1, %xmm2, %xmm0'],

    // Rotate right
    ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12',
     `vpalignr \\$0x0D, %xmm1, %xmm1, %xmm0`],
    ['i8x16.shuffle 28 29 30 31 0 1 2 3 4 5 6 7 8 9 10 11',
     `vpalignr \\$0x0C, %xmm2, %xmm1, %xmm0`]]);

if (isAvxPresent(2)) {
    codegenTestX64_v128xv128_v128_avxhack([
        // Broadcast low byte from second argument
        ['i8x16.shuffle 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0',
         'vpbroadcastb %xmm1, %xmm0'],

        // Broadcast low word from third argument
        ['i8x16.shuffle 16 17 16 17 16 17 16 17 16 17 16 17 16 17 16 17',
         'vpbroadcastw %xmm2, %xmm0'],

        // Broadcast low doubleword from second argument
        ['i8x16.shuffle 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3',
         'vpbroadcastd %xmm1, %xmm0']]);
}

// Testing AVX optimization where VPBLENDVB accepts four XMM registers as args.
codegenTestX64_adhoc(
    `(func (export "f") (param v128 v128 v128 v128) (result v128)
       (i8x16.shuffle 0 17 2 3 4 5 6 7 24 25 26 11 12 13 30 15
         (local.get 2)(local.get 3)))`,
    'f',
    `
movdqax ${RIPR}, %xmm1
vpblendvb %xmm1, %xmm3, %xmm2, %xmm0`);

// Constant arguments that are folded into the instruction
codegenTestX64_v128xLITERAL_v128_avxhack(
    [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpsubbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddusbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpsubsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpsubusbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpminsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpminubx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpmaxsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpmaxubx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpcmpeqbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
      vpcmpeqbx ${RIPR}, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpcmpgtbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
      vpcmpgtbx ${RIPR}, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpacksswbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpackuswbx ${RIPR}, %xmm1, %xmm0`],

     ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpaddwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpsubwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmullwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpaddswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpadduswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpsubswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpsubuswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpminswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpminuwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmaxswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmaxuwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpcmpeqwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
      vpcmpeqwx ${RIPR}, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpcmpgtwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
      vpcmpgtwx ${RIPR}, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpackssdwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpackusdwx ${RIPR}, %xmm1, %xmm0`],

     ['i32x4.add', '(v128.const i32x4 1 2 1 2)',
      `vpadddx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.sub', '(v128.const i32x4 1 2 1 2)',
      `vpsubdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.mul', '(v128.const i32x4 1 2 1 2)',
      `vpmulldx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)',
      `vpminsdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)',
      `vpminudx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)',
      `vpmaxsdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)',
      `vpmaxudx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.eq', '(v128.const i32x4 1 2 1 2)',
      `vpcmpeqdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', `
      vpcmpeqdx ${RIPR}, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)',
      `vpcmpgtdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', `
      vpcmpgtdx ${RIPR}, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)',
      `vpmaddwdx ${RIPR}, %xmm1, %xmm0`],

     ['i64x2.add', '(v128.const i64x2 1 2)',
      `vpaddqx ${RIPR}, %xmm1, %xmm0`],
     ['i64x2.sub', '(v128.const i64x2 1 2)',
      `vpsubqx ${RIPR}, %xmm1, %xmm0`],

     ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpandx ${RIPR}, %xmm1, %xmm0`],
     ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vporx ${RIPR}, %xmm1, %xmm0`],
     ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpxorx ${RIPR}, %xmm1, %xmm0`],

     ['f32x4.add', '(v128.const f32x4 1 2 3 4)',
      `vaddpsx ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.sub', '(v128.const f32x4 1 2 3 4)',
      `vsubpsx ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.mul', '(v128.const f32x4 1 2 3 4)',
      `vmulpsx ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.div', '(v128.const f32x4 1 2 3 4)',
      `vdivpsx ${RIPR}, %xmm1, %xmm0`],

     ['f64x2.add', '(v128.const f64x2 1 2)',
      `vaddpdx ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.sub', '(v128.const f64x2 1 2)',
      `vsubpdx ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.mul', '(v128.const f64x2 1 2)',
      `vmulpdx ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.div', '(v128.const f64x2 1 2)',
      `vdivpdx ${RIPR}, %xmm1, %xmm0`],

     ['f32x4.eq', '(v128.const f32x4 1 2 3 4)',
      `vcmppsx \\$0x00, ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.ne', '(v128.const f32x4 1 2 3 4)',
      `vcmppsx \\$0x04, ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.lt', '(v128.const f32x4 1 2 3 4)',
      `vcmppsx \\$0x01, ${RIPR}, %xmm1, %xmm0`],
     ['f32x4.le', '(v128.const f32x4 1 2 3 4)',
      `vcmppsx \\$0x02, ${RIPR}, %xmm1, %xmm0`],

     ['f64x2.eq', '(v128.const f64x2 1 2)',
      `vcmppdx \\$0x00, ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.ne', '(v128.const f64x2 1 2)',
      `vcmppdx \\$0x04, ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.lt', '(v128.const f64x2 1 2)',
      `vcmppdx \\$0x01, ${RIPR}, %xmm1, %xmm0`],
     ['f64x2.le', '(v128.const f64x2 1 2)',
      `vcmppdx \\$0x02, ${RIPR}, %xmm1, %xmm0`]]);

// Commutative operations with constants on the lhs should generate the same
// code as with the constant on the rhs.
codegenTestX64_LITERALxv128_v128_avxhack(
    [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpaddusbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpminsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpminubx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpmaxsbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpmaxubx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpcmpeqbx ${RIPR}, %xmm1, %xmm0`],
     ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
      vpcmpeqbx ${RIPR}, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],

     ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpaddwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmullwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpaddswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpadduswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpminswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpminuwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmaxswx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpmaxuwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
      `vpcmpeqwx ${RIPR}, %xmm1, %xmm0`],
     ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
      vpcmpeqwx ${RIPR}, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],

     ['i32x4.add', '(v128.const i32x4 1 2 1 2)',
      `vpadddx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.mul', '(v128.const i32x4 1 2 1 2)',
      `vpmulldx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)',
      `vpminsdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)',
      `vpminudx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)',
      `vpmaxsdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)',
      `vpmaxudx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.eq', '(v128.const i32x4 1 2 1 2)',
      `vpcmpeqdx ${RIPR}, %xmm1, %xmm0`],
     ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', `
      vpcmpeqdx ${RIPR}, %xmm1, %xmm0
      pcmpeqw %xmm15, %xmm15
      pxor %xmm15, %xmm0`],
     ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)',
      `vpmaddwdx ${RIPR}, %xmm1, %xmm0`],

     ['i64x2.add', '(v128.const i64x2 1 2)',
      `vpaddqx ${RIPR}, %xmm1, %xmm0`],

     ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpandx ${RIPR}, %xmm1, %xmm0`],
     ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vporx ${RIPR}, %xmm1, %xmm0`],
     ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
      `vpxorx ${RIPR}, %xmm1, %xmm0`]]);

// Shift by constant encodings
codegenTestX64_v128xLITERAL_v128_avxhack(
    [['i8x16.shl', '(i32.const 2)', `
      vpaddb %xmm1, %xmm1, %xmm0
      paddb %xmm0, %xmm0`],
     ['i8x16.shl', '(i32.const 4)', `
      vpandx ${RIPR}, %xmm1, %xmm0
      psllw \\$0x04, %xmm0`],
     ['i16x8.shl', '(i32.const 1)',
      'vpsllw \\$0x01, %xmm1, %xmm0'],
     ['i16x8.shr_s', '(i32.const 3)',
      'vpsraw \\$0x03, %xmm1, %xmm0'],
     ['i16x8.shr_u', '(i32.const 2)',
      'vpsrlw \\$0x02, %xmm1, %xmm0'],
     ['i32x4.shl', '(i32.const 5)',
      'vpslld \\$0x05, %xmm1, %xmm0'],
     ['i32x4.shr_s', '(i32.const 2)',
      'vpsrad \\$0x02, %xmm1, %xmm0'],
     ['i32x4.shr_u', '(i32.const 5)',
      'vpsrld \\$0x05, %xmm1, %xmm0'],
     ['i64x2.shr_s', '(i32.const 7)', `
      vpshufd \\$0xF5, %xmm1, %xmm15
      psrad \\$0x1F, %xmm15
      vpxor %xmm15, %xmm1, %xmm0
      psrlq \\$0x07, %xmm0
      pxor %xmm15, %xmm0`]]);

// vpblendvp optimization when bitselect follows comparison.
codegenTestX64_adhoc(
    `(module
        (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128)
          (v128.bitselect (local.get 2) (local.get 3)
            (i32x4.eq (local.get 0) (local.get 1)))))`,
    'f', `
pcmpeqd %xmm1, %xmm0
vpblendvb %xmm0, %xmm2, %xmm3, %xmm0`);