tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit fe186136bc53bc78d702235d16dfe598b015507e
parent 421732532b12b44076f9e7b894c7d83e728c0f74
Author: Ben Visness <bvisness@mozilla.com>
Date:   Wed,  8 Oct 2025 19:42:39 +0000

Bug 1990354: Rework codegen tests. r=rhunt

We include the binary encoding of each instruction in our codegen tests.
This is actually very fussy for basically no benefit. Our codegen tests
are already very fragile, and we tend to make them less fragile by using
regular expressions, e.g. using `%r..` instead of `%rdx`. But, we often
don't bother to modify the regex for the instruction encoding in the
process, making the whole exercise futile.

This patch just removes all instruction encoding from all our codegen
tests.

Differential Revision: https://phabricator.services.mozilla.com/D266863

Diffstat:
M js/src/jit-test/lib/adhoc-multiplatform-test.js | 54 ++++++++++++++++++++++++++++++------------------------
M js/src/jit-test/lib/codegen-arm64-test.js | 12 ++++++------
M js/src/jit-test/lib/codegen-test-common.js | 4 ++--
M js/src/jit-test/lib/codegen-x64-test.js | 11 ++++-------
M js/src/jit-test/lib/codegen-x86-test.js | 15 ++++++---------
M js/src/jit-test/tests/wasm/bce-x64-ion-codegen.js | 14 +++++++-------
M js/src/jit-test/tests/wasm/bce-x86-ion-codegen.js | 16 ++++++++--------
M js/src/jit-test/tests/wasm/binop-arm64-ion-codegen.js | 144 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js | 120 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M js/src/jit-test/tests/wasm/binop-x64-ion-folding.js | 120 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M js/src/jit-test/tests/wasm/builtin-modules/js-string/inline-code.js | 20 ++++++++++----------
M js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js | 504 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M js/src/jit-test/tests/wasm/lazy-tiering-codegen.js | 4 ++--
M js/src/jit-test/tests/wasm/memory-arm64-ion-codegen.js | 16 ++++++++--------
M js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js | 550 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js | 250 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js | 16 ++++++++--------
M js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js | 18 +++++++++---------
M js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js | 96 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js | 20 ++++++++++----------
M js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js | 18 +++++++++---------
M js/src/jit-test/tests/wasm/simd/ion-bug1688713.js | 2 +-
M js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js | 28 ++++++++++++++--------------
M js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js | 34 +++++++++++++++++-----------------
M js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js | 46 +++++++++++++++++++++++-----------------------
M js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js | 24 ++++++++++++------------
M js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js | 42 +++++++++++++++++++++---------------------
M js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js | 22 +++++++++++-----------
28 files changed, 1110 insertions(+), 1110 deletions(-)

diff --git a/js/src/jit-test/lib/adhoc-multiplatform-test.js b/js/src/jit-test/lib/adhoc-multiplatform-test.js @@ -61,42 +61,46 @@ const requiredArchs = ["x64", "x86", "arm64"]; // These define the end-of-prologue ("prefix") and start-of-epilogue // ("suffix") to be matched. -const prefixAndSuffix = +const archOptions = {x64: { + encoding: `(?:${HEX}{2} )*`, // The move from r14 to rbp is writing the callee's wasm instance // into the frame for debug checks -- see WasmFrame.h. - prefix: `48 89 e5 mov %rsp, %rbp( - 4c 89 75 .0 movq %r14, (0x10|0x30)\\(%rbp\\))?`, - suffix: `5d pop %rbp` + prefix: `mov %rsp, %rbp( + movq %r14, (0x10|0x30)\\(%rbp\\))?`, + suffix: `pop %rbp` }, x86: { + encoding: `(?:${HEX}{2} )*`, // The move from esi to rbp is writing the callee's wasm instance // into the frame for debug checks -- see WasmFrame.h. The mov to // e[ac]x is debug code, inserted by the register allocator to // clobber e[ac]x before a move group. But it is only present if // there is a move group there. - prefix: `8b ec mov %esp, %ebp( - 89 75 08 movl %esi, 0x08\\(%rbp\\))?( - b. ef be ad de mov \\$0xDEADBEEF, %e.x)?`, + prefix: `mov %esp, %ebp( + movl %esi, 0x08\\(%rbp\\))?( + mov \\$0xDEADBEEF, %e.x)?`, // `.bp` because zydis chooses `rbp` even on 32-bit systems. - suffix: `5d pop %.bp` + suffix: `pop %.bp` }, arm64: { + encoding: `${HEX}{8}`, // The move from x23 to x29 is writing the callee's wasm instance // into the frame for debug checks -- see WasmFrame.h. - prefix: `910003fd mov x29, sp - 910003fc mov x28, sp( - f9000bb7 str x23, \\[x29, #16\\])?`, - suffix: `f94007fe ldr x30, \\[sp, #8\\] - f94003fd ldr x29, \\[sp\\]` + prefix: `mov x29, sp + mov x28, sp( + str x23, \\[x29, #16\\])?`, + suffix: `ldr x30, \\[sp, #8\\] + ldr x29, \\[sp\\]` }, arm: { + encoding: `${HEX}{8} ${HEX}{8}`, // The move from r9 to fp is writing the callee's wasm instance into // the frame for debug checks -- see WasmFrame.h. - prefix: `e52db004 str fp, \\[sp, #-4\\]! 
- e1a0b00d mov fp, sp( - e58b9008 str r9, \\[fp, #\\+8\\])?`, - suffix: `e49db004 ldr fp, \\[sp\\], #\\+4` + prefix: `str fp, \\[sp, #-4\\]! + mov fp, sp( + str r9, \\[fp, #\\+8\\])?`, + suffix: `ldr fp, \\[sp\\], #\\+4` } }; @@ -186,12 +190,14 @@ function codegenTestMultiplatform_adhoc(module_text, export_name, // the options object. options = promoteArchSpecificOptions(options, archName); - // Get the prefix and suffix strings for the target. - assertEq(true, prefixAndSuffix.hasOwnProperty(archName)); - let prefix = prefixAndSuffix[archName].prefix; - let suffix = prefixAndSuffix[archName].suffix; - assertEq(true, prefix.length >= 10); - assertEq(true, suffix.length >= 10); + // Get the architecture-specific strings for the target. + assertEq(true, archOptions.hasOwnProperty(archName)); + let encoding = archOptions[archName].encoding; + let prefix = archOptions[archName].prefix; + let suffix = archOptions[archName].suffix; + assertEq(true, encoding.length > 0, `bad instruction encoding: ${encoding}`); + assertEq(true, prefix.length > 0, `bad prefix: ${prefix}`); + assertEq(true, suffix.length > 0, `bad suffix: ${suffix}`); // Get the expected output string, or skip the test if no expected output // has been provided. Note, because of the assertion near the top of this @@ -238,7 +244,7 @@ function codegenTestMultiplatform_adhoc(module_text, export_name, } expected = newExpected; } - expected = fixlines(expected); + expected = fixlines(expected, encoding); // Compile the test case and collect disassembly output. let ins = wasmEvalText(module_text, {}, options.features); diff --git a/js/src/jit-test/lib/codegen-arm64-test.js b/js/src/jit-test/lib/codegen-arm64-test.js @@ -5,15 +5,15 @@ load(libdir + "codegen-test-common.js"); // End of prologue var arm64_prefix = ` -910003fd mov x29, sp -910003fc mov x28, sp( -f9000bb7 str x23, \\[x29, #16\\])? +mov x29, sp +mov x28, sp( +str x23, \\[x29, #16\\])? 
`; // Start of epilogue var arm64_suffix = ` -f94007fe ldr x30, \\[sp, #8\\] -f94003fd ldr x29, \\[sp\\] +ldr x30, \\[sp, #8\\] +ldr x29, \\[sp\\] `; // For when nothing else applies: `module_text` is the complete source text of @@ -33,7 +33,7 @@ function codegenTestARM64_adhoc(module_text, export_name, expected, options = {} expected = arm64_prefix + '\n' + expected; if (!options.no_suffix) expected = expected + '\n' + arm64_suffix; - expected = fixlines(expected); + expected = fixlines(expected, `${HEX}{8}`); const output_matches_expected = output.match(new RegExp(expected)) != null; if (!output_matches_expected) { diff --git a/js/src/jit-test/lib/codegen-test-common.js b/js/src/jit-test/lib/codegen-test-common.js @@ -11,11 +11,11 @@ function wrap(options, funcs) { return `(module ${funcs})`; } -function fixlines(s) { +function fixlines(s, insEncoding) { return s.split(/\n+/) .map(strip) .filter(x => x.length > 0) - .map(x => '(?:0x)?' + HEXES + ' ' + x) + .map(x => `(?:0x)?${HEX}+ ${insEncoding} ${x}`) .map(spaces) .join('\n'); } diff --git a/js/src/jit-test/lib/codegen-x64-test.js b/js/src/jit-test/lib/codegen-x64-test.js @@ -33,18 +33,15 @@ load(libdir + "codegen-test-common.js"); // RIP-relative address following the instruction mnemonic var RIPR = `0x${HEXES}`; -// RIP-relative address in the binary encoding -var RIPRADDR = `${HEX}{2} ${HEX}{2} ${HEX}{2} ${HEX}{2}`; - // End of prologue. The move from r14 to rbp is writing the callee's wasm // instance into the frame for debug checks -- see WasmFrame.h. var x64_prefix = ` -48 89 e5 mov %rsp, %rbp( -4c 89 75 .0 movq %r14, (0x10|0x30)\\(%rbp\\))? +mov %rsp, %rbp( +movq %r14, (0x10|0x30)\\(%rbp\\))? ` // Start of epilogue -var x64_suffix = `5d pop %rbp`; +var x64_suffix = `pop %rbp`; // v128 OP v128 -> v128 // inputs: [[complete-opname, expected-pattern], ...] 
@@ -166,7 +163,7 @@ function codegenTestX64_adhoc(module_text, export_name, expected, options = {}) if (!options.no_suffix) expected = expected + '\n' + x64_suffix; const expected_pretty = striplines(expected); - expected = fixlines(expected); + expected = fixlines(expected, `(?:${HEX}{2} )*`); const success = output.match(new RegExp(expected)) != null; if (options.log || !success) { diff --git a/js/src/jit-test/lib/codegen-x86-test.js b/js/src/jit-test/lib/codegen-x86-test.js @@ -9,9 +9,6 @@ load(libdir + "codegen-test-common.js"); // Absolute address (disp32) following the instruction mnemonic. var ABS = `0x${HEXES}`; -// Absolute address (disp32) in the binary encoding. -var ABSADDR = `${HEX}{2} ${HEX}{2} ${HEX}{2} ${HEX}{2}`; - // End of prologue. The move from esi to rbp is writing the callee's wasm // instance into the frame for debug checks -- see WasmFrame.h. The mov to eax // is debug code, inserted by the register allocator to clobber eax before a @@ -19,18 +16,18 @@ var ABSADDR = `${HEX}{2} ${HEX}{2} ${HEX}{2} ${HEX}{2}`; // // -0x21524111 is 0xDEADBEEF. var x86_prefix = ` -8b ec mov %esp, %ebp( -89 75 08 movl %esi, 0x08\\(%rbp\\))?( -b8 ef be ad de mov \\$-0x21524111, %eax)? +mov %esp, %ebp( +movl %esi, 0x08\\(%rbp\\))?( +mov \\$-0x21524111, %eax)? ` // `.bp` because zydis chooses 'rbp' even on 32-bit systems var x86_loadarg0 = ` -f3 0f 6f 45 ${HEX}{2} movdqux 0x${HEXES}\\(%.bp\\), %xmm0 +movdqux 0x${HEXES}\\(%.bp\\), %xmm0 `; // Start of epilogue. `.bp` for the same reason as above. -var x86_suffix = `5d pop %.bp`; +var x86_suffix = `pop %.bp`; // v128 OP literal -> v128 // inputs: [[complete-opname, rhs-literal, expected-pattern], ...] 
@@ -60,7 +57,7 @@ function codegenTestX86_adhoc(module_text, export_name, expected, options = {}) expected = x86_prefix + '\n' + expected; if (!options.no_suffix) expected = expected + '\n' + x86_suffix; - expected = fixlines(expected); + expected = fixlines(expected, `(?:${HEX}{2} )*`); const output_matches_expected = output.match(new RegExp(expected)) != null; if (!output_matches_expected) { diff --git a/js/src/jit-test/tests/wasm/bce-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/bce-x64-ion-codegen.js @@ -31,10 +31,10 @@ for ( let memType of memTypes ) { drop (i32.load (local.get 1))))`, 'f', ` -48 3b .. cmp %r.., %r.. -0f 83 .. 00 00 00 jnb 0x00000000000000.. -41 8b .. .. movl \\(%r15,%r..,1\\), %e.. -41 8b .. .. movl \\(%r15,%r..,1\\), %eax`, +cmp %r.., %r.. +jnb 0x00000000000000.. +movl \\(%r15,%r..,1\\), %e.. +movl \\(%r15,%r..,1\\), %eax`, {no_prefix:true}); // Make sure constant indices below the heap minimum do not require a bounds @@ -45,7 +45,7 @@ for ( let memType of memTypes ) { (func (export "f") (result i32) (i32.load (${memType}.const 16))))`, 'f', - `41 8b 47 10 movl 0x10\\(%r15\\), %eax`); + `movl 0x10\\(%r15\\), %eax`); // Ditto, even at the very limit of the known heap, extending into the guard // page. This is an OOB access, of course, but it needs no explicit bounds @@ -57,7 +57,7 @@ for ( let memType of memTypes ) { (i32.load (${memType}.const 65535))))`, 'f', ` -b8 ff ff 00 00 mov \\$0xFFFF, %eax -41 8b 04 07 movl \\(%r15,%rax,1\\), %eax`); +mov \\$0xFFFF, %eax +movl \\(%r15,%rax,1\\), %eax`); } diff --git a/js/src/jit-test/tests/wasm/bce-x86-ion-codegen.js b/js/src/jit-test/tests/wasm/bce-x86-ion-codegen.js @@ -23,10 +23,10 @@ codegenTestX86_adhoc( drop (i32.load (local.get 1))))`, 'f', ` -3b .. cmp %e.., %e.. -0f 83 .. 00 00 00 jnb 0x00000000000000.. -8b .. .. movl \\(%r..,%r..,1\\), %e.. -8b .. .. movl \\(%r..,%r..,1\\), %eax`, +cmp %e.., %e.. +jnb 0x00000000000000.. +movl \\(%r..,%r..,1\\), %e.. 
+movl \\(%r..,%r..,1\\), %eax`, {no_prefix:true}); // Make sure constant indices below the heap minimum do not require a bounds check. @@ -37,8 +37,8 @@ codegenTestX86_adhoc( (func (export "f") (result i32) (i32.load (i32.const 16))))`, 'f', ` -8b .. movl \\(%rsi\\), %e.. -8b .. 10 movl 0x10\\(%r..\\), %eax`); +movl \\(%rsi\\), %e.. +movl 0x10\\(%r..\\), %eax`); // Ditto, even at the very limit of the known heap, extending into the guard // page. This is an OOB access, of course, but it needs no explicit bounds @@ -50,5 +50,5 @@ codegenTestX86_adhoc( (i32.load (i32.const 65535))))`, 'f', ` -8b .. movl \\(%rsi\\), %e.. -8b .. ff ff 00 00 movl 0xFFFF\\(%r..\\), %eax`); +movl \\(%rsi\\), %e.. +movl 0xFFFF\\(%r..\\), %eax`); diff --git a/js/src/jit-test/tests/wasm/binop-arm64-ion-codegen.js b/js/src/jit-test/tests/wasm/binop-arm64-ion-codegen.js @@ -3,19 +3,19 @@ // Basic constant folding tests for ( [op, lhs, rhs, expect] of - [['add', 5, 8, 'd28001a0 mov x0, #0xd'], - ['sub', 4, 5, '92800000 mov x0, #0xffffffffffffffff'], - ['mul', 8, 3, 'd2800300 mov x0, #0x18'], - ['div_s', -8, 3, '92800020 mov x0, #0xfffffffffffffffe'], - ['div_u', 8, 3, 'd2800040 mov x0, #0x2'], - ['rem_s', 8, 5, 'd2800060 mov x0, #0x3'], - ['rem_u', -7, 4, 'd2800020 mov x0, #0x1'], - ['and', 0xfe, 0x77, 'd2800ec0 mov x0, #0x76'], - ['or', 0xfe, 0x77, 'd2801fe0 mov x0, #0xff'], - ['xor', 0xfe, 0x77, 'd2801120 mov x0, #0x89'], - ['shl', 3, 4, 'd2800600 mov x0, #0x30'], - ['shr_s', -8, 1, '92800060 mov x0, #0xfffffffffffffffc'], - ['shr_u', -8, 1, 'b27ef3e0 mov x0, #0x7ffffffffffffffc']] ) { + [['add', 5, 8, 'mov x0, #0xd'], + ['sub', 4, 5, 'mov x0, #0xffffffffffffffff'], + ['mul', 8, 3, 'mov x0, #0x18'], + ['div_s', -8, 3, 'mov x0, #0xfffffffffffffffe'], + ['div_u', 8, 3, 'mov x0, #0x2'], + ['rem_s', 8, 5, 'mov x0, #0x3'], + ['rem_u', -7, 4, 'mov x0, #0x1'], + ['and', 0xfe, 0x77, 'mov x0, #0x76'], + ['or', 0xfe, 0x77, 'mov x0, #0xff'], + ['xor', 0xfe, 0x77, 'mov x0, #0x89'], + ['shl', 3, 4, 'mov 
x0, #0x30'], + ['shr_s', -8, 1, 'mov x0, #0xfffffffffffffffc'], + ['shr_u', -8, 1, 'mov x0, #0x7ffffffffffffffc']] ) { codegenTestARM64_adhoc(` (module (func (export "f") (result i64) @@ -28,10 +28,10 @@ for ( [op, lhs, rhs, expect] of // arg 1 here to force an explicit move to be emitted. for ( [op, args, expect] of - [['add', '(local.get 1) (i64.const 0)', 'aa0103e0 mov x0, x1'], - ['add', '(i64.const 0) (local.get 1)', 'aa0103e0 mov x0, x1'], - ['mul', '(local.get 1) (i64.const 1)', 'aa0103e0 mov x0, x1'], - ['mul', '(i64.const 1) (local.get 1)', 'aa0103e0 mov x0, x1']] ) { + [['add', '(local.get 1) (i64.const 0)', 'mov x0, x1'], + ['add', '(i64.const 0) (local.get 1)', 'mov x0, x1'], + ['mul', '(local.get 1) (i64.const 1)', 'mov x0, x1'], + ['mul', '(i64.const 1) (local.get 1)', 'mov x0, x1']] ) { codegenTestARM64_adhoc(` (module (func (export "f") (param i64) (param i64) (result i64) @@ -49,7 +49,7 @@ let neg32 = codegenTestARM64_adhoc( neg32, 'f', - '4b0003e0 neg w0, w0'); + 'neg w0, w0'); assertEq(wasmEvalText(neg32).exports.f(-37), 37) assertEq(wasmEvalText(neg32).exports.f(42), -42) @@ -59,7 +59,7 @@ let neg64 = `(module codegenTestARM64_adhoc( neg64, 'f', - 'cb0003e0 neg x0, x0'); + 'neg x0, x0'); assertEq(wasmEvalText(neg64).exports.f(-37000000000n), 37000000000n) assertEq(wasmEvalText(neg64).exports.f(42000000000n), -42000000000n) @@ -72,7 +72,7 @@ let zero32 = codegenTestARM64_adhoc( zero32, 'f', - '2a1f03e0 mov w0, wzr'); + 'mov w0, wzr'); assertEq(wasmEvalText(zero32).exports.f(-37), 0) assertEq(wasmEvalText(zero32).exports.f(42), 0) @@ -82,7 +82,7 @@ let zero64 = `(module codegenTestARM64_adhoc( zero64, 'f', - 'aa1f03e0 mov x0, xzr'); + 'mov x0, xzr'); assertEq(wasmEvalText(zero64).exports.f(-37000000000n), 0n) assertEq(wasmEvalText(zero64).exports.f(42000000000n), 0n) @@ -119,7 +119,7 @@ let double32 = codegenTestARM64_adhoc( double32, 'f', - '0b000000 add w0, w0, w0'); + 'add w0, w0, w0'); assertEq(wasmEvalText(double32).exports.f(-37), -74) 
assertEq(wasmEvalText(double32).exports.f(42), 84) @@ -129,7 +129,7 @@ let double64 = `(module codegenTestARM64_adhoc( double64, 'f', - '8b000000 add x0, x0, x0'); + 'add x0, x0, x0'); assertEq(wasmEvalText(double64).exports.f(-37000000000n), -74000000000n) assertEq(wasmEvalText(double64).exports.f(42000000000n), 84000000000n) @@ -142,7 +142,7 @@ let quad32 = codegenTestARM64_adhoc( quad32, 'f', - '531e7400 lsl w0, w0, #2'); + 'lsl w0, w0, #2'); assertEq(wasmEvalText(quad32).exports.f(-37), -148) assertEq(wasmEvalText(quad32).exports.f(42), 168) @@ -152,7 +152,7 @@ let quad64 = `(module codegenTestARM64_adhoc( quad64, 'f', - 'd37ef400 lsl x0, x0, #2'); + 'lsl x0, x0, #2'); assertEq(wasmEvalText(quad64).exports.f(-37000000000n), -148000000000n) assertEq(wasmEvalText(quad64).exports.f(42000000000n), 168000000000n) @@ -165,8 +165,8 @@ let quint32 = codegenTestARM64_adhoc( quint32, 'f', - `528000b0 mov w16, #0x5 - 1b107c00 mul w0, w0, w16`); + `mov w16, #0x5 + mul w0, w0, w16`); assertEq(wasmEvalText(quint32).exports.f(-37), -37*5) assertEq(wasmEvalText(quint32).exports.f(42), 42*5) @@ -176,8 +176,8 @@ let quint64 = `(module codegenTestARM64_adhoc( quint64, 'f', - `d28000b0 mov x16, #0x5 - 9b107c00 mul x0, x0, x16`); + `mov x16, #0x5 + mul x0, x0, x16`); assertEq(wasmEvalText(quint64).exports.f(-37000000000n), -37000000000n*5n) assertEq(wasmEvalText(quint64).exports.f(42000000000n), 42000000000n*5n) @@ -190,20 +190,20 @@ assertEq(wasmEvalText(quint64).exports.f(42000000000n), 42000000000n*5n) for ( [op, imm, expectVar, expectImm] of [['and', 64, - '8a020020 and x0, x1, x2', - '927a0020 and x0, x1, #0x40'], + 'and x0, x1, x2', + 'and x0, x1, #0x40'], ['or', 64, - 'aa020020 orr x0, x1, x2', - 'b27a0020 orr x0, x1, #0x40'], + 'orr x0, x1, x2', + 'orr x0, x1, #0x40'], ['xor', 64, - 'ca020020 eor x0, x1, x2', - 'd27a0020 eor x0, x1, #0x40'], + 'eor x0, x1, x2', + 'eor x0, x1, #0x40'], ['add', 64, - '8b020020 add x0, x1, x2', - '91010020 add x0, x1, #0x40 \\(64\\)'], + 'add 
x0, x1, x2', + 'add x0, x1, #0x40 \\(64\\)'], ['sub', 64, - 'cb020020 sub x0, x1, x2', - 'd1010020 sub x0, x1, #0x40 \\(64\\)']] ) { + 'sub x0, x1, x2', + 'sub x0, x1, #0x40 \\(64\\)']] ) { codegenTestARM64_adhoc(` (module (func (export "f") (param i64) (param i64) (param i64) (result i64) @@ -226,11 +226,11 @@ for ( [op, imm, expectVar, expectImm] of // codegen here. for ( [op, expect] of - [['shl', 'd37ef420 lsl x0, x1, #2'], - ['shr_s', '9342fc20 asr x0, x1, #2'], - ['shr_u', 'd342fc20 lsr x0, x1, #2'], - ['rotl', '93c1f820 ror x0, x1, #62'], - ['rotr', '93c10820 ror x0, x1, #2']] ) { + [['shl', 'lsl x0, x1, #2'], + ['shr_s', 'asr x0, x1, #2'], + ['shr_u', 'lsr x0, x1, #2'], + ['rotl', 'ror x0, x1, #62'], + ['rotr', 'ror x0, x1, #2']] ) { codegenTestARM64_adhoc(` (module (func (export "f") (param i64) (param i64) (result i64) @@ -248,7 +248,7 @@ let subneg32 = codegenTestARM64_adhoc( subneg32, 'f', - '4b0003e0 neg w0, w0'); + 'neg w0, w0'); assertEq(wasmEvalText(subneg32).exports.f(-37), 37) assertEq(wasmEvalText(subneg32).exports.f(42), -42) @@ -258,7 +258,7 @@ let subneg64 = `(module codegenTestARM64_adhoc( subneg64, 'f', - 'cb0003e0 neg x0, x0'); + 'neg x0, x0'); assertEq(wasmEvalText(subneg64).exports.f(-37000000000n), 37000000000n) assertEq(wasmEvalText(subneg64).exports.f(42000000000n), -42000000000n) @@ -270,16 +270,16 @@ codegenTestARM64_adhoc( (func (export "f") (param i64) (param i64) (param i64) (param i32) (result i64) (select (local.get 1) (local.get 2) (local.get 3))))`, 'f', - `6a03007f tst w3, w3 - 9a821020 csel x0, x1, x2, ne`) + `tst w3, w3 + csel x0, x1, x2, ne`) codegenTestARM64_adhoc( `(module (func (export "f") (param f64) (param f64) (param f64) (param i32) (result f64) (select (local.get 1) (local.get 2) (local.get 3))))`, 'f', - `6a00001f tst w0, w0 - 1e621c20 fcsel d0, d1, d2, ne`) + `tst w0, w0 + fcsel d0, d1, d2, ne`) // Here we test that no boolean is generated and then re-tested, and that // operands are swapped so that we can use 
an immediate constant, and that the @@ -290,17 +290,17 @@ codegenTestARM64_adhoc( (func (export "f") (param $a i32) (param $b i32) (param $c i32) (param $d i32) (result i32) (select (local.get $b) (local.get $d) (i32.lt_s (i32.const 0) (local.get $c)))))`, 'f', - `7100005f cmp w2, #0x0 \\(0\\) - 1a83c020 csel w0, w1, w3, gt`) + `cmp w2, #0x0 \\(0\\) + csel w0, w1, w3, gt`) codegenTestARM64_adhoc( `(module (func (export "f") (param $a f64) (param $b f64) (param $c f64) (param $d f64) (result f64) (select (local.get $b) (local.get $d) (f64.lt (f64.const 0) (local.get $c)))))`, 'f', - `2f00e400 movi d0, #0x0 - 1e622000 fcmp d0, d2 - 1e633c20 fcsel d0, d1, d3, lo`) + `movi d0, #0x0 + fcmp d0, d2 + fcsel d0, d1, d3, lo`) // FP ABS should not tie its input to its output. @@ -309,22 +309,22 @@ codegenTestARM64_adhoc( (func (export "f") (param f32) (param f32) (result f32) (f32.abs (local.get 1))))`, 'f', - '1e20c020 fabs s0, s1'); + 'fabs s0, s1'); codegenTestARM64_adhoc( `(module (func (export "f") (param f64) (param f64) (result f64) (f64.abs (local.get 1))))`, 'f', - '1e60c020 fabs d0, d1'); + 'fabs d0, d1'); // AND{32,64} followed by `== 0`: check the two operations are merged into a // single 'tst' insn, and no 'and' insn. The merging isn't done for // {OR,XOR}{32,64}. This is for both arguments being non-constant. 
for ( [ty, expect_tst] of - [['i32', '6a01001f tst w0, w1'], - ['i64', 'ea01001f tst x0, x1']] ) { + [['i32', 'tst w0, w1'], + ['i64', 'tst x0, x1']] ) { codegenTestARM64_adhoc( `(module (func (export "f") (param $p1 ${ty}) (param $p2 ${ty}) (result i32) @@ -339,10 +339,10 @@ for ( [ty, expect_tst] of )`, 'f', `${expect_tst} - 54000061 b\\.ne #\\+0xc \\(addr .*\\) - 52823ae0 mov w0, #0x11d7 - 14000002 b #\\+0x8 \\(addr .*\\) - 52809a40 mov w0, #0x4d2` + b\\.ne #\\+0xc \\(addr .*\\) + mov w0, #0x11d7 + b #\\+0x8 \\(addr .*\\) + mov w0, #0x4d2` ); } @@ -351,12 +351,12 @@ for ( [ty, expect_tst] of for ( [imm, expect1, expect2] of [ // as a valid logical-immediate => imm in insn ['0x0F0F0F0F0F0F0F0F', - 'f200cc1f tst x0, #0xf0f0f0f0f0f0f0f', + 'tst x0, #0xf0f0f0f0f0f0f0f', ''], // anything else => imm synth'd into a reg ['-0x4771', - '9288ee10 mov x16, #0xffffffffffffb88f', - 'ea10001f tst x0, x16']] + 'mov x16, #0xffffffffffffb88f', + 'tst x0, x16']] ) { codegenTestARM64_adhoc( `(module @@ -373,10 +373,10 @@ for ( [imm, expect1, expect2] of 'f', `${expect1} ${expect2} - 54000061 b\\.ne #\\+0xc \\(addr .*\\) - 52823ae0 mov w0, #0x11d7 - 14000002 b #\\+0x8 \\(addr .*\\) - 52809a40 mov w0, #0x4d2` + b\\.ne #\\+0xc \\(addr .*\\) + mov w0, #0x11d7 + b #\\+0x8 \\(addr .*\\) + mov w0, #0x4d2` ); } @@ -401,7 +401,7 @@ for ( [cmpTy, cmpOp, selTy, cmpRegPfx, cselRegPfx, armCC] of ) )`, 'f', - `.b01001f cmp ${cmpRegPfx}0, ${cmpRegPfx}1 - .a83.040 csel ${cselRegPfx}0, ${cselRegPfx}2, ${cselRegPfx}3, ${armCC}` + `cmp ${cmpRegPfx}0, ${cmpRegPfx}1 + csel ${cselRegPfx}0, ${cselRegPfx}2, ${cselRegPfx}3, ${armCC}` ); } diff --git a/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/binop-x64-ion-codegen.js @@ -13,7 +13,7 @@ let neg32 = codegenTestX64_adhoc( neg32, 'f', - 'f7 d8 neg %eax', {no_prefix:true}); + 'neg %eax', {no_prefix:true}); assertEq(wasmEvalText(neg32).exports.f(-37), 37) assertEq(wasmEvalText(neg32).exports.f(42), -42) @@ -24,7 +24,7 @@ 
let neg64 = codegenTestX64_adhoc( neg64, 'f', - '48 f7 d8 neg %rax', {no_prefix:true}); + 'neg %rax', {no_prefix:true}); assertEq(wasmEvalText(neg64).exports.f(-37000000000n), 37000000000n) assertEq(wasmEvalText(neg64).exports.f(42000000000n), -42000000000n) @@ -37,7 +37,7 @@ let zero32 = codegenTestX64_adhoc( zero32, 'f', - '33 c0 xor %eax, %eax', {no_prefix:true}); + 'xor %eax, %eax', {no_prefix:true}); assertEq(wasmEvalText(zero32).exports.f(-37), 0) assertEq(wasmEvalText(zero32).exports.f(42), 0) @@ -47,7 +47,7 @@ let zero64 = `(module codegenTestX64_adhoc( zero64, 'f', - '48 33 c0 xor %rax, %rax', {no_prefix:true}); + 'xor %rax, %rax', {no_prefix:true}); assertEq(wasmEvalText(zero64).exports.f(-37000000000n), 0n) assertEq(wasmEvalText(zero64).exports.f(42000000000n), 0n) @@ -83,7 +83,7 @@ let double32 = codegenTestX64_adhoc( double32, 'f', - '03 c0 add %eax, %eax', {no_prefix:true}); + 'add %eax, %eax', {no_prefix:true}); assertEq(wasmEvalText(double32).exports.f(-37), -74) assertEq(wasmEvalText(double32).exports.f(42), 84) @@ -93,7 +93,7 @@ let double64 = `(module codegenTestX64_adhoc( double64, 'f', - '48 03 c0 add %rax, %rax', {no_prefix:true}); + 'add %rax, %rax', {no_prefix:true}); assertEq(wasmEvalText(double64).exports.f(-37000000000n), -74000000000n) assertEq(wasmEvalText(double64).exports.f(42000000000n), 84000000000n) @@ -106,7 +106,7 @@ let quad32 = codegenTestX64_adhoc( quad32, 'f', - 'c1 e0 02 shl \\$0x02, %eax', {no_prefix:true}); + 'shl \\$0x02, %eax', {no_prefix:true}); assertEq(wasmEvalText(quad32).exports.f(-37), -148) assertEq(wasmEvalText(quad32).exports.f(42), 168) @@ -116,7 +116,7 @@ let quad64 = `(module codegenTestX64_adhoc( quad64, 'f', - '48 c1 e0 02 shl \\$0x02, %rax', {no_prefix:true}); + 'shl \\$0x02, %rax', {no_prefix:true}); assertEq(wasmEvalText(quad64).exports.f(-37000000000n), -148000000000n) assertEq(wasmEvalText(quad64).exports.f(42000000000n), 168000000000n) @@ -129,7 +129,7 @@ let quint32 = codegenTestX64_adhoc( quint32, 
'f', - '6b c0 05 imul \\$0x05, %eax, %eax', {no_prefix:true}); + 'imul \\$0x05, %eax, %eax', {no_prefix:true}); assertEq(wasmEvalText(quint32).exports.f(-37), -37*5) assertEq(wasmEvalText(quint32).exports.f(42), 42*5) @@ -139,7 +139,7 @@ let quint64 = `(module codegenTestX64_adhoc( quint64, 'f', - `48 6b c0 05 imul \\$0x05, %rax, %rax`, {no_prefix:true}) + `imul \\$0x05, %rax, %rax`, {no_prefix:true}) assertEq(wasmEvalText(quint64).exports.f(-37000000000n), -37000000000n*5n) assertEq(wasmEvalText(quint64).exports.f(42000000000n), 42000000000n*5n) @@ -152,7 +152,7 @@ let subneg32 = codegenTestX64_adhoc( subneg32, 'f', - 'f7 d8 neg %eax', {no_prefix:true}); + 'neg %eax', {no_prefix:true}); assertEq(wasmEvalText(subneg32).exports.f(-37), 37) assertEq(wasmEvalText(subneg32).exports.f(42), -42) @@ -163,7 +163,7 @@ let subneg64 = codegenTestX64_adhoc( subneg64, 'f', - '48 f7 d8 neg %rax', {no_prefix:true}); + 'neg %rax', {no_prefix:true}); assertEq(wasmEvalText(subneg64).exports.f(-37000000000n), 37000000000n) assertEq(wasmEvalText(subneg64).exports.f(42000000000n), -42000000000n) @@ -172,8 +172,8 @@ assertEq(wasmEvalText(subneg64).exports.f(42000000000n), -42000000000n) // {OR,XOR}{32,64}. This is for both arguments being non-constant. for ( [ty, expect_test] of - [['i32', '85 .. test %e.., %e..'], - ['i64', '48 85 .. test %r.., %r..']] ) { + [['i32', 'test %e.., %e..'], + ['i64', 'test %r.., %r..']] ) { codegenTestX64_adhoc( `(module (func (export "f") (param $p1 ${ty}) (param $p2 ${ty}) (result i32) @@ -188,10 +188,10 @@ for ( [ty, expect_test] of )`, 'f', `${expect_test} - 0f 85 .. 00 00 00 jnz 0x00000000000000.. - b8 d7 11 00 00 mov \\$0x11D7, %eax - e9 .. 00 00 00 jmp 0x00000000000000.. - b8 d2 04 00 00 mov \\$0x4D2, %eax` + jnz 0x00000000000000.. + mov \\$0x11D7, %eax + jmp 0x00000000000000.. + mov \\$0x4D2, %eax` ); } @@ -201,16 +201,16 @@ for ( [ty, expect_test] of for ( [imm, expect1, expect2] of [ // in signed-32 range => imm in insn ['0x17654321', - 'f7 c. 
21 43 65 17 test \\$0x17654321, %e..', // edi or ecx + 'test \\$0x17654321, %e..', // edi or ecx ''], // in unsigned-32 range => imm in reg via movl ['0x87654321', - '41 bb 21 43 65 87 mov \\$-0x789ABCDF, %r11d', - '4c 85 d. test %r11, %r..'], // rdi or rcx + 'mov \\$-0x789ABCDF, %r11d', + 'test %r11, %r..'], // rdi or rcx // not in either range => imm in reg via mov(absq) ['0x187654321', - '49 bb 21 43 65 87 01 00 00 00 mov \\$0x187654321, %r11', - '4c 85 d. test %r11, %r..']] // rdi or rcx + 'mov \\$0x187654321, %r11', + 'test %r11, %r..']] // rdi or rcx ) { codegenTestX64_adhoc( `(module @@ -227,10 +227,10 @@ for ( [imm, expect1, expect2] of 'f', `${expect1} ${expect2} - 0f 85 .. 00 00 00 jnz 0x00000000000000.. - b8 d7 11 00 00 mov \\$0x11D7, %eax - e9 .. 00 00 00 jmp 0x00000000000000.. - b8 d2 04 00 00 mov \\$0x4D2, %eax` + jnz 0x00000000000000.. + mov \\$0x11D7, %eax + jmp 0x00000000000000.. + mov \\$0x4D2, %eax` ); } @@ -251,21 +251,21 @@ function cmpSel32vs64(cmpTy, cmpOp, selTy) { } if (getBuildConfiguration("windows")) { for ( [cmpTy, cmpOp, selTy, insn1, insn2, insn3] of - [ ['i32', 'le_s', 'i32', '8b c3 mov %ebx, %eax', - '3b ca cmp %edx, %ecx', - '41 0f 4f c1 cmovnle %r9d, %eax'], - ['i32', 'lt_u', 'i64', '48 89 d8 mov %rbx, %rax', - '3b ca cmp %edx, %ecx', - '49 0f 43 c1 cmovnb %r9, %rax'], - ['i64', 'le_s', 'i32', '8b c3 mov %ebx, %eax', - '48 3b ca cmp %rdx, %rcx', - '41 0f 4f c1 cmovnle %r9d, %eax'], - ['i64', 'lt_u', 'i64', '48 89 d8 mov %rbx, %rax', - '48 3b ca cmp %rdx, %rcx', - '49 0f 43 c1 cmovnb %r9, %rax'] + [ ['i32', 'le_s', 'i32', 'mov %ebx, %eax', + 'cmp %edx, %ecx', + 'cmovnle %r9d, %eax'], + ['i32', 'lt_u', 'i64', 'mov %rbx, %rax', + 'cmp %edx, %ecx', + 'cmovnb %r9, %rax'], + ['i64', 'le_s', 'i32', 'mov %ebx, %eax', + 'cmp %rdx, %rcx', + 'cmovnle %r9d, %eax'], + ['i64', 'lt_u', 'i64', 'mov %rbx, %rax', + 'cmp %rdx, %rcx', + 'cmovnb %r9, %rax'] ] ) { codegenTestX64_adhoc(cmpSel32vs64(cmpTy, cmpOp, selTy), 'f', - `4. 
(89 c3|8b d8) mov %r8.*, %.bx + `mov %r8.*, %.bx ${insn1} ${insn2} ${insn3}` @@ -273,18 +273,18 @@ if (getBuildConfiguration("windows")) { } } else { for ( [cmpTy, cmpOp, selTy, insn1, insn2, insn3] of - [ ['i32', 'le_s', 'i32', '8b c2 mov %edx, %eax', - '3b fe cmp %esi, %edi', - '0f 4f c1 cmovnle %ecx, %eax'], - ['i32', 'lt_u', 'i64', '48 89 d0 mov %rdx, %rax', - '3b fe cmp %esi, %edi', - '48 0f 43 c1 cmovnb %rcx, %rax'], - ['i64', 'le_s', 'i32', '8b c2 mov %edx, %eax', - '48 3b fe cmp %rsi, %rdi', - '0f 4f c1 cmovnle %ecx, %eax'], - ['i64', 'lt_u', 'i64', '48 89 d0 mov %rdx, %rax', - '48 3b fe cmp %rsi, %rdi', - '48 0f 43 c1 cmovnb %rcx, %rax'] + [ ['i32', 'le_s', 'i32', 'mov %edx, %eax', + 'cmp %esi, %edi', + 'cmovnle %ecx, %eax'], + ['i32', 'lt_u', 'i64', 'mov %rdx, %rax', + 'cmp %esi, %edi', + 'cmovnb %rcx, %rax'], + ['i64', 'le_s', 'i32', 'mov %edx, %eax', + 'cmp %rsi, %rdi', + 'cmovnle %ecx, %eax'], + ['i64', 'lt_u', 'i64', 'mov %rdx, %rax', + 'cmp %rsi, %rdi', + 'cmovnb %rcx, %rax'] ] ) { codegenTestX64_adhoc(cmpSel32vs64(cmpTy, cmpOp, selTy), 'f', `${insn1} @@ -297,19 +297,19 @@ if (getBuildConfiguration("windows")) { // For integer comparison followed by select, check correct use of operands in // registers vs memory. At least for the 64-bit-cmp/64-bit-sel case. -for ( [pAnyCmp, pAnySel, cmpBytes, cmpArgL, cmovBytes, cmovArgL ] of +for ( [pAnyCmp, pAnySel, cmpArgL, cmovArgL ] of [ // r, r ['$pReg1', '$pReg2', - '4. .. ..', '%r.+', '4. .. .. ..', '%r.+'], + '%r.+', '%r.+'], // r, m ['$pReg1', '$pMem2', - '4. .. ..', '%r.+', '4. .. .. .. ..', '0x..\\(%rbp\\)'], + '%r.+', '0x..\\(%rbp\\)'], // m, r ['$pMem1', '$pReg2', - '4. .. .. ..', '0x..\\(%rbp\\)', '4. .. .. ..', '%r.+'], + '0x..\\(%rbp\\)', '%r.+'], // m, m ['$pMem1', '$pMem2', - '4. .. .. ..', '0x..\\(%rbp\\)', '4. .. .. .. 
..', '0x..\\(%rbp\\)'] + '0x..\\(%rbp\\)', '0x..\\(%rbp\\)'] ] ) { codegenTestX64_adhoc( `(module @@ -328,8 +328,8 @@ for ( [pAnyCmp, pAnySel, cmpBytes, cmpArgL, cmovBytes, cmovArgL ] of // On Linux we have an extra move (getBuildConfiguration("windows") ? '' : '48 89 .. mov %r.+, %r.+\n') + // 'q*' because the disassembler shows 'q' only for the memory cases - `48 89 .. mov %r.+, %r.+ - ${cmpBytes} cmpq* ${cmpArgL}, %r.+ - ${cmovBytes} cmovnzq* ${cmovArgL}, %r.+` + `mov %r.+, %r.+ + cmpq* ${cmpArgL}, %r.+ + cmovnzq* ${cmovArgL}, %r.+` ); } diff --git a/js/src/jit-test/tests/wasm/binop-x64-ion-folding.js b/js/src/jit-test/tests/wasm/binop-x64-ion-folding.js @@ -52,161 +52,161 @@ function test64(wasm_insn, must_appear, param0, param1, expected_result, // {AND,OR,XOR}{32,64} folding: both args const test32('(i32.and (i32.const 0x12345678) (i32.const 0x0f0f0f0f))', - 'b8 08 06 04 02 mov \\$0x2040608, %eax', + 'mov \\$0x2040608, %eax', 0,0, 0x2040608); test64('(i64.and (i64.const 0x1234567851505150) (i64.const 0x515051500f0f0f0f))', - '48 b8 00 01 00 01 50 50 10 10 mov \\$0x1010505001000100, %rax', + 'mov \\$0x1010505001000100, %rax', 0n,0n, 0x1010505001000100n); test32('(i32.or (i32.const 0x12345678) (i32.const 0x0f0e0d0c))', - 'b8 7c 5f 3e 1f mov \\$0x1F3E5F7C, %eax', + 'mov \\$0x1F3E5F7C, %eax', 0,0, 0x1f3e5f7c); test64('(i64.or (i64.const 0x1234567851505150) (i64.const 0x515051500f0f1337))', - '48 b8 77 53 5f 5f 78 57 74 53 mov \\$0x537457785F5F5377, %rax', + 'mov \\$0x537457785F5F5377, %rax', 0n,0n, 0x537457785f5f5377n); test32('(i32.xor (i32.const 0x12345678) (i32.const 0x0f0e0d0c))', - 'b8 74 5b 3a 1d mov \\$0x1D3A5B74, %eax', + 'mov \\$0x1D3A5B74, %eax', 0,0, 0x1d3a5b74); test64('(i64.xor (i64.const 0x1234567851505150) (i64.const 0x515051500f0f1337))', - '48 b8 67 42 5f 5e 28 07 64 43 mov \\$0x436407285E5F4267, %rax', + 'mov \\$0x436407285E5F4267, %rax', 0n,0n, 0x436407285e5f4267n); // {AND,OR,XOR}{32,64} identities: first arg is all zeroes test32('(i32.and 
(i32.const 0) (local.get 1))', - '33 c0 xor %eax, %eax', + 'xor %eax, %eax', 1234,5678, 0); test64('(i64.and (i64.const 0) (local.get 1))', - '33 c0 xor %eax, %eax', + 'xor %eax, %eax', 1234n,5678n, 0n); test32('(i32.or (i32.const 0) (local.get 1))', - `8b .. mov %e.., %ecx - 8b c1 mov %ecx, %eax`, + `mov %e.., %ecx + mov %ecx, %eax`, 1234,5678, 5678); test64('(i64.or (i64.const 0) (local.get 1))', - `48 89 .. mov %r.., %rcx - 48 89 c8 mov %rcx, %rax`, + `mov %r.., %rcx + mov %rcx, %rax`, 1234n,5678n, 5678n); test32('(i32.xor (i32.const 0) (local.get 1))', - `8b .. mov %e.., %ecx - 8b c1 mov %ecx, %eax`, + `mov %e.., %ecx + mov %ecx, %eax`, 1234,5678, 5678); test64('(i64.xor (i64.const 0) (local.get 1))', - `48 89 .. mov %r.., %rcx - 48 89 c8 mov %rcx, %rax`, + `mov %r.., %rcx + mov %rcx, %rax`, 1234n,5678n, 5678n); // {AND,OR,XOR}{32,64} identities: second arg is all zeroes test32('(i32.and (local.get 0) (i32.const 0))', - '33 c0 xor %eax, %eax', + 'xor %eax, %eax', 1234,5678, 0); test64('(i64.and (local.get 0) (i64.const 0))', - '33 c0 xor %eax, %eax', + 'xor %eax, %eax', 1234n,5678n, 0n); test32('(i32.or (local.get 0) (i32.const 0))', - // 8b cf mov %edi, %ecx -- expected on Linux but not on Windows - `8b c1 mov %ecx, %eax`, + // mov %edi, %ecx -- expected on Linux but not on Windows + `mov %ecx, %eax`, 1234,5678, 1234, {no_prefix: true}); // required on Linux test64('(i64.or (local.get 0) (i64.const 0))', - // 48 89 f9 mov %rdi, %rcx -- ditto - `48 89 c8 mov %rcx, %rax`, + // mov %rdi, %rcx -- ditto + `mov %rcx, %rax`, 1234n,5678n, 1234n, {no_prefix: true}); test32('(i32.xor (local.get 0) (i32.const 0))', - // 8b cf mov %edi, %ecx -- ditto - `8b c1 mov %ecx, %eax`, + // mov %edi, %ecx -- ditto + `mov %ecx, %eax`, 1234,5678, 1234, {no_prefix: true}); test64('(i64.xor (local.get 0) (i64.const 0))', - // 48 89 f9 mov %rdi, %rcx -- ditto - `48 89 c8 mov %rcx, %rax`, + // mov %rdi, %rcx -- ditto + `mov %rcx, %rax`, 1234n,5678n, 1234n, {no_prefix: true}); // 
{AND,OR,XOR}{32,64} identities: first arg is all ones test32('(i32.and (i32.const 0xffffffff) (local.get 1))', - `8b .. mov %e.., %ecx - 8b c1 mov %ecx, %eax`, + `mov %e.., %ecx + mov %ecx, %eax`, 1234,5678, 5678); test64('(i64.and (i64.const 0xffffffffffffffff) (local.get 1))', - `48 89 .. mov %r.., %rcx - 48 89 c8 mov %rcx, %rax`, + `mov %r.., %rcx + mov %rcx, %rax`, 1234n,5678n, 5678n); test32('(i32.or (i32.const 0xffffffff) (local.get 1))', - 'b8 ff ff ff ff mov \\$-0x01, %eax', + 'mov \\$-0x01, %eax', 1234,5678, -1/*0xffffffff*/); test64('(i64.or (i64.const 0xffffffffffffffff) (local.get 1))', - '48 c7 c0 ff ff ff ff mov \\$-0x01, %rax', + 'mov \\$-0x01, %rax', 1234n,5678n, -1n/*0xffffffffffffffff*/); test32('(i32.xor (i32.const 0xffffffff) (local.get 1))', - `8b .. mov %e.., %ecx - 8b c1 mov %ecx, %eax - f7 d0 not %eax`, + `mov %e.., %ecx + mov %ecx, %eax + not %eax`, 1234,5678, -5679); test64('(i64.xor (i64.const 0xffffffffffffffff) (local.get 1))', - `48 89 .. mov %r.., %rcx - 48 89 c8 mov %rcx, %rax - 48 f7 d0 not %rax`, + `mov %r.., %rcx + mov %rcx, %rax + not %rax`, 1234n,5678n, -5679n); // {AND,OR,XOR}{32,64} identities: second arg is all ones test32('(i32.and (local.get 0) (i32.const 0xffffffff))', - // 8b cf mov %edi, %ecx -- expected on Linux but not on Windows - `8b c1 mov %ecx, %eax`, + // mov %edi, %ecx -- expected on Linux but not on Windows + `mov %ecx, %eax`, 1234,5678, 1234, {no_prefix: true}); // required on Linux test64('(i64.and (local.get 0) (i64.const 0xffffffffffffffff))', - // 48 89 f9 mov %rdi, %rcx -- ditto - `48 89 c8 mov %rcx, %rax`, + // mov %rdi, %rcx -- ditto + `mov %rcx, %rax`, 1234n,5678n, 1234n, {no_prefix: true}); test32('(i32.or (local.get 0) (i32.const 0xffffffff))', - 'b8 ff ff ff ff mov \\$-0x01, %eax', + 'mov \\$-0x01, %eax', 1234,5678, -1/*0xffffffff*/); test64('(i64.or (local.get 0) (i64.const 0xffffffffffffffff))', - '48 c7 c0 ff ff ff ff mov \\$-0x01, %rax', + 'mov \\$-0x01, %rax', 1234n,5678n, 
-1n/*0xffffffffffffffff*/); test32('(i32.xor (local.get 0) (i32.const 0xffffffff))', - // 8b cf mov %edi, %ecx -- ditto - `8b c1 mov %ecx, %eax - f7 d0 not %eax`, + // mov %edi, %ecx -- ditto + `mov %ecx, %eax + not %eax`, 1234,5678, -1235, {no_prefix: true}); test64('(i64.xor (local.get 0) (i64.const 0xffffffffffffffff))', - // 48 89 f9 mov %rdi, %rcx -- ditto - `48 89 c8 mov %rcx, %rax - 48 f7 d0 not %rax`, + // mov %rdi, %rcx -- ditto + `mov %rcx, %rax + not %rax`, 1234n,5678n, -1235n, {no_prefix: true}); // {AND,OR,XOR}{32,64} identities: both args the same test32('(i32.and (local.get 0) (local.get 0))', - // 8b cf mov %edi, %ecx -- ditto - `8b c1 mov %ecx, %eax`, + // mov %edi, %ecx -- ditto + `mov %ecx, %eax`, 1234,5678, 1234, {no_prefix: true}); test64('(i64.and (local.get 0) (local.get 0))', - // 48 89 f9 mov %rdi, %rcx -- ditto - `48 89 c8 mov %rcx, %rax`, + // mov %rdi, %rcx -- ditto + `mov %rcx, %rax`, 1234n,5678n, 1234n, {no_prefix: true}); test32('(i32.or (local.get 0) (local.get 0))', - // 8b cf mov %edi, %ecx -- ditto - `8b c1 mov %ecx, %eax`, + // mov %edi, %ecx -- ditto + `mov %ecx, %eax`, 1234,5678, 1234, {no_prefix: true}); test64('(i64.or (local.get 0) (local.get 0))', - // 48 89 f9 mov %rdi, %rcx -- ditto - `48 89 c8 mov %rcx, %rax`, + // mov %rdi, %rcx -- ditto + `mov %rcx, %rax`, 1234n,5678n, 1234n, {no_prefix: true}); test32('(i32.xor (local.get 0) (local.get 0))', - '33 c0 xor %eax, %eax', + 'xor %eax, %eax', 1234,5678, 0); test64('(i64.xor (local.get 0) (local.get 0))', - '33 c0 xor %eax, %eax', + 'xor %eax, %eax', 1234n,5678n, 0n); diff --git a/js/src/jit-test/tests/wasm/builtin-modules/js-string/inline-code.js b/js/src/jit-test/tests/wasm/builtin-modules/js-string/inline-code.js @@ -10,12 +10,12 @@ codegenTestARM64_adhoc(` ) (export "test" (func $testImp))`, 'test', - `92400401 and x1, x0, #0x3 - 7100083f cmp w1, #0x2 \\(2\\) - 54000060 b\\.eq #\\+0xc \\(addr .*\\) - 52800000 mov w0, #0x0 - 14000002 b #\\+0x8 \\(addr .*\\) - 52800020 mov 
w0, #0x1`, + `and x1, x0, #0x3 + cmp w1, #0x2 \\(2\\) + b\\.eq #\\+0xc \\(addr .*\\) + mov w0, #0x0 + b #\\+0x8 \\(addr .*\\) + mov w0, #0x1`, {features: {builtins: ["js-string"]}} ); @@ -27,9 +27,9 @@ codegenTestARM64_adhoc(` ) (export "cast" (func $castImp))`, 'cast', - `92400401 and x1, x0, #0x3 - 7100083f cmp w1, #0x2 \\(2\\) - 54000040 b.eq #\\+0x8 \\(addr .*\\) - d4a00000 unimplemented \\(Exception\\)`, + `and x1, x0, #0x3 + cmp w1, #0x2 \\(2\\) + b.eq #\\+0x8 \\(addr .*\\) + unimplemented \\(Exception\\)`, {features: {builtins: ["js-string"]}} ); diff --git a/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js b/js/src/jit-test/tests/wasm/ion-adhoc-multiplatform.js @@ -39,12 +39,12 @@ codegenTestMultiplatform_adhoc( // from the latter problem, but on x86 no_prefix_x86:true // hides it, and on arm32/64 the pointless move is correctly // transformed by RA into a no-op. - `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax - 33 c0 xor %eax, %eax`, - x86: `33 c0 xor %eax, %eax`, - arm64: `2a1f03e0 mov w0, wzr`, - arm: `e3a00000 mov r0, #0`}, + `mov %edi, %ecx + mov %ecx, %eax + xor %eax, %eax`, + x86: `xor %eax, %eax`, + arm64: `mov w0, wzr`, + arm: `mov r0, #0`}, {x86: {no_prefix:true}} ); codegenTestMultiplatform_adhoc( @@ -53,15 +53,15 @@ codegenTestMultiplatform_adhoc( "mul64_zeroL", // FIXME folding happened, zero-creation insns could be improved {x64: // Same shenanigans as above. Also, on xor, REX.W is redundant. 
- `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax - 48 33 c0 xor %rax, %rax`, - x86: `33 c0 xor %eax, %eax - 33 d2 xor %edx, %edx`, - arm64: `aa1f03e0 mov x0, xzr`, + `mov %rdi, %rcx + mov %rcx, %rax + xor %rax, %rax`, + x86: `xor %eax, %eax + xor %edx, %edx`, + arm64: `mov x0, xzr`, arm: // bizarrely inconsistent with the 32-bit case - `e0200000 eor r0, r0, r0 - e0211001 eor r1, r1, r1` }, + `eor r0, r0, r0 + eor r1, r1, r1` }, {x86: {no_prefix:true}} ); @@ -69,9 +69,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_oneL") (param $p1 i32) (result i32) (i32.mul (i32.const 1) (local.get $p1))))`, "mul32_oneL", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: ``}, {x86: {no_prefix:true}} @@ -80,10 +80,10 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_oneL") (param $p1 i64) (result i64) (i64.mul (i64.const 1) (local.get $p1))))`, "mul64_oneL", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: ``}, {x86: {no_prefix:true}} @@ -93,25 +93,25 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_minusOneL") (param $p1 i32) (result i32) (i32.mul (i32.const -1) (local.get $p1))))`, "mul32_minusOneL", - {x64: `f7 d8 neg %eax`, - x86: `f7 d8 neg %eax`, - arm64: `4b0003e0 neg w0, w0`, - arm: `e2600000 rsb r0, r0, #0`}, + {x64: `neg %eax`, + x86: `neg %eax`, + arm64: `neg w0, w0`, + arm: `rsb r0, r0, #0`}, {x86: {no_prefix:true}, x64: {no_prefix:true}} ); codegenTestMultiplatform_adhoc( `(module (func (export "mul64_minusOneL") (param $p1 i64) (result i64) (i64.mul (i64.const -1) (local.get $p1))))`, "mul64_minusOneL", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, 
%rax - 48 f7 d8 neg %rax`, - x86: `f7 d8 neg %eax - 83 d2 00 adc \\$0x00, %edx - f7 da neg %edx`, - arm64: `cb0003e0 neg x0, x0`, - arm: `e2700000 rsbs r0, r0, #0 - e2e11000 rsc r1, r1, #0`}, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + neg %rax`, + x86: `neg %eax + adc \\$0x00, %edx + neg %edx`, + arm64: `neg x0, x0`, + arm: `rsbs r0, r0, #0 + rsc r1, r1, #0`}, {x86: {no_prefix:true}} ); @@ -119,29 +119,29 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_twoL") (param $p1 i32) (result i32) (i32.mul (i32.const 2) (local.get $p1))))`, "mul32_twoL", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax - 03 c0 add %eax, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax - 03 c0 add %eax, %eax`, - arm64: `0b000000 add w0, w0, w0`, - arm: `e0900000 adds r0, r0, r0`}, + {x64: `mov %edi, %ecx + mov %ecx, %eax + add %eax, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax + add %eax, %eax`, + arm64: `add w0, w0, w0`, + arm: `adds r0, r0, r0`}, {x86: {no_prefix:true}} ); codegenTestMultiplatform_adhoc( `(module (func (export "mul64_twoL") (param $p1 i64) (result i64) (i64.mul (i64.const 2) (local.get $p1))))`, "mul64_twoL", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax - 48 03 c0 add %rax, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax - 03 c0 add %eax, %eax - 13 d2 adc %edx, %edx`, - arm64: `8b000000 add x0, x0, x0`, - arm: `e0900000 adds r0, r0, r0 - e0a11001 adc r1, r1, r1`}, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + add %rax, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax + add %eax, %eax + adc %edx, %edx`, + arm64: `add x0, x0, x0`, + arm: `adds r0, r0, r0 + adc r1, r1, r1`}, {x86: {no_prefix:true}} ); @@ -149,30 +149,30 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_fourL") (param $p1 i32) (result i32) (i32.mul (i32.const 4) (local.get $p1))))`, "mul32_fourL", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax - c1 e0 02 shl \\$0x02, %eax`, - x86: `8b 45 10 movl 
0x10\\(%rbp\\), %eax - c1 e0 02 shl \\$0x02, %eax`, - arm64: `531e7400 lsl w0, w0, #2`, - arm: `e1a00100 mov r0, r0, lsl #2`}, + {x64: `mov %edi, %ecx + mov %ecx, %eax + shl \\$0x02, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax + shl \\$0x02, %eax`, + arm64: `lsl w0, w0, #2`, + arm: `mov r0, r0, lsl #2`}, {x86: {no_prefix:true}} ); codegenTestMultiplatform_adhoc( `(module (func (export "mul64_fourL") (param $p1 i64) (result i64) (i64.mul (i64.const 4) (local.get $p1))))`, "mul64_fourL", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax - 48 c1 e0 02 shl \\$0x02, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax - 0f a4 c2 02 shld \\$0x02, %eax, %edx - c1 e0 02 shl \\$0x02, %eax`, - arm64: `d37ef400 lsl x0, x0, #2`, - arm: `e1a01101 mov r1, r1, lsl #2 - e1811f20 orr r1, r1, r0, lsr #30 - e1a00100 mov r0, r0, lsl #2`}, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + shl \\$0x02, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax + shld \\$0x02, %eax, %edx + shl \\$0x02, %eax`, + arm64: `lsl x0, x0, #2`, + arm: `mov r1, r1, lsl #2 + orr r1, r1, r0, lsr #30 + mov r0, r0, lsl #2`}, {x86: {no_prefix:true}} ); @@ -188,26 +188,26 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_zeroR") (param $p1 i32) (result i32) (i32.mul (local.get $p1) (i32.const 0))))`, "mul32_zeroR", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax - 33 c0 xor %eax, %eax`, - x86: `33 c0 xor %eax, %eax`, - arm64: `2a1f03e0 mov w0, wzr`, - arm: `e3a00000 mov r0, #0`}, + {x64: `mov %edi, %ecx + mov %ecx, %eax + xor %eax, %eax`, + x86: `xor %eax, %eax`, + arm64: `mov w0, wzr`, + arm: `mov r0, #0`}, {x86: {no_prefix:true}} ); codegenTestMultiplatform_adhoc( `(module (func (export "mul64_zeroR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 0))))`, "mul64_zeroR", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax - 48 33 c0 xor %rax, %rax`, // REX.W is redundant - x86: `33 c0 xor %eax, %eax - 33 d2 xor 
%edx, %edx`, - arm64: `aa1f03e0 mov x0, xzr`, - arm: `e0200000 eor r0, r0, r0 - e0211001 eor r1, r1, r1` }, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + xor %rax, %rax`, // REX.W is redundant + x86: `xor %eax, %eax + xor %edx, %edx`, + arm64: `mov x0, xzr`, + arm: `eor r0, r0, r0 + eor r1, r1, r1` }, {x86: {no_prefix:true}} ); @@ -215,9 +215,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_oneR") (param $p1 i32) (result i32) (i32.mul (local.get $p1) (i32.const 1))))`, "mul32_oneR", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: ``}, {x86: {no_prefix:true}} @@ -226,10 +226,10 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul64_oneR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 1))))`, "mul64_oneR", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: ``}, {x86: {no_prefix:true}} @@ -239,25 +239,25 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_minusOneR") (param $p1 i32) (result i32) (i32.mul (local.get $p1) (i32.const -1))))`, "mul32_minusOneR", - {x64: `f7 d8 neg %eax`, - x86: `f7 d8 neg %eax`, - arm64: `4b0003e0 neg w0, w0`, - arm: `e2600000 rsb r0, r0, #0`}, + {x64: `neg %eax`, + x86: `neg %eax`, + arm64: `neg w0, w0`, + arm: `rsb r0, r0, #0`}, {x86: {no_prefix:true}, x64: {no_prefix:true}} ); codegenTestMultiplatform_adhoc( `(module (func (export "mul64_minusOneR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const -1))))`, "mul64_minusOneR", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax - 48 f7 d8 neg %rax`, - x86: `f7 d8 neg %eax - 83 d2 00 adc \\$0x00, %edx - f7 da neg %edx`, - arm64: `cb0003e0 neg x0, x0`, - 
arm: `e2700000 rsbs r0, r0, #0 - e2e11000 rsc r1, r1, #0`}, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + neg %rax`, + x86: `neg %eax + adc \\$0x00, %edx + neg %edx`, + arm64: `neg x0, x0`, + arm: `rsbs r0, r0, #0 + rsc r1, r1, #0`}, {x86: {no_prefix:true}} ); @@ -265,29 +265,29 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_twoR") (param $p1 i32) (result i32) (i32.mul (local.get $p1) (i32.const 2))))`, "mul32_twoR", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax - 03 c0 add %eax, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax - 03 c0 add %eax, %eax`, - arm64: `0b000000 add w0, w0, w0`, - arm: `e0900000 adds r0, r0, r0`}, + {x64: `mov %edi, %ecx + mov %ecx, %eax + add %eax, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax + add %eax, %eax`, + arm64: `add w0, w0, w0`, + arm: `adds r0, r0, r0`}, {x86: {no_prefix:true}} ); codegenTestMultiplatform_adhoc( `(module (func (export "mul64_twoR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 2))))`, "mul64_twoR", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax - 48 03 c0 add %rax, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax - 03 c0 add %eax, %eax - 13 d2 adc %edx, %edx`, - arm64: `8b000000 add x0, x0, x0`, - arm: `e0900000 adds r0, r0, r0 - e0a11001 adc r1, r1, r1`}, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + add %rax, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax + add %eax, %eax + adc %edx, %edx`, + arm64: `add x0, x0, x0`, + arm: `adds r0, r0, r0 + adc r1, r1, r1`}, {x86: {no_prefix:true}} ); @@ -295,30 +295,30 @@ codegenTestMultiplatform_adhoc( `(module (func (export "mul32_fourR") (param $p1 i32) (result i32) (i32.mul (local.get $p1) (i32.const 4))))`, "mul32_fourR", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax - c1 e0 02 shl \\$0x02, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax - c1 e0 02 shl \\$0x02, %eax`, - arm64: `531e7400 lsl w0, w0, #2`, - arm: `e1a00100 mov r0, r0, lsl #2`}, + {x64: `mov 
%edi, %ecx + mov %ecx, %eax + shl \\$0x02, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax + shl \\$0x02, %eax`, + arm64: `lsl w0, w0, #2`, + arm: `mov r0, r0, lsl #2`}, {x86: {no_prefix:true}} ); codegenTestMultiplatform_adhoc( `(module (func (export "mul64_fourR") (param $p1 i64) (result i64) (i64.mul (local.get $p1) (i64.const 4))))`, "mul64_fourR", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax - 48 c1 e0 02 shl \\$0x02, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax - 0f a4 c2 02 shld \\$0x02, %eax, %edx - c1 e0 02 shl \\$0x02, %eax`, - arm64: `d37ef400 lsl x0, x0, #2`, - arm: `e1a01101 mov r1, r1, lsl #2 - e1811f20 orr r1, r1, r0, lsr #30 - e1a00100 mov r0, r0, lsl #2` + {x64: `mov %rdi, %rcx + mov %rcx, %rax + shl \\$0x02, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax + shld \\$0x02, %eax, %edx + shl \\$0x02, %eax`, + arm64: `lsl x0, x0, #2`, + arm: `mov r1, r1, lsl #2 + orr r1, r1, r0, lsr #30 + mov r0, r0, lsl #2` }, {x86: {no_prefix:true}} ); @@ -332,16 +332,16 @@ codegenTestMultiplatform_adhoc( (i32.shl (local.get $p1) (i32.const 0))))`, "shl32_zeroR", // FIXME check these are consistently folded out at the MIR level - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax`, arm64: // Regalloc badness, plus not folded out at the MIR level - `2a0003e2 mov w2, w0 - 2a0203e1 mov w1, w2 - 53007c20 lsr w0, w1, #0`, // Uhh. lsr ?! - arm: `e1a02000 mov r2, r0 - e1a01002 mov r1, r2 - e1a00001 mov r0, r1` + `mov w2, w0 + mov w1, w2 + lsr w0, w1, #0`, // Uhh. lsr ?! + arm: `mov r2, r0 + mov r1, r2 + mov r0, r1` }, {x86: {no_prefix:true}} ); @@ -350,10 +350,10 @@ codegenTestMultiplatform_adhoc( (i64.shl (local.get $p1) (i64.const 0))))`, "shl64_zeroR", // FIXME why is this code so much better than the 32-bit case? 
- {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax`, arm64: ``, // no-op arm: `` // no-op }, @@ -364,15 +364,15 @@ codegenTestMultiplatform_adhoc( `(module (func (export "shrU32_zeroR") (param $p1 i32) (result i32) (i32.shr_u (local.get $p1) (i32.const 0))))`, "shrU32_zeroR", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax`, - arm64: `2a0003e2 mov w2, w0 - 2a0203e1 mov w1, w2 - 2a0103e0 mov w0, w1`, - arm: `e1a02000 mov r2, r0 - e1a01002 mov r1, r2 - e1a00001 mov r0, r1` + {x64: `mov %edi, %ecx + mov %ecx, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax`, + arm64: `mov w2, w0 + mov w1, w2 + mov w0, w1`, + arm: `mov r2, r0 + mov r1, r2 + mov r0, r1` }, {x86: {no_prefix:true}} ); @@ -380,10 +380,10 @@ codegenTestMultiplatform_adhoc( `(module (func (export "shrU64_zeroR") (param $p1 i64) (result i64) (i64.shr_u (local.get $p1) (i64.const 0))))`, "shrU64_zeroR", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: `` }, @@ -394,15 +394,15 @@ codegenTestMultiplatform_adhoc( `(module (func (export "shrS32_zeroR") (param $p1 i32) (result i32) (i32.shr_s (local.get $p1) (i32.const 0))))`, "shrS32_zeroR", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax`, - arm64: `2a0003e2 mov w2, w0 - 2a0203e1 mov w1, w2 - 13007c20 sbfx w0, w1, #0, #32`, - arm: `e1a02000 mov r2, r0 - e1a01002 mov r1, r2 - e1a00001 mov r0, r1` + {x64: `mov %edi, %ecx + mov %ecx, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax`, + arm64: `mov w2, w0 + mov w1, w2 + sbfx w0, w1, #0, #32`, + arm: `mov r2, r0 + mov r1, r2 + 
mov r0, r1` }, {x86: {no_prefix:true}} ); @@ -410,10 +410,10 @@ codegenTestMultiplatform_adhoc( `(module (func (export "shrS64_zeroR") (param $p1 i64) (result i64) (i64.shr_s (local.get $p1) (i64.const 0))))`, "shrS64_zeroR", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: `` }, @@ -430,9 +430,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "add32_zeroR") (param $p1 i32) (result i32) (i32.add (local.get $p1) (i32.const 0))))`, "add32_zeroR", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: `` }, @@ -442,10 +442,10 @@ codegenTestMultiplatform_adhoc( `(module (func (export "add64_zeroR") (param $p1 i64) (result i64) (i64.add (local.get $p1) (i64.const 0))))`, "add64_zeroR", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: `` }, @@ -456,9 +456,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "add32_zeroL") (param $p1 i32) (result i32) (i32.add (i32.const 0) (local.get $p1))))`, "add32_zeroL", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: `` }, @@ -468,10 +468,10 @@ codegenTestMultiplatform_adhoc( `(module (func (export "add64_zeroL") (param $p1 i64) (result i64) (i64.add (i64.const 0) (local.get $p1))))`, "add64_zeroL", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 
10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: `` }, @@ -482,13 +482,13 @@ codegenTestMultiplatform_adhoc( `(module (func (export "add32_self") (param $p1 i32) (result i32) (i32.add (local.get $p1) (local.get $p1))))`, "add32_self", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax - 03 c1 add %ecx, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax - 03 45 10 addl 0x10\\(%rbp\\), %eax`, - arm64: `0b000000 add w0, w0, w0`, - arm: `e0900000 adds r0, r0, r0 ` + {x64: `mov %edi, %ecx + mov %ecx, %eax + add %ecx, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax + addl 0x10\\(%rbp\\), %eax`, + arm64: `add w0, w0, w0`, + arm: `adds r0, r0, r0 ` }, {x86: {no_prefix:true}} ); @@ -497,27 +497,27 @@ codegenTestMultiplatform_adhoc( (i64.add (local.get $p1) (local.get $p1))))`, "add64_self", // FIXME outstandingly bad 32-bit sequences, probably due to the RA - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax - 48 03 c1 add %rcx, %rax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax + add %rcx, %rax`, x86: // -0x21524111 is 0xDEADBEEF - `8b 5d 14 movl 0x14\\(%rbp\\), %ebx - 8b 4d 10 movl 0x10\\(%rbp\\), %ecx - bf ef be ad de mov \\$-0x21524111, %edi - 8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax - 03 c1 add %ecx, %eax - 13 d3 adc %ebx, %edx`, - arm64: `8b000000 add x0, x0, x0`, + `movl 0x14\\(%rbp\\), %ebx + movl 0x10\\(%rbp\\), %ecx + mov \\$-0x21524111, %edi + movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax + add %ecx, %eax + adc %ebx, %edx`, + arm64: `add x0, x0, x0`, arm: // play Musical Chairs for a while - `e1a03001 mov r3, r1 - e1a02000 mov r2, r0 - e1a05003 mov r5, r3 - e1a04002 mov r4, r2 - e1a01003 mov r1, r3 - e1a00002 mov r0, r2 - e0900004 adds r0, r0, r4 - e0a11005 adc r1, r1, r5` + `mov r3, r1 + mov r2, r0 + mov r5, r3 + mov r4, r2 + mov r1, r3 + mov r0, r2 + adds r0, r0, r4 + adc r1, r1, r5` }, {x86: {no_prefix:true}} ); @@ 
-532,9 +532,9 @@ codegenTestMultiplatform_adhoc( `(module (func (export "sub32_zeroR") (param $p1 i32) (result i32) (i32.sub (local.get $p1) (i32.const 0))))`, "sub32_zeroR", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %edi, %ecx + mov %ecx, %eax`, + x86: `movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: `` }, @@ -544,10 +544,10 @@ codegenTestMultiplatform_adhoc( `(module (func (export "sub64_zeroR") (param $p1 i64) (result i64) (i64.sub (local.get $p1) (i64.const 0))))`, "sub64_zeroR", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax`, + {x64: `mov %rdi, %rcx + mov %rcx, %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), %eax`, arm64: ``, arm: `` }, @@ -558,13 +558,13 @@ codegenTestMultiplatform_adhoc( `(module (func (export "sub32_zeroL") (param $p1 i32) (result i32) (i32.sub (i32.const 0) (local.get $p1))))`, "sub32_zeroL", - {x64: `8b cf mov %edi, %ecx - 8b c1 mov %ecx, %eax - f7 d8 neg %eax`, - x86: `8b 45 10 movl 0x10\\(%rbp\\), %eax - f7 d8 neg %eax`, - arm64: `4b0003e0 neg w0, w0 `, - arm: `e2600000 rsb r0, r0, #0` + {x64: `mov %edi, %ecx + mov %ecx, %eax + neg %eax`, + x86: `movl 0x10\\(%rbp\\), %eax + neg %eax`, + arm64: `neg w0, w0 `, + arm: `rsb r0, r0, #0` }, {x86: {no_prefix:true}} ); @@ -572,17 +572,17 @@ codegenTestMultiplatform_adhoc( `(module (func (export "sub64_zeroL") (param $p1 i64) (result i64) (i64.sub (i64.const 0) (local.get $p1))))`, "sub64_zeroL", - {x64: `48 89 f9 mov %rdi, %rcx - 48 89 c8 mov %rcx, %rax - 48 f7 d8 neg %rax`, - x86: `8b 55 14 movl 0x14\\(%rbp\\), %edx - 8b 45 10 movl 0x10\\(%rbp\\), %eax - f7 d8 neg %eax - 83 d2 00 adc \\$0x00, %edx - f7 da neg %edx`, - arm64: `cb0003e0 neg x0, x0`, - arm: `e2700000 rsbs r0, r0, #0 - e2e11000 rsc r1, r1, #0` + {x64: `mov %rdi, %rcx + mov %rcx, %rax + neg %rax`, + x86: `movl 0x14\\(%rbp\\), %edx + movl 0x10\\(%rbp\\), 
%eax + neg %eax + adc \\$0x00, %edx + neg %edx`, + arm64: `neg x0, x0`, + arm: `rsbs r0, r0, #0 + rsc r1, r1, #0` }, {x86: {no_prefix:true}} ); @@ -591,10 +591,10 @@ codegenTestMultiplatform_adhoc( `(module (func (export "sub32_self") (param $p1 i32) (result i32) (i32.sub (local.get $p1) (local.get $p1))))`, "sub32_self", - {x64: `33 c0 xor %eax, %eax`, - x86: `33 c0 xor %eax, %eax`, - arm64: `52800000 mov w0, #0x0`, - arm: `e3a00000 mov r0, #0` + {x64: `xor %eax, %eax`, + x86: `xor %eax, %eax`, + arm64: `mov w0, #0x0`, + arm: `mov r0, #0` }, {x86: {no_prefix:true}} ); @@ -602,12 +602,12 @@ codegenTestMultiplatform_adhoc( `(module (func (export "sub64_self") (param $p1 i64) (result i64) (i64.sub (local.get $p1) (local.get $p1))))`, "sub64_self", - {x64: `33 c0 xor %eax, %eax`, - x86: `33 c0 xor %eax, %eax - 33 d2 xor %edx, %edx`, - arm64: `d2800000 mov x0, #0x0`, - arm: `e3a00000 mov r0, #0 - e3a01000 mov r1, #0` + {x64: `xor %eax, %eax`, + x86: `xor %eax, %eax + xor %edx, %edx`, + arm64: `mov x0, #0x0`, + arm: `mov r0, #0 + mov r1, #0` }, {x86: {no_prefix:true}} ); diff --git a/js/src/jit-test/tests/wasm/lazy-tiering-codegen.js b/js/src/jit-test/tests/wasm/lazy-tiering-codegen.js @@ -53,8 +53,8 @@ let t = ` `; let expected = -`41 83 ae .. 0. 00 00 03 subl \\$0x03, 0x...\\(%r14\\) - 0f 88 .. .. 
00 00 js 0x000000000000....`; +`subl \\$0x03, 0x...\\(%r14\\) + js 0x000000000000....`; codegenTestX64_adhoc(t, "f", expected, {no_prefix:true, no_suffix:true, baseline:true}); diff --git a/js/src/jit-test/tests/wasm/memory-arm64-ion-codegen.js b/js/src/jit-test/tests/wasm/memory-arm64-ion-codegen.js @@ -8,7 +8,7 @@ codegenTestARM64_adhoc( (func (export "f") (result i32) (i32.load (i32.const 4000))))`, 'f', - 'b94fa2a0 ldr w0, \\[x21, #4000\\]'); + 'ldr w0, \\[x21, #4000\\]'); codegenTestARM64_adhoc( `(module @@ -16,7 +16,7 @@ codegenTestARM64_adhoc( (func (export "f") (result i32) (i32.load offset=1000 (i32.const 3000))))`, 'f', - 'b94fa2a0 ldr w0, \\[x21, #4000\\]'); + 'ldr w0, \\[x21, #4000\\]'); codegenTestARM64_adhoc( `(module @@ -24,7 +24,7 @@ codegenTestARM64_adhoc( (func (export "f") (param i32) (i32.store (i32.const 4000) (local.get 0))))`, 'f', - 'b90fa2a0 str w0, \\[x21, #4000\\]'); + 'str w0, \\[x21, #4000\\]'); codegenTestARM64_adhoc( `(module @@ -32,7 +32,7 @@ codegenTestARM64_adhoc( (func (export "f") (param i32) (i32.store offset=1000 (i32.const 3000) (local.get 0))))`, 'f', - 'b90fa2a0 str w0, \\[x21, #4000\\]'); + 'str w0, \\[x21, #4000\\]'); // Unfriendly offsets are first loaded into a scratch register @@ -42,8 +42,8 @@ codegenTestARM64_adhoc( (func (export "f") (result i32) (i32.load (i32.const 4001))))`, 'f', - `d281f430 mov x16, #0xfa1 - b8706aa0 ldr w0, \\[x21, x16\\]`); + `mov x16, #0xfa1 + ldr w0, \\[x21, x16\\]`); codegenTestARM64_adhoc( `(module @@ -51,6 +51,6 @@ codegenTestARM64_adhoc( (func (export "f") (param i32) (i32.store (i32.const 4001) (local.get 0))))`, 'f', - `d281f430 mov x16, #0xfa1 - b8306aa0 str w0, \\[x21, x16\\]`); + `mov x16, #0xfa1 + str w0, \\[x21, x16\\]`); diff --git a/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js @@ -69,238 +69,238 @@ var GPR_I32 = "%(?:e\\w+|r\\d+d)"; // Simple binary ops: e.g. 
add, sub, mul codegenTestX64_v128xv128_v128_avxhack( - [['i8x16.avgr_u', `c5 f1 e0 c2 vpavgb %xmm2, %xmm1, %xmm0`], - ['i16x8.avgr_u', `c5 f1 e3 c2 vpavgw %xmm2, %xmm1, %xmm0`], - ['i8x16.add', `c5 f1 fc c2 vpaddb %xmm2, %xmm1, %xmm0`], - ['i8x16.add_sat_s', `c5 f1 ec c2 vpaddsb %xmm2, %xmm1, %xmm0`], - ['i8x16.add_sat_u', `c5 f1 dc c2 vpaddusb %xmm2, %xmm1, %xmm0`], - ['i8x16.sub', `c5 f1 f8 c2 vpsubb %xmm2, %xmm1, %xmm0`], - ['i8x16.sub_sat_s', `c5 f1 e8 c2 vpsubsb %xmm2, %xmm1, %xmm0`], - ['i8x16.sub_sat_u', `c5 f1 d8 c2 vpsubusb %xmm2, %xmm1, %xmm0`], - ['i16x8.mul', `c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0`], - ['i16x8.min_s', `c5 f1 ea c2 vpminsw %xmm2, %xmm1, %xmm0`], - ['i16x8.min_u', `c4 e2 71 3a c2 vpminuw %xmm2, %xmm1, %xmm0`], - ['i16x8.max_s', `c5 f1 ee c2 vpmaxsw %xmm2, %xmm1, %xmm0`], - ['i16x8.max_u', `c4 e2 71 3e c2 vpmaxuw %xmm2, %xmm1, %xmm0`], - ['i32x4.add', `c5 f1 fe c2 vpaddd %xmm2, %xmm1, %xmm0`], - ['i32x4.sub', `c5 f1 fa c2 vpsubd %xmm2, %xmm1, %xmm0`], - ['i32x4.mul', `c4 e2 71 40 c2 vpmulld %xmm2, %xmm1, %xmm0`], - ['i32x4.min_s', `c4 e2 71 39 c2 vpminsd %xmm2, %xmm1, %xmm0`], - ['i32x4.min_u', `c4 e2 71 3b c2 vpminud %xmm2, %xmm1, %xmm0`], - ['i32x4.max_s', `c4 e2 71 3d c2 vpmaxsd %xmm2, %xmm1, %xmm0`], - ['i32x4.max_u', `c4 e2 71 3f c2 vpmaxud %xmm2, %xmm1, %xmm0`], - ['i64x2.add', `c5 f1 d4 c2 vpaddq %xmm2, %xmm1, %xmm0`], - ['i64x2.sub', `c5 f1 fb c2 vpsubq %xmm2, %xmm1, %xmm0`], + [['i8x16.avgr_u', `vpavgb %xmm2, %xmm1, %xmm0`], + ['i16x8.avgr_u', `vpavgw %xmm2, %xmm1, %xmm0`], + ['i8x16.add', `vpaddb %xmm2, %xmm1, %xmm0`], + ['i8x16.add_sat_s', `vpaddsb %xmm2, %xmm1, %xmm0`], + ['i8x16.add_sat_u', `vpaddusb %xmm2, %xmm1, %xmm0`], + ['i8x16.sub', `vpsubb %xmm2, %xmm1, %xmm0`], + ['i8x16.sub_sat_s', `vpsubsb %xmm2, %xmm1, %xmm0`], + ['i8x16.sub_sat_u', `vpsubusb %xmm2, %xmm1, %xmm0`], + ['i16x8.mul', `vpmullw %xmm2, %xmm1, %xmm0`], + ['i16x8.min_s', `vpminsw %xmm2, %xmm1, %xmm0`], + ['i16x8.min_u', `vpminuw %xmm2, %xmm1, %xmm0`], + 
['i16x8.max_s', `vpmaxsw %xmm2, %xmm1, %xmm0`], + ['i16x8.max_u', `vpmaxuw %xmm2, %xmm1, %xmm0`], + ['i32x4.add', `vpaddd %xmm2, %xmm1, %xmm0`], + ['i32x4.sub', `vpsubd %xmm2, %xmm1, %xmm0`], + ['i32x4.mul', `vpmulld %xmm2, %xmm1, %xmm0`], + ['i32x4.min_s', `vpminsd %xmm2, %xmm1, %xmm0`], + ['i32x4.min_u', `vpminud %xmm2, %xmm1, %xmm0`], + ['i32x4.max_s', `vpmaxsd %xmm2, %xmm1, %xmm0`], + ['i32x4.max_u', `vpmaxud %xmm2, %xmm1, %xmm0`], + ['i64x2.add', `vpaddq %xmm2, %xmm1, %xmm0`], + ['i64x2.sub', `vpsubq %xmm2, %xmm1, %xmm0`], ['i64x2.mul', ` -c5 e1 73 d1 20 vpsrlq \\$0x20, %xmm1, %xmm3 -66 0f f4 da pmuludq %xmm2, %xmm3 -c5 81 73 d2 20 vpsrlq \\$0x20, %xmm2, %xmm15 -66 44 0f f4 f9 pmuludq %xmm1, %xmm15 -66 44 0f d4 fb paddq %xmm3, %xmm15 -66 41 0f 73 f7 20 psllq \\$0x20, %xmm15 -c5 f1 f4 c2 vpmuludq %xmm2, %xmm1, %xmm0 -66 41 0f d4 c7 paddq %xmm15, %xmm0`], - ['f32x4.add', `c5 f0 58 c2 vaddps %xmm2, %xmm1, %xmm0`], - ['f32x4.sub', `c5 f0 5c c2 vsubps %xmm2, %xmm1, %xmm0`], - ['f32x4.mul', `c5 f0 59 c2 vmulps %xmm2, %xmm1, %xmm0`], - ['f32x4.div', `c5 f0 5e c2 vdivps %xmm2, %xmm1, %xmm0`], - ['f64x2.add', `c5 f1 58 c2 vaddpd %xmm2, %xmm1, %xmm0`], - ['f64x2.sub', `c5 f1 5c c2 vsubpd %xmm2, %xmm1, %xmm0`], - ['f64x2.mul', `c5 f1 59 c2 vmulpd %xmm2, %xmm1, %xmm0`], - ['f64x2.div', `c5 f1 5e c2 vdivpd %xmm2, %xmm1, %xmm0`], - ['i8x16.narrow_i16x8_s', `c5 f1 63 c2 vpacksswb %xmm2, %xmm1, %xmm0`], - ['i8x16.narrow_i16x8_u', `c5 f1 67 c2 vpackuswb %xmm2, %xmm1, %xmm0`], - ['i16x8.narrow_i32x4_s', `c5 f1 6b c2 vpackssdw %xmm2, %xmm1, %xmm0`], - ['i16x8.narrow_i32x4_u', `c4 e2 71 2b c2 vpackusdw %xmm2, %xmm1, %xmm0`], - ['i32x4.dot_i16x8_s', `c5 f1 f5 c2 vpmaddwd %xmm2, %xmm1, %xmm0`]]); +vpsrlq \\$0x20, %xmm1, %xmm3 +pmuludq %xmm2, %xmm3 +vpsrlq \\$0x20, %xmm2, %xmm15 +pmuludq %xmm1, %xmm15 +paddq %xmm3, %xmm15 +psllq \\$0x20, %xmm15 +vpmuludq %xmm2, %xmm1, %xmm0 +paddq %xmm15, %xmm0`], + ['f32x4.add', `vaddps %xmm2, %xmm1, %xmm0`], + ['f32x4.sub', `vsubps %xmm2, %xmm1, 
%xmm0`], + ['f32x4.mul', `vmulps %xmm2, %xmm1, %xmm0`], + ['f32x4.div', `vdivps %xmm2, %xmm1, %xmm0`], + ['f64x2.add', `vaddpd %xmm2, %xmm1, %xmm0`], + ['f64x2.sub', `vsubpd %xmm2, %xmm1, %xmm0`], + ['f64x2.mul', `vmulpd %xmm2, %xmm1, %xmm0`], + ['f64x2.div', `vdivpd %xmm2, %xmm1, %xmm0`], + ['i8x16.narrow_i16x8_s', `vpacksswb %xmm2, %xmm1, %xmm0`], + ['i8x16.narrow_i16x8_u', `vpackuswb %xmm2, %xmm1, %xmm0`], + ['i16x8.narrow_i32x4_s', `vpackssdw %xmm2, %xmm1, %xmm0`], + ['i16x8.narrow_i32x4_u', `vpackusdw %xmm2, %xmm1, %xmm0`], + ['i32x4.dot_i16x8_s', `vpmaddwd %xmm2, %xmm1, %xmm0`]]); // Simple comparison ops codegenTestX64_v128xv128_v128_avxhack( - [['i8x16.eq', `c5 f1 74 c2 vpcmpeqb %xmm2, %xmm1, %xmm0`], + [['i8x16.eq', `vpcmpeqb %xmm2, %xmm1, %xmm0`], ['i8x16.ne', ` -c5 f1 74 c2 vpcmpeqb %xmm2, %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], - ['i8x16.lt_s', `c5 e9 64 c1 vpcmpgtb %xmm1, %xmm2, %xmm0`], +vpcmpeqb %xmm2, %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], + ['i8x16.lt_s', `vpcmpgtb %xmm1, %xmm2, %xmm0`], ['i8x16.gt_u', ` -c5 f1 de c2 vpmaxub %xmm2, %xmm1, %xmm0 -66 0f 74 c2 pcmpeqb %xmm2, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], - ['i16x8.eq', `c5 f1 75 c2 vpcmpeqw %xmm2, %xmm1, %xmm0`], +vpmaxub %xmm2, %xmm1, %xmm0 +pcmpeqb %xmm2, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], + ['i16x8.eq', `vpcmpeqw %xmm2, %xmm1, %xmm0`], ['i16x8.ne', ` -c5 f1 75 c2 vpcmpeqw %xmm2, %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +vpcmpeqw %xmm2, %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i16x8.le_s', ` -c5 f1 65 c2 vpcmpgtw %xmm2, %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +vpcmpgtw %xmm2, %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i16x8.ge_u', ` -c4 e2 71 3a c2 vpminuw %xmm2, %xmm1, %xmm0 -66 0f 75 c2 pcmpeqw %xmm2, %xmm0`], - 
['i32x4.eq', `c5 f1 76 c2 vpcmpeqd %xmm2, %xmm1, %xmm0`], +vpminuw %xmm2, %xmm1, %xmm0 +pcmpeqw %xmm2, %xmm0`], + ['i32x4.eq', `vpcmpeqd %xmm2, %xmm1, %xmm0`], ['i32x4.ne', ` -c5 f1 76 c2 vpcmpeqd %xmm2, %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], - ['i32x4.lt_s', `c5 e9 66 c1 vpcmpgtd %xmm1, %xmm2, %xmm0`], +vpcmpeqd %xmm2, %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], + ['i32x4.lt_s', `vpcmpgtd %xmm1, %xmm2, %xmm0`], ['i32x4.gt_u', ` -c4 e2 71 3f c2 vpmaxud %xmm2, %xmm1, %xmm0 -66 0f 76 c2 pcmpeqd %xmm2, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], - ['i64x2.eq', `c4 e2 71 29 c2 vpcmpeqq %xmm2, %xmm1, %xmm0`], +vpmaxud %xmm2, %xmm1, %xmm0 +pcmpeqd %xmm2, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], + ['i64x2.eq', `vpcmpeqq %xmm2, %xmm1, %xmm0`], ['i64x2.ne', ` -c4 e2 71 29 c2 vpcmpeqq %xmm2, %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], - ['i64x2.lt_s', `c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0`], +vpcmpeqq %xmm2, %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], + ['i64x2.lt_s', `vpcmpgtq %xmm1, %xmm2, %xmm0`], ['i64x2.ge_s', ` -c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], - ['f32x4.eq', `c5 f0 c2 c2 00 vcmpps \\$0x00, %xmm2, %xmm1, %xmm0`], - ['f32x4.lt', `c5 f0 c2 c2 01 vcmpps \\$0x01, %xmm2, %xmm1, %xmm0`], - ['f32x4.ge', `c5 e8 c2 c1 02 vcmpps \\$0x02, %xmm1, %xmm2, %xmm0`], - ['f64x2.eq', `c5 f1 c2 c2 00 vcmppd \\$0x00, %xmm2, %xmm1, %xmm0`], - ['f64x2.lt', `c5 f1 c2 c2 01 vcmppd \\$0x01, %xmm2, %xmm1, %xmm0`], - ['f64x2.ge', `c5 e9 c2 c1 02 vcmppd \\$0x02, %xmm1, %xmm2, %xmm0`], - ['f32x4.pmin', `c5 e8 5d c1 vminps %xmm1, %xmm2, %xmm0`], - ['f32x4.pmax', `c5 e8 5f c1 vmaxps %xmm1, %xmm2, %xmm0`], - ['f64x2.pmin', `c5 e9 5d c1 vminpd %xmm1, %xmm2, %xmm0`], - ['f64x2.pmax', `c5 e9 5f c1 vmaxpd %xmm1, %xmm2, %xmm0`], 
+vpcmpgtq %xmm1, %xmm2, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], + ['f32x4.eq', `vcmpps \\$0x00, %xmm2, %xmm1, %xmm0`], + ['f32x4.lt', `vcmpps \\$0x01, %xmm2, %xmm1, %xmm0`], + ['f32x4.ge', `vcmpps \\$0x02, %xmm1, %xmm2, %xmm0`], + ['f64x2.eq', `vcmppd \\$0x00, %xmm2, %xmm1, %xmm0`], + ['f64x2.lt', `vcmppd \\$0x01, %xmm2, %xmm1, %xmm0`], + ['f64x2.ge', `vcmppd \\$0x02, %xmm1, %xmm2, %xmm0`], + ['f32x4.pmin', `vminps %xmm1, %xmm2, %xmm0`], + ['f32x4.pmax', `vmaxps %xmm1, %xmm2, %xmm0`], + ['f64x2.pmin', `vminpd %xmm1, %xmm2, %xmm0`], + ['f64x2.pmax', `vmaxpd %xmm1, %xmm2, %xmm0`], ['i8x16.swizzle', ` -c5 69 dc 3d ${RIPRADDR} vpaddusbx ${RIPR}, %xmm2, %xmm15 -c4 c2 71 00 c7 vpshufb %xmm15, %xmm1, %xmm0`], +vpaddusbx ${RIPR}, %xmm2, %xmm15 +vpshufb %xmm15, %xmm1, %xmm0`], ['i16x8.extmul_high_i8x16_s', ` -66 44 0f 3a 0f fa 08 palignr \\$0x08, %xmm2, %xmm15 -c4 42 79 20 ff vpmovsxbw %xmm15, %xmm15 -66 0f 3a 0f c1 08 palignr \\$0x08, %xmm1, %xmm0 -c4 e2 79 20 c0 vpmovsxbw %xmm0, %xmm0 -66 41 0f d5 c7 pmullw %xmm15, %xmm0`], +palignr \\$0x08, %xmm2, %xmm15 +vpmovsxbw %xmm15, %xmm15 +palignr \\$0x08, %xmm1, %xmm0 +vpmovsxbw %xmm0, %xmm0 +pmullw %xmm15, %xmm0`], ['i32x4.extmul_low_i16x8_u', ` -c5 71 e4 fa vpmulhuw %xmm2, %xmm1, %xmm15 -c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0 -66 41 0f 61 c7 punpcklwd %xmm15, %xmm0`], +vpmulhuw %xmm2, %xmm1, %xmm15 +vpmullw %xmm2, %xmm1, %xmm0 +punpcklwd %xmm15, %xmm0`], ['i64x2.extmul_low_i32x4_s', ` -c5 79 70 f9 10 vpshufd \\$0x10, %xmm1, %xmm15 -c5 f9 70 c2 10 vpshufd \\$0x10, %xmm2, %xmm0 -66 41 0f 38 28 c7 pmuldq %xmm15, %xmm0`], +vpshufd \\$0x10, %xmm1, %xmm15 +vpshufd \\$0x10, %xmm2, %xmm0 +pmuldq %xmm15, %xmm0`], ['i16x8.q15mulr_sat_s', ` -c4 e2 71 0b c2 vpmulhrsw %xmm2, %xmm1, %xmm0 -c5 79 75 3d ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm0, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +vpmulhrsw %xmm2, %xmm1, %xmm0 +vpcmpeqwx ${RIPR}, %xmm0, %xmm15 +pxor %xmm15, %xmm0`], ]); // Bitwise binary ops 
codegenTestX64_v128xv128_v128_avxhack( - [['v128.and', `c5 f1 db c2 vpand %xmm2, %xmm1, %xmm0`], - ['v128.andnot', `c5 e9 df c1 vpandn %xmm1, %xmm2, %xmm0`], - ['v128.or', `c5 f1 eb c2 vpor %xmm2, %xmm1, %xmm0`], - ['v128.xor', `c5 f1 ef c2 vpxor %xmm2, %xmm1, %xmm0`]]); + [['v128.and', `vpand %xmm2, %xmm1, %xmm0`], + ['v128.andnot', `vpandn %xmm1, %xmm2, %xmm0`], + ['v128.or', `vpor %xmm2, %xmm1, %xmm0`], + ['v128.xor', `vpxor %xmm2, %xmm1, %xmm0`]]); // Replace lane ops. codegenTestX64_adhoc(`(module (func (export "f") (param v128 v128 i32) (result v128) (i8x16.replace_lane 7 (local.get 1) (local.get 2))))`, 'f', ` -c4 .. 71 20 .. 07 vpinsrb \\$0x07, ${GPR_I32}, %xmm1, %xmm0`); +vpinsrb \\$0x07, ${GPR_I32}, %xmm1, %xmm0`); codegenTestX64_adhoc(`(module (func (export "f") (param v128 v128 i32) (result v128) (i16x8.replace_lane 3 (local.get 1) (local.get 2))))`, 'f', ` -(?:c4 .. 71|c5 f1) c4 .. 03 vpinsrw \\$0x03, ${GPR_I32}, %xmm1, %xmm0`); +vpinsrw \\$0x03, ${GPR_I32}, %xmm1, %xmm0`); codegenTestX64_adhoc(`(module (func (export "f") (param v128 v128 i32) (result v128) (i32x4.replace_lane 2 (local.get 1) (local.get 2))))`, 'f', ` -c4 .. 71 22 .. 02 vpinsrd \\$0x02, ${GPR_I32}, %xmm1, %xmm0`); +vpinsrd \\$0x02, ${GPR_I32}, %xmm1, %xmm0`); codegenTestX64_adhoc(`(module (func (export "f") (param v128 v128 i64) (result v128) (i64x2.replace_lane 1 (local.get 1) (local.get 2))))`, 'f', ` -c4 .. f1 22 .. 01 vpinsrq \\$0x01, ${GPR_I64}, %xmm1, %xmm0`); +vpinsrq \\$0x01, ${GPR_I64}, %xmm1, %xmm0`); if (isAvxPresent(2)) { codegenTestX64_T_v128_avxhack( [['i32', 'i8x16.splat', ` -c5 f9 6e .. vmovd ${GPR_I32}, %xmm0 -c4 e2 79 78 c0 vpbroadcastb %xmm0, %xmm0`], +vmovd ${GPR_I32}, %xmm0 +vpbroadcastb %xmm0, %xmm0`], ['i32', 'i16x8.splat', ` -c5 f9 6e .. vmovd ${GPR_I32}, %xmm0 -c4 e2 79 79 c0 vpbroadcastw %xmm0, %xmm0`], +vmovd ${GPR_I32}, %xmm0 +vpbroadcastw %xmm0, %xmm0`], ['i32', 'i32x4.splat', ` -c5 f9 6e .. 
vmovd ${GPR_I32}, %xmm0 -c4 e2 79 58 c0 vpbroadcastd %xmm0, %xmm0`], +vmovd ${GPR_I32}, %xmm0 +vpbroadcastd %xmm0, %xmm0`], ['i64', 'i64x2.splat', ` -c4 e1 f9 6e .. vmovq ${GPR_I64}, %xmm0 -c4 e2 79 59 c0 vpbroadcastq %xmm0, %xmm0`], - ['f32', 'f32x4.splat', `c4 e2 79 18 c0 vbroadcastss %xmm0, %xmm0`]], {log:true}); +vmovq ${GPR_I64}, %xmm0 +vpbroadcastq %xmm0, %xmm0`], + ['f32', 'f32x4.splat', `vbroadcastss %xmm0, %xmm0`]], {log:true}); codegenTestX64_T_v128_avxhack( [['i32', 'v128.load8_splat', - 'c4 c2 79 78 04 .. vpbroadcastbb \\(%r15,%r\\w+,1\\), %xmm0'], + 'vpbroadcastbb \\(%r15,%r\\w+,1\\), %xmm0'], ['i32', 'v128.load16_splat', - 'c4 c2 79 79 04 .. vpbroadcastww \\(%r15,%r\\w+,1\\), %xmm0'], + 'vpbroadcastww \\(%r15,%r\\w+,1\\), %xmm0'], ['i32', 'v128.load32_splat', - 'c4 c2 79 18 04 .. vbroadcastssl \\(%r15,%r\\w+,1\\), %xmm0']], {memory: 1}); + 'vbroadcastssl \\(%r15,%r\\w+,1\\), %xmm0']], {memory: 1}); } // Using VEX during shuffle ops codegenTestX64_v128xv128_v128_avxhack([ // Identity op on second argument should generate a move ['i8x16.shuffle 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15', - 'c5 f9 6f c1 vmovdqa %xmm1, %xmm0'], + 'vmovdqa %xmm1, %xmm0'], // Broadcast a byte from first argument ['i8x16.shuffle 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5', ` -c5 f1 60 c1 vpunpcklbw %xmm1, %xmm1, %xmm0 -c5 fa 70 c0 55 vpshufhw \\$0x55, %xmm0, %xmm0 -c5 f9 70 c0 aa vpshufd \\$0xAA, %xmm0, %xmm0`], +vpunpcklbw %xmm1, %xmm1, %xmm0 +vpshufhw \\$0x55, %xmm0, %xmm0 +vpshufd \\$0xAA, %xmm0, %xmm0`], // Broadcast a word from first argument ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5', ` -c5 fb 70 c1 aa vpshuflw \\$0xAA, %xmm1, %xmm0 -c5 f9 70 c0 00 vpshufd \\$0x00, %xmm0, %xmm0`], +vpshuflw \\$0xAA, %xmm1, %xmm0 +vpshufd \\$0x00, %xmm0, %xmm0`], // Permute words ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13', ` -c5 fb 70 c1 b1 vpshuflw \\$0xB1, %xmm1, %xmm0 -c5 fa 70 c0 b1 vpshufhw \\$0xB1, %xmm0, %xmm0`], +vpshuflw \\$0xB1, %xmm1, %xmm0 +vpshufhw \\$0xB1, %xmm0, 
%xmm0`], // Permute doublewords ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11', - 'c5 f9 70 c1 b1 vpshufd \\$0xB1, %xmm1, %xmm0'], + 'vpshufd \\$0xB1, %xmm1, %xmm0'], // Interleave doublewords ['i8x16.shuffle 0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23', - 'c5 f1 62 c2 vpunpckldq %xmm2, %xmm1, %xmm0'], + 'vpunpckldq %xmm2, %xmm1, %xmm0'], // Interleave quadwords ['i8x16.shuffle 24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15', - 'c5 e9 6d c1 vpunpckhqdq %xmm1, %xmm2, %xmm0'], + 'vpunpckhqdq %xmm1, %xmm2, %xmm0'], // Rotate right ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12', - `c4 e3 71 0f c1 0d vpalignr \\$0x0D, %xmm1, %xmm1, %xmm0`], + `vpalignr \\$0x0D, %xmm1, %xmm1, %xmm0`], ['i8x16.shuffle 28 29 30 31 0 1 2 3 4 5 6 7 8 9 10 11', - `c4 e3 71 0f c2 0c vpalignr \\$0x0C, %xmm2, %xmm1, %xmm0`]]); + `vpalignr \\$0x0C, %xmm2, %xmm1, %xmm0`]]); if (isAvxPresent(2)) { codegenTestX64_v128xv128_v128_avxhack([ // Broadcast low byte from second argument ['i8x16.shuffle 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0', - 'c4 e2 79 78 c1 vpbroadcastb %xmm1, %xmm0'], + 'vpbroadcastb %xmm1, %xmm0'], // Broadcast low word from third argument ['i8x16.shuffle 16 17 16 17 16 17 16 17 16 17 16 17 16 17 16 17', - 'c4 e2 79 79 c2 vpbroadcastw %xmm2, %xmm0'], + 'vpbroadcastw %xmm2, %xmm0'], // Broadcast low doubleword from second argument ['i8x16.shuffle 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3', - 'c4 e2 79 58 c1 vpbroadcastd %xmm1, %xmm0']]); + 'vpbroadcastd %xmm1, %xmm0']]); } // Testing AVX optimization where VPBLENDVB accepts four XMM registers as args. 
@@ -310,268 +310,268 @@ codegenTestX64_adhoc( (local.get 2)(local.get 3)))`, 'f', ` -66 0f 6f 0d ${RIPRADDR} movdqax ${RIPR}, %xmm1 -c4 e3 69 4c c3 10 vpblendvb %xmm1, %xmm3, %xmm2, %xmm0`); +movdqax ${RIPR}, %xmm1 +vpblendvb %xmm1, %xmm3, %xmm2, %xmm0`); // Constant arguments that are folded into the instruction codegenTestX64_v128xLITERAL_v128_avxhack( [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 fc 05 ${RIPRADDR} vpaddbx ${RIPR}, %xmm1, %xmm0`], + `vpaddbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 f8 05 ${RIPRADDR} vpsubbx ${RIPR}, %xmm1, %xmm0`], + `vpsubbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 ec 05 ${RIPRADDR} vpaddsbx ${RIPR}, %xmm1, %xmm0`], + `vpaddsbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 dc 05 ${RIPRADDR} vpaddusbx ${RIPR}, %xmm1, %xmm0`], + `vpaddusbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 e8 05 ${RIPRADDR} vpsubsbx ${RIPR}, %xmm1, %xmm0`], + `vpsubsbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 d8 05 ${RIPRADDR} vpsubusbx ${RIPR}, %xmm1, %xmm0`], + `vpsubusbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c4 e2 71 38 05 ${RIPRADDR} vpminsbx ${RIPR}, %xmm1, %xmm0`], + `vpminsbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 da 05 ${RIPRADDR} vpminubx ${RIPR}, %xmm1, %xmm0`], + `vpminubx ${RIPR}, %xmm1, %xmm0`], ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c4 e2 71 3c 05 ${RIPRADDR} vpmaxsbx ${RIPR}, %xmm1, %xmm0`], + `vpmaxsbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 de 05 ${RIPRADDR} vpmaxubx 
${RIPR}, %xmm1, %xmm0`], + `vpmaxubx ${RIPR}, %xmm1, %xmm0`], ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0`], + `vpcmpeqbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` - c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0 - 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 - 66 41 0f ef c7 pxor %xmm15, %xmm0`], + vpcmpeqbx ${RIPR}, %xmm1, %xmm0 + pcmpeqw %xmm15, %xmm15 + pxor %xmm15, %xmm0`], ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 64 05 ${RIPRADDR} vpcmpgtbx ${RIPR}, %xmm1, %xmm0`], + `vpcmpgtbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` - c5 f1 64 05 ${RIPRADDR} vpcmpgtbx ${RIPR}, %xmm1, %xmm0 - 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 - 66 41 0f ef c7 pxor %xmm15, %xmm0`], + vpcmpgtbx ${RIPR}, %xmm1, %xmm0 + pcmpeqw %xmm15, %xmm15 + pxor %xmm15, %xmm0`], ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 63 05 ${RIPRADDR} vpacksswbx ${RIPR}, %xmm1, %xmm0`], + `vpacksswbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 67 05 ${RIPRADDR} vpackuswbx ${RIPR}, %xmm1, %xmm0`], + `vpackuswbx ${RIPR}, %xmm1, %xmm0`], ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 fd 05 ${RIPRADDR} vpaddwx ${RIPR}, %xmm1, %xmm0`], + `vpaddwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 f9 05 ${RIPRADDR} vpsubwx ${RIPR}, %xmm1, %xmm0`], + `vpsubwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 d5 05 ${RIPRADDR} vpmullwx ${RIPR}, %xmm1, %xmm0`], + `vpmullwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 ed 05 ${RIPRADDR} vpaddswx ${RIPR}, %xmm1, %xmm0`], + `vpaddswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', 
- `c5 f1 dd 05 ${RIPRADDR} vpadduswx ${RIPR}, %xmm1, %xmm0`], + `vpadduswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 e9 05 ${RIPRADDR} vpsubswx ${RIPR}, %xmm1, %xmm0`], + `vpsubswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 d9 05 ${RIPRADDR} vpsubuswx ${RIPR}, %xmm1, %xmm0`], + `vpsubuswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 ea 05 ${RIPRADDR} vpminswx ${RIPR}, %xmm1, %xmm0`], + `vpminswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c4 e2 71 3a 05 ${RIPRADDR} vpminuwx ${RIPR}, %xmm1, %xmm0`], + `vpminuwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 ee 05 ${RIPRADDR} vpmaxswx ${RIPR}, %xmm1, %xmm0`], + `vpmaxswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c4 e2 71 3e 05 ${RIPRADDR} vpmaxuwx ${RIPR}, %xmm1, %xmm0`], + `vpmaxuwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0`], + `vpcmpeqwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` - c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0 - 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 - 66 41 0f ef c7 pxor %xmm15, %xmm0`], + vpcmpeqwx ${RIPR}, %xmm1, %xmm0 + pcmpeqw %xmm15, %xmm15 + pxor %xmm15, %xmm0`], ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 65 05 ${RIPRADDR} vpcmpgtwx ${RIPR}, %xmm1, %xmm0`], + `vpcmpgtwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` - c5 f1 65 05 ${RIPRADDR} vpcmpgtwx ${RIPR}, %xmm1, %xmm0 - 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 - 66 41 0f ef c7 pxor %xmm15, %xmm0`], + vpcmpgtwx ${RIPR}, %xmm1, %xmm0 + pcmpeqw %xmm15, %xmm15 + pxor %xmm15, %xmm0`], ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 6b 05 ${RIPRADDR} vpackssdwx ${RIPR}, %xmm1, 
%xmm0`], + `vpackssdwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c4 e2 71 2b 05 ${RIPRADDR} vpackusdwx ${RIPR}, %xmm1, %xmm0`], + `vpackusdwx ${RIPR}, %xmm1, %xmm0`], ['i32x4.add', '(v128.const i32x4 1 2 1 2)', - `c5 f1 fe 05 ${RIPRADDR} vpadddx ${RIPR}, %xmm1, %xmm0`], + `vpadddx ${RIPR}, %xmm1, %xmm0`], ['i32x4.sub', '(v128.const i32x4 1 2 1 2)', - `c5 f1 fa 05 ${RIPRADDR} vpsubdx ${RIPR}, %xmm1, %xmm0`], + `vpsubdx ${RIPR}, %xmm1, %xmm0`], ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 40 05 ${RIPRADDR} vpmulldx ${RIPR}, %xmm1, %xmm0`], + `vpmulldx ${RIPR}, %xmm1, %xmm0`], ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 39 05 ${RIPRADDR} vpminsdx ${RIPR}, %xmm1, %xmm0`], + `vpminsdx ${RIPR}, %xmm1, %xmm0`], ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 3b 05 ${RIPRADDR} vpminudx ${RIPR}, %xmm1, %xmm0`], + `vpminudx ${RIPR}, %xmm1, %xmm0`], ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 3d 05 ${RIPRADDR} vpmaxsdx ${RIPR}, %xmm1, %xmm0`], + `vpmaxsdx ${RIPR}, %xmm1, %xmm0`], ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 3f 05 ${RIPRADDR} vpmaxudx ${RIPR}, %xmm1, %xmm0`], + `vpmaxudx ${RIPR}, %xmm1, %xmm0`], ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', - `c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0`], + `vpcmpeqdx ${RIPR}, %xmm1, %xmm0`], ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` - c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0 - 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 - 66 41 0f ef c7 pxor %xmm15, %xmm0`], + vpcmpeqdx ${RIPR}, %xmm1, %xmm0 + pcmpeqw %xmm15, %xmm15 + pxor %xmm15, %xmm0`], ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)', - `c5 f1 66 05 ${RIPRADDR} vpcmpgtdx ${RIPR}, %xmm1, %xmm0`], + `vpcmpgtdx ${RIPR}, %xmm1, %xmm0`], ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', ` - c5 f1 66 05 ${RIPRADDR} vpcmpgtdx ${RIPR}, %xmm1, %xmm0 - 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 - 66 41 0f ef c7 pxor %xmm15, %xmm0`], + vpcmpgtdx ${RIPR}, %xmm1, %xmm0 
+ pcmpeqw %xmm15, %xmm15 + pxor %xmm15, %xmm0`], ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', - `c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`], + `vpmaddwdx ${RIPR}, %xmm1, %xmm0`], ['i64x2.add', '(v128.const i64x2 1 2)', - `c5 f1 d4 05 ${RIPRADDR} vpaddqx ${RIPR}, %xmm1, %xmm0`], + `vpaddqx ${RIPR}, %xmm1, %xmm0`], ['i64x2.sub', '(v128.const i64x2 1 2)', - `c5 f1 fb 05 ${RIPRADDR} vpsubqx ${RIPR}, %xmm1, %xmm0`], + `vpsubqx ${RIPR}, %xmm1, %xmm0`], ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0`], + `vpandx ${RIPR}, %xmm1, %xmm0`], ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 eb 05 ${RIPRADDR} vporx ${RIPR}, %xmm1, %xmm0`], + `vporx ${RIPR}, %xmm1, %xmm0`], ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0`], + `vpxorx ${RIPR}, %xmm1, %xmm0`], ['f32x4.add', '(v128.const f32x4 1 2 3 4)', - `c5 f0 58 05 ${RIPRADDR} vaddpsx ${RIPR}, %xmm1, %xmm0`], + `vaddpsx ${RIPR}, %xmm1, %xmm0`], ['f32x4.sub', '(v128.const f32x4 1 2 3 4)', - `c5 f0 5c 05 ${RIPRADDR} vsubpsx ${RIPR}, %xmm1, %xmm0`], + `vsubpsx ${RIPR}, %xmm1, %xmm0`], ['f32x4.mul', '(v128.const f32x4 1 2 3 4)', - `c5 f0 59 05 ${RIPRADDR} vmulpsx ${RIPR}, %xmm1, %xmm0`], + `vmulpsx ${RIPR}, %xmm1, %xmm0`], ['f32x4.div', '(v128.const f32x4 1 2 3 4)', - `c5 f0 5e 05 ${RIPRADDR} vdivpsx ${RIPR}, %xmm1, %xmm0`], + `vdivpsx ${RIPR}, %xmm1, %xmm0`], ['f64x2.add', '(v128.const f64x2 1 2)', - `c5 f1 58 05 ${RIPRADDR} vaddpdx ${RIPR}, %xmm1, %xmm0`], + `vaddpdx ${RIPR}, %xmm1, %xmm0`], ['f64x2.sub', '(v128.const f64x2 1 2)', - `c5 f1 5c 05 ${RIPRADDR} vsubpdx ${RIPR}, %xmm1, %xmm0`], + `vsubpdx ${RIPR}, %xmm1, %xmm0`], ['f64x2.mul', '(v128.const f64x2 1 2)', - `c5 f1 59 05 ${RIPRADDR} vmulpdx ${RIPR}, %xmm1, %xmm0`], + `vmulpdx ${RIPR}, %xmm1, %xmm0`], ['f64x2.div', '(v128.const f64x2 1 2)', - `c5 f1 5e 05 ${RIPRADDR} vdivpdx 
${RIPR}, %xmm1, %xmm0`], + `vdivpdx ${RIPR}, %xmm1, %xmm0`], ['f32x4.eq', '(v128.const f32x4 1 2 3 4)', - `c5 f0 c2 05 ${RIPRADDR} 00 vcmppsx \\$0x00, ${RIPR}, %xmm1, %xmm0`], + `vcmppsx \\$0x00, ${RIPR}, %xmm1, %xmm0`], ['f32x4.ne', '(v128.const f32x4 1 2 3 4)', - `c5 f0 c2 05 ${RIPRADDR} 04 vcmppsx \\$0x04, ${RIPR}, %xmm1, %xmm0`], + `vcmppsx \\$0x04, ${RIPR}, %xmm1, %xmm0`], ['f32x4.lt', '(v128.const f32x4 1 2 3 4)', - `c5 f0 c2 05 ${RIPRADDR} 01 vcmppsx \\$0x01, ${RIPR}, %xmm1, %xmm0`], + `vcmppsx \\$0x01, ${RIPR}, %xmm1, %xmm0`], ['f32x4.le', '(v128.const f32x4 1 2 3 4)', - `c5 f0 c2 05 ${RIPRADDR} 02 vcmppsx \\$0x02, ${RIPR}, %xmm1, %xmm0`], + `vcmppsx \\$0x02, ${RIPR}, %xmm1, %xmm0`], ['f64x2.eq', '(v128.const f64x2 1 2)', - `c5 f1 c2 05 ${RIPRADDR} 00 vcmppdx \\$0x00, ${RIPR}, %xmm1, %xmm0`], + `vcmppdx \\$0x00, ${RIPR}, %xmm1, %xmm0`], ['f64x2.ne', '(v128.const f64x2 1 2)', - `c5 f1 c2 05 ${RIPRADDR} 04 vcmppdx \\$0x04, ${RIPR}, %xmm1, %xmm0`], + `vcmppdx \\$0x04, ${RIPR}, %xmm1, %xmm0`], ['f64x2.lt', '(v128.const f64x2 1 2)', - `c5 f1 c2 05 ${RIPRADDR} 01 vcmppdx \\$0x01, ${RIPR}, %xmm1, %xmm0`], + `vcmppdx \\$0x01, ${RIPR}, %xmm1, %xmm0`], ['f64x2.le', '(v128.const f64x2 1 2)', - `c5 f1 c2 05 ${RIPRADDR} 02 vcmppdx \\$0x02, ${RIPR}, %xmm1, %xmm0`]]); + `vcmppdx \\$0x02, ${RIPR}, %xmm1, %xmm0`]]); // Commutative operations with constants on the lhs should generate the same // code as with the constant on the rhs. 
codegenTestX64_LITERALxv128_v128_avxhack( [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 fc 05 ${RIPRADDR} vpaddbx ${RIPR}, %xmm1, %xmm0`], + `vpaddbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 ec 05 ${RIPRADDR} vpaddsbx ${RIPR}, %xmm1, %xmm0`], + `vpaddsbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 dc 05 ${RIPRADDR} vpaddusbx ${RIPR}, %xmm1, %xmm0`], + `vpaddusbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c4 e2 71 38 05 ${RIPRADDR} vpminsbx ${RIPR}, %xmm1, %xmm0`], + `vpminsbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 da 05 ${RIPRADDR} vpminubx ${RIPR}, %xmm1, %xmm0`], + `vpminubx ${RIPR}, %xmm1, %xmm0`], ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c4 e2 71 3c 05 ${RIPRADDR} vpmaxsbx ${RIPR}, %xmm1, %xmm0`], + `vpmaxsbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 de 05 ${RIPRADDR} vpmaxubx ${RIPR}, %xmm1, %xmm0`], + `vpmaxubx ${RIPR}, %xmm1, %xmm0`], ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0`], + `vpcmpeqbx ${RIPR}, %xmm1, %xmm0`], ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` - c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0 - 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 - 66 41 0f ef c7 pxor %xmm15, %xmm0`], + vpcmpeqbx ${RIPR}, %xmm1, %xmm0 + pcmpeqw %xmm15, %xmm15 + pxor %xmm15, %xmm0`], ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 fd 05 ${RIPRADDR} vpaddwx ${RIPR}, %xmm1, %xmm0`], + `vpaddwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 d5 05 ${RIPRADDR} vpmullwx ${RIPR}, %xmm1, %xmm0`], + `vpmullwx ${RIPR}, %xmm1, %xmm0`], 
['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 ed 05 ${RIPRADDR} vpaddswx ${RIPR}, %xmm1, %xmm0`], + `vpaddswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 dd 05 ${RIPRADDR} vpadduswx ${RIPR}, %xmm1, %xmm0`], + `vpadduswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 ea 05 ${RIPRADDR} vpminswx ${RIPR}, %xmm1, %xmm0`], + `vpminswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c4 e2 71 3a 05 ${RIPRADDR} vpminuwx ${RIPR}, %xmm1, %xmm0`], + `vpminuwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 ee 05 ${RIPRADDR} vpmaxswx ${RIPR}, %xmm1, %xmm0`], + `vpmaxswx ${RIPR}, %xmm1, %xmm0`], ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c4 e2 71 3e 05 ${RIPRADDR} vpmaxuwx ${RIPR}, %xmm1, %xmm0`], + `vpmaxuwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0`], + `vpcmpeqwx ${RIPR}, %xmm1, %xmm0`], ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` - c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0 - 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 - 66 41 0f ef c7 pxor %xmm15, %xmm0`], + vpcmpeqwx ${RIPR}, %xmm1, %xmm0 + pcmpeqw %xmm15, %xmm15 + pxor %xmm15, %xmm0`], ['i32x4.add', '(v128.const i32x4 1 2 1 2)', - `c5 f1 fe 05 ${RIPRADDR} vpadddx ${RIPR}, %xmm1, %xmm0`], + `vpadddx ${RIPR}, %xmm1, %xmm0`], ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 40 05 ${RIPRADDR} vpmulldx ${RIPR}, %xmm1, %xmm0`], + `vpmulldx ${RIPR}, %xmm1, %xmm0`], ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 39 05 ${RIPRADDR} vpminsdx ${RIPR}, %xmm1, %xmm0`], + `vpminsdx ${RIPR}, %xmm1, %xmm0`], ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 3b 05 ${RIPRADDR} vpminudx ${RIPR}, %xmm1, %xmm0`], + `vpminudx ${RIPR}, %xmm1, %xmm0`], ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 3d 05 
${RIPRADDR} vpmaxsdx ${RIPR}, %xmm1, %xmm0`], + `vpmaxsdx ${RIPR}, %xmm1, %xmm0`], ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', - `c4 e2 71 3f 05 ${RIPRADDR} vpmaxudx ${RIPR}, %xmm1, %xmm0`], + `vpmaxudx ${RIPR}, %xmm1, %xmm0`], ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', - `c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0`], + `vpcmpeqdx ${RIPR}, %xmm1, %xmm0`], ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` - c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0 - 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 - 66 41 0f ef c7 pxor %xmm15, %xmm0`], + vpcmpeqdx ${RIPR}, %xmm1, %xmm0 + pcmpeqw %xmm15, %xmm15 + pxor %xmm15, %xmm0`], ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', - `c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`], + `vpmaddwdx ${RIPR}, %xmm1, %xmm0`], ['i64x2.add', '(v128.const i64x2 1 2)', - `c5 f1 d4 05 ${RIPRADDR} vpaddqx ${RIPR}, %xmm1, %xmm0`], + `vpaddqx ${RIPR}, %xmm1, %xmm0`], ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0`], + `vpandx ${RIPR}, %xmm1, %xmm0`], ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 eb 05 ${RIPRADDR} vporx ${RIPR}, %xmm1, %xmm0`], + `vporx ${RIPR}, %xmm1, %xmm0`], ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0`]]); + `vpxorx ${RIPR}, %xmm1, %xmm0`]]); // Shift by constant encodings codegenTestX64_v128xLITERAL_v128_avxhack( [['i8x16.shl', '(i32.const 2)', ` -c5 f1 fc c1 vpaddb %xmm1, %xmm1, %xmm0 -66 0f fc c0 paddb %xmm0, %xmm0`], +vpaddb %xmm1, %xmm1, %xmm0 +paddb %xmm0, %xmm0`], ['i8x16.shl', '(i32.const 4)', ` -c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0 -66 0f 71 f0 04 psllw \\$0x04, %xmm0`], +vpandx ${RIPR}, %xmm1, %xmm0 +psllw \\$0x04, %xmm0`], ['i16x8.shl', '(i32.const 1)', - 'c5 f9 71 f1 01 vpsllw \\$0x01, %xmm1, %xmm0'], + 'vpsllw \\$0x01, %xmm1, %xmm0'], ['i16x8.shr_s', '(i32.const 3)', - 'c5 f9 71 e1 03 vpsraw 
\\$0x03, %xmm1, %xmm0'], + 'vpsraw \\$0x03, %xmm1, %xmm0'], ['i16x8.shr_u', '(i32.const 2)', - 'c5 f9 71 d1 02 vpsrlw \\$0x02, %xmm1, %xmm0'], + 'vpsrlw \\$0x02, %xmm1, %xmm0'], ['i32x4.shl', '(i32.const 5)', - 'c5 f9 72 f1 05 vpslld \\$0x05, %xmm1, %xmm0'], + 'vpslld \\$0x05, %xmm1, %xmm0'], ['i32x4.shr_s', '(i32.const 2)', - 'c5 f9 72 e1 02 vpsrad \\$0x02, %xmm1, %xmm0'], + 'vpsrad \\$0x02, %xmm1, %xmm0'], ['i32x4.shr_u', '(i32.const 5)', - 'c5 f9 72 d1 05 vpsrld \\$0x05, %xmm1, %xmm0'], + 'vpsrld \\$0x05, %xmm1, %xmm0'], ['i64x2.shr_s', '(i32.const 7)', ` -c5 79 70 f9 f5 vpshufd \\$0xF5, %xmm1, %xmm15 -66 41 0f 72 e7 1f psrad \\$0x1F, %xmm15 -c4 c1 71 ef c7 vpxor %xmm15, %xmm1, %xmm0 -66 0f 73 d0 07 psrlq \\$0x07, %xmm0 -66 41 0f ef c7 pxor %xmm15, %xmm0`]]); +vpshufd \\$0xF5, %xmm1, %xmm15 +psrad \\$0x1F, %xmm15 +vpxor %xmm15, %xmm1, %xmm0 +psrlq \\$0x07, %xmm0 +pxor %xmm15, %xmm0`]]); // vpblendvp optimization when bitselect follows comparison. codegenTestX64_adhoc( @@ -580,5 +580,5 @@ codegenTestX64_adhoc( (v128.bitselect (local.get 2) (local.get 3) (i32x4.eq (local.get 0) (local.get 1)))))`, 'f', ` -66 0f 76 c1 pcmpeqd %xmm1, %xmm0 -c4 e3 61 4c c2 00 vpblendvb %xmm0, %xmm2, %xmm3, %xmm0`); +pcmpeqd %xmm1, %xmm0 +vpblendvb %xmm0, %xmm2, %xmm3, %xmm0`); diff --git a/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js @@ -7,249 +7,249 @@ // Inputs (xmm0, xmm1) codegenTestX64_v128xPTYPE_v128( - [['f32x4.replace_lane 0', 'f32', `f3 0f 10 c1 movss %xmm1, %xmm0`], - ['f32x4.replace_lane 1', 'f32', `66 0f 3a 21 c1 10 insertps \\$0x10, %xmm1, %xmm0`], - ['f32x4.replace_lane 3', 'f32', `66 0f 3a 21 c1 30 insertps \\$0x30, %xmm1, %xmm0`], - ['f64x2.replace_lane 0', 'f64', `f2 0f 10 c1 movsd %xmm1, %xmm0`], - ['f64x2.replace_lane 1', 'f64', `66 0f c6 c1 00 shufpd \\$0x00, %xmm1, %xmm0`]] ); + [['f32x4.replace_lane 0', 'f32', `movss %xmm1, %xmm0`], + ['f32x4.replace_lane 1', 'f32', `insertps \\$0x10, %xmm1, 
%xmm0`], + ['f32x4.replace_lane 3', 'f32', `insertps \\$0x30, %xmm1, %xmm0`], + ['f64x2.replace_lane 0', 'f64', `movsd %xmm1, %xmm0`], + ['f64x2.replace_lane 1', 'f64', `shufpd \\$0x00, %xmm1, %xmm0`]] ); // Inputs (xmm1, xmm0) codegenTestX64_v128xv128_v128_reversed( - [['f32x4.pmin', `0f 5d c1 minps %xmm1, %xmm0`], - ['f32x4.pmax', `0f 5f c1 maxps %xmm1, %xmm0`], - ['f64x2.pmin', `66 0f 5d c1 minpd %xmm1, %xmm0`], - ['f64x2.pmax', `66 0f 5f c1 maxpd %xmm1, %xmm0`]] ); + [['f32x4.pmin', `minps %xmm1, %xmm0`], + ['f32x4.pmax', `maxps %xmm1, %xmm0`], + ['f64x2.pmin', `minpd %xmm1, %xmm0`], + ['f64x2.pmax', `maxpd %xmm1, %xmm0`]] ); // Constant arguments that are folded into the instruction codegenTestX64_v128xLITERAL_v128( [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f fc 05 ${RIPRADDR} paddbx ${RIPR}, %xmm0`], + `paddbx ${RIPR}, %xmm0`], ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f f8 05 ${RIPRADDR} psubbx ${RIPR}, %xmm0`], + `psubbx ${RIPR}, %xmm0`], ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f ec 05 ${RIPRADDR} paddsbx ${RIPR}, %xmm0`], + `paddsbx ${RIPR}, %xmm0`], ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f dc 05 ${RIPRADDR} paddusbx ${RIPR}, %xmm0`], + `paddusbx ${RIPR}, %xmm0`], ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f e8 05 ${RIPRADDR} psubsbx ${RIPR}, %xmm0`], + `psubsbx ${RIPR}, %xmm0`], ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f d8 05 ${RIPRADDR} psubusbx ${RIPR}, %xmm0`], + `psubusbx ${RIPR}, %xmm0`], ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f 38 38 05 ${RIPRADDR} pminsbx ${RIPR}, %xmm0`], + `pminsbx ${RIPR}, %xmm0`], ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f da 05 ${RIPRADDR} pminubx ${RIPR}, %xmm0`], + `pminubx ${RIPR}, %xmm0`], ['i8x16.max_s', '(v128.const i8x16 1 
2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f 38 3c 05 ${RIPRADDR} pmaxsbx ${RIPR}, %xmm0`], + `pmaxsbx ${RIPR}, %xmm0`], ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f de 05 ${RIPRADDR} pmaxubx ${RIPR}, %xmm0`], + `pmaxubx ${RIPR}, %xmm0`], ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0`], + `pcmpeqbx ${RIPR}, %xmm0`], ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` -66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpeqbx ${RIPR}, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f 64 05 ${RIPRADDR} pcmpgtbx ${RIPR}, %xmm0`], + `pcmpgtbx ${RIPR}, %xmm0`], ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` -66 0f 64 05 ${RIPRADDR} pcmpgtbx ${RIPR}, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpgtbx ${RIPR}, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f 63 05 ${RIPRADDR} packsswbx ${RIPR}, %xmm0`], + `packsswbx ${RIPR}, %xmm0`], ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f 67 05 ${RIPRADDR} packuswbx ${RIPR}, %xmm0`], + `packuswbx ${RIPR}, %xmm0`], ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f fd 05 ${RIPRADDR} paddwx ${RIPR}, %xmm0`], + `paddwx ${RIPR}, %xmm0`], ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f f9 05 ${RIPRADDR} psubwx ${RIPR}, %xmm0`], + `psubwx ${RIPR}, %xmm0`], ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f d5 05 ${RIPRADDR} pmullwx ${RIPR}, %xmm0`], + `pmullwx ${RIPR}, %xmm0`], ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f ed 05 ${RIPRADDR} paddswx ${RIPR}, %xmm0`], + `paddswx ${RIPR}, %xmm0`], ['i16x8.add_sat_u', 
'(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f dd 05 ${RIPRADDR} padduswx ${RIPR}, %xmm0`], + `padduswx ${RIPR}, %xmm0`], ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f e9 05 ${RIPRADDR} psubswx ${RIPR}, %xmm0`], + `psubswx ${RIPR}, %xmm0`], ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f d9 05 ${RIPRADDR} psubuswx ${RIPR}, %xmm0`], + `psubuswx ${RIPR}, %xmm0`], ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f ea 05 ${RIPRADDR} pminswx ${RIPR}, %xmm0`], + `pminswx ${RIPR}, %xmm0`], ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f 38 3a 05 ${RIPRADDR} pminuwx ${RIPR}, %xmm0`], + `pminuwx ${RIPR}, %xmm0`], ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f ee 05 ${RIPRADDR} pmaxswx ${RIPR}, %xmm0`], + `pmaxswx ${RIPR}, %xmm0`], ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f 38 3e 05 ${RIPRADDR} pmaxuwx ${RIPR}, %xmm0`], + `pmaxuwx ${RIPR}, %xmm0`], ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0`], + `pcmpeqwx ${RIPR}, %xmm0`], ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` -66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpeqwx ${RIPR}, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f 65 05 ${RIPRADDR} pcmpgtwx ${RIPR}, %xmm0`], + `pcmpgtwx ${RIPR}, %xmm0`], ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` -66 0f 65 05 ${RIPRADDR} pcmpgtwx ${RIPR}, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpgtwx ${RIPR}, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f 6b 05 ${RIPRADDR} packssdwx ${RIPR}, %xmm0`], + `packssdwx ${RIPR}, %xmm0`], ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f 38 2b 05 ${RIPRADDR} packusdwx ${RIPR}, %xmm0`], 
+ `packusdwx ${RIPR}, %xmm0`], ['i32x4.add', '(v128.const i32x4 1 2 1 2)', - `66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`], + `padddx ${RIPR}, %xmm0`], ['i32x4.sub', '(v128.const i32x4 1 2 1 2)', - `66 0f fa 05 ${RIPRADDR} psubdx ${RIPR}, %xmm0`], + `psubdx ${RIPR}, %xmm0`], ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', - `66 0f 38 40 05 ${RIPRADDR} pmulldx ${RIPR}, %xmm0`], + `pmulldx ${RIPR}, %xmm0`], ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', - `66 0f 38 39 05 ${RIPRADDR} pminsdx ${RIPR}, %xmm0`], + `pminsdx ${RIPR}, %xmm0`], ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', - `66 0f 38 3b 05 ${RIPRADDR} pminudx ${RIPR}, %xmm0`], + `pminudx ${RIPR}, %xmm0`], ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', - `66 0f 38 3d 05 ${RIPRADDR} pmaxsdx ${RIPR}, %xmm0`], + `pmaxsdx ${RIPR}, %xmm0`], ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', - `66 0f 38 3f 05 ${RIPRADDR} pmaxudx ${RIPR}, %xmm0`], + `pmaxudx ${RIPR}, %xmm0`], ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', - `66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0`], + `pcmpeqdx ${RIPR}, %xmm0`], ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` -66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpeqdx ${RIPR}, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)', - `66 0f 66 05 ${RIPRADDR} pcmpgtdx ${RIPR}, %xmm0`], + `pcmpgtdx ${RIPR}, %xmm0`], ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', ` -66 0f 66 05 ${RIPRADDR} pcmpgtdx ${RIPR}, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpgtdx ${RIPR}, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', - `66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`], + `pmaddwdx ${RIPR}, %xmm0`], ['i64x2.add', '(v128.const i64x2 1 2)', - `66 0f d4 05 ${RIPRADDR} paddqx ${RIPR}, %xmm0`], + `paddqx ${RIPR}, %xmm0`], ['i64x2.sub', '(v128.const i64x2 1 2)', - `66 0f fb 05 ${RIPRADDR} 
psubqx ${RIPR}, %xmm0`], + `psubqx ${RIPR}, %xmm0`], ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + `pandx ${RIPR}, %xmm0`], ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f eb 05 ${RIPRADDR} porx ${RIPR}, %xmm0`], + `porx ${RIPR}, %xmm0`], ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`], + `pxorx ${RIPR}, %xmm0`], ['f32x4.add', '(v128.const f32x4 1 2 3 4)', - `0f 58 05 ${RIPRADDR} addpsx ${RIPR}, %xmm0`], + `addpsx ${RIPR}, %xmm0`], ['f32x4.sub', '(v128.const f32x4 1 2 3 4)', - `0f 5c 05 ${RIPRADDR} subpsx ${RIPR}, %xmm0`], + `subpsx ${RIPR}, %xmm0`], ['f32x4.mul', '(v128.const f32x4 1 2 3 4)', - `0f 59 05 ${RIPRADDR} mulpsx ${RIPR}, %xmm0`], + `mulpsx ${RIPR}, %xmm0`], ['f32x4.div', '(v128.const f32x4 1 2 3 4)', - `0f 5e 05 ${RIPRADDR} divpsx ${RIPR}, %xmm0`], + `divpsx ${RIPR}, %xmm0`], ['f32x4.eq', '(v128.const f32x4 1 2 3 4)', - `0f c2 05 ${RIPRADDR} 00 cmppsx \\$0x00, ${RIPR}, %xmm0`], + `cmppsx \\$0x00, ${RIPR}, %xmm0`], ['f32x4.ne', '(v128.const f32x4 1 2 3 4)', - `0f c2 05 ${RIPRADDR} 04 cmppsx \\$0x04, ${RIPR}, %xmm0`], + `cmppsx \\$0x04, ${RIPR}, %xmm0`], ['f32x4.lt', '(v128.const f32x4 1 2 3 4)', - `0f c2 05 ${RIPRADDR} 01 cmppsx \\$0x01, ${RIPR}, %xmm0`], + `cmppsx \\$0x01, ${RIPR}, %xmm0`], ['f32x4.le', '(v128.const f32x4 1 2 3 4)', - `0f c2 05 ${RIPRADDR} 02 cmppsx \\$0x02, ${RIPR}, %xmm0`], + `cmppsx \\$0x02, ${RIPR}, %xmm0`], ['f64x2.add', '(v128.const f64x2 1 2)', - `66 0f 58 05 ${RIPRADDR} addpdx ${RIPR}, %xmm0`], + `addpdx ${RIPR}, %xmm0`], ['f64x2.sub', '(v128.const f64x2 1 2)', - `66 0f 5c 05 ${RIPRADDR} subpdx ${RIPR}, %xmm0`], + `subpdx ${RIPR}, %xmm0`], ['f64x2.mul', '(v128.const f64x2 1 2)', - `66 0f 59 05 ${RIPRADDR} mulpdx ${RIPR}, %xmm0`], + `mulpdx ${RIPR}, %xmm0`], ['f64x2.div', '(v128.const f64x2 1 2)', - `66 0f 5e 05 ${RIPRADDR} divpdx ${RIPR}, %xmm0`], + `divpdx 
${RIPR}, %xmm0`], ['f64x2.eq', '(v128.const f64x2 1 2)', - `66 0f c2 05 ${RIPRADDR} 00 cmppdx \\$0x00, ${RIPR}, %xmm0`], + `cmppdx \\$0x00, ${RIPR}, %xmm0`], ['f64x2.ne', '(v128.const f64x2 1 2)', - `66 0f c2 05 ${RIPRADDR} 04 cmppdx \\$0x04, ${RIPR}, %xmm0`], + `cmppdx \\$0x04, ${RIPR}, %xmm0`], ['f64x2.lt', '(v128.const f64x2 1 2)', - `66 0f c2 05 ${RIPRADDR} 01 cmppdx \\$0x01, ${RIPR}, %xmm0`], + `cmppdx \\$0x01, ${RIPR}, %xmm0`], ['f64x2.le', '(v128.const f64x2 1 2)', - `66 0f c2 05 ${RIPRADDR} 02 cmppdx \\$0x02, ${RIPR}, %xmm0`]]); + `cmppdx \\$0x02, ${RIPR}, %xmm0`]]); // Commutative operations with constants on the lhs should generate the same // code as with the constant on the rhs. codegenTestX64_LITERALxv128_v128( [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f fc 05 ${RIPRADDR} paddbx ${RIPR}, %xmm0`], + `paddbx ${RIPR}, %xmm0`], ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f ec 05 ${RIPRADDR} paddsbx ${RIPR}, %xmm0`], + `paddsbx ${RIPR}, %xmm0`], ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f dc 05 ${RIPRADDR} paddusbx ${RIPR}, %xmm0`], + `paddusbx ${RIPR}, %xmm0`], ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f 38 38 05 ${RIPRADDR} pminsbx ${RIPR}, %xmm0`], + `pminsbx ${RIPR}, %xmm0`], ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f da 05 ${RIPRADDR} pminubx ${RIPR}, %xmm0`], + `pminubx ${RIPR}, %xmm0`], ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f 38 3c 05 ${RIPRADDR} pmaxsbx ${RIPR}, %xmm0`], + `pmaxsbx ${RIPR}, %xmm0`], ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f de 05 ${RIPRADDR} pmaxubx ${RIPR}, %xmm0`], + `pmaxubx ${RIPR}, %xmm0`], ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0`], + `pcmpeqbx ${RIPR}, %xmm0`], ['i8x16.ne', '(v128.const i8x16 1 2 1 
2 1 2 1 2 1 2 1 2 1 2 1 2)', ` -66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpeqbx ${RIPR}, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f fd 05 ${RIPRADDR} paddwx ${RIPR}, %xmm0`], + `paddwx ${RIPR}, %xmm0`], ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f d5 05 ${RIPRADDR} pmullwx ${RIPR}, %xmm0`], + `pmullwx ${RIPR}, %xmm0`], ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f ed 05 ${RIPRADDR} paddswx ${RIPR}, %xmm0`], + `paddswx ${RIPR}, %xmm0`], ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f dd 05 ${RIPRADDR} padduswx ${RIPR}, %xmm0`], + `padduswx ${RIPR}, %xmm0`], ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f ea 05 ${RIPRADDR} pminswx ${RIPR}, %xmm0`], + `pminswx ${RIPR}, %xmm0`], ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f 38 3a 05 ${RIPRADDR} pminuwx ${RIPR}, %xmm0`], + `pminuwx ${RIPR}, %xmm0`], ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f ee 05 ${RIPRADDR} pmaxswx ${RIPR}, %xmm0`], + `pmaxswx ${RIPR}, %xmm0`], ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f 38 3e 05 ${RIPRADDR} pmaxuwx ${RIPR}, %xmm0`], + `pmaxuwx ${RIPR}, %xmm0`], ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', - `66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0`], + `pcmpeqwx ${RIPR}, %xmm0`], ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` -66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpeqwx ${RIPR}, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i32x4.add', '(v128.const i32x4 1 2 1 2)', - `66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`], + `padddx ${RIPR}, %xmm0`], ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', - `66 0f 38 40 05 ${RIPRADDR} pmulldx ${RIPR}, %xmm0`], + `pmulldx ${RIPR}, %xmm0`], ['i32x4.min_s', '(v128.const 
i32x4 1 2 1 2)', - `66 0f 38 39 05 ${RIPRADDR} pminsdx ${RIPR}, %xmm0`], + `pminsdx ${RIPR}, %xmm0`], ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', - `66 0f 38 3b 05 ${RIPRADDR} pminudx ${RIPR}, %xmm0`], + `pminudx ${RIPR}, %xmm0`], ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', - `66 0f 38 3d 05 ${RIPRADDR} pmaxsdx ${RIPR}, %xmm0`], + `pmaxsdx ${RIPR}, %xmm0`], ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', - `66 0f 38 3f 05 ${RIPRADDR} pmaxudx ${RIPR}, %xmm0`], + `pmaxudx ${RIPR}, %xmm0`], ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', - `66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0`], + `pcmpeqdx ${RIPR}, %xmm0`], ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` -66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpeqdx ${RIPR}, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', - `66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`], + `pmaddwdx ${RIPR}, %xmm0`], ['i64x2.add', '(v128.const i64x2 1 2)', - `66 0f d4 05 ${RIPRADDR} paddqx ${RIPR}, %xmm0`], + `paddqx ${RIPR}, %xmm0`], ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + `pandx ${RIPR}, %xmm0`], ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f eb 05 ${RIPRADDR} porx ${RIPR}, %xmm0`], + `porx ${RIPR}, %xmm0`], ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', - `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`]]); + `pxorx ${RIPR}, %xmm0`]]); diff --git a/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js @@ -2,19 +2,19 @@ codegenTestX86_v128xLITERAL_v128( [['f32x4.eq', '(v128.const f32x4 1 2 3 4)', - `0f c2 05 ${ABSADDR} 00 cmppsx \\$0x00, ${ABS}, %xmm0`], + `cmppsx \\$0x00, ${ABS}, %xmm0`], ['f32x4.ne', '(v128.const f32x4 1 2 3 4)', - `0f c2 05 ${ABSADDR} 04 cmppsx \\$0x04, ${ABS}, %xmm0`], + `cmppsx \\$0x04, 
${ABS}, %xmm0`], ['f32x4.lt', '(v128.const f32x4 1 2 3 4)', - `0f c2 05 ${ABSADDR} 01 cmppsx \\$0x01, ${ABS}, %xmm0`], + `cmppsx \\$0x01, ${ABS}, %xmm0`], ['f32x4.le', '(v128.const f32x4 1 2 3 4)', - `0f c2 05 ${ABSADDR} 02 cmppsx \\$0x02, ${ABS}, %xmm0`], + `cmppsx \\$0x02, ${ABS}, %xmm0`], ['f64x2.eq', '(v128.const f64x2 1 2)', - `66 0f c2 05 ${ABSADDR} 00 cmppdx \\$0x00, ${ABS}, %xmm0`], + `cmppdx \\$0x00, ${ABS}, %xmm0`], ['f64x2.ne', '(v128.const f64x2 1 2)', - `66 0f c2 05 ${ABSADDR} 04 cmppdx \\$0x04, ${ABS}, %xmm0`], + `cmppdx \\$0x04, ${ABS}, %xmm0`], ['f64x2.lt', '(v128.const f64x2 1 2)', - `66 0f c2 05 ${ABSADDR} 01 cmppdx \\$0x01, ${ABS}, %xmm0`], + `cmppdx \\$0x01, ${ABS}, %xmm0`], ['f64x2.le', '(v128.const f64x2 1 2)', - `66 0f c2 05 ${ABSADDR} 02 cmppdx \\$0x02, ${ABS}, %xmm0`]]); + `cmppdx \\$0x02, ${ABS}, %xmm0`]]); diff --git a/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js @@ -17,10 +17,10 @@ codegenTestX64_adhoc( (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128) (v128.bitselect (local.get 0) (local.get 1) (local.get 2))))`, 'f', -`66 0f 6f da movdqa %xmm2, %xmm3 -66 0f db c3 pand %xmm3, %xmm0 -66 0f df d9 pandn %xmm1, %xmm3 -66 0f eb c3 por %xmm3, %xmm0`); +`movdqa %xmm2, %xmm3 +pand %xmm3, %xmm0 +pandn %xmm1, %xmm3 +por %xmm3, %xmm0`); // Blend constant optimizations @@ -29,7 +29,7 @@ codegenTestX64_adhoc( (func (export "f") (param v128) (param v128) (param v128) (result v128) (v128.bitselect (local.get 0) (local.get 1) (v128.const i32x4 -1 0 0 -1))))`, 'f', - `66 0f 3a 0e c1 3c pblendw \\$0x3C, %xmm1, %xmm0`); + `pblendw \\$0x3C, %xmm1, %xmm0`); // vpblendvp optimization when bitselect follows comparison. // Non-AVX pblendvb uses xmm0 as an implicit read-only operand. 
@@ -39,7 +39,7 @@ codegenTestX64_adhoc( (v128.bitselect (local.get 2) (local.get 3) (i32x4.eq (local.get 0) (local.get 1)))))`, 'f', ` -66 0f 76 c1 pcmpeqd %xmm1, %xmm0 -66 0f 6f cb movdqa %xmm3, %xmm1 -66 0f 38 10 ca pblendvb %xmm2, %xmm1 -66 0f 6f c1 movdqa %xmm1, %xmm0`); +pcmpeqd %xmm1, %xmm0 +movdqa %xmm3, %xmm1 +pblendvb %xmm2, %xmm1 +movdqa %xmm1, %xmm0`); diff --git a/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js @@ -7,71 +7,71 @@ // Inputs (xmm0, xmm1) codegenTestX64_v128xv128_v128( - [['i8x16.gt_s', `66 0f 64 c1 pcmpgtb %xmm1, %xmm0`], - ['i16x8.gt_s', `66 0f 65 c1 pcmpgtw %xmm1, %xmm0`], - ['i32x4.gt_s', `66 0f 66 c1 pcmpgtd %xmm1, %xmm0`], + [['i8x16.gt_s', `pcmpgtb %xmm1, %xmm0`], + ['i16x8.gt_s', `pcmpgtw %xmm1, %xmm0`], + ['i32x4.gt_s', `pcmpgtd %xmm1, %xmm0`], ['i8x16.le_s', ` -66 0f 64 c1 pcmpgtb %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0 +pcmpgtb %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0 `], ['i16x8.le_s', ` -66 0f 65 c1 pcmpgtw %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0 +pcmpgtw %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0 `], ['i32x4.le_s', ` -66 0f 66 c1 pcmpgtd %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0 +pcmpgtd %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0 `], - ['i8x16.eq', `66 0f 74 c1 pcmpeqb %xmm1, %xmm0`], - ['i16x8.eq', `66 0f 75 c1 pcmpeqw %xmm1, %xmm0`], - ['i32x4.eq', `66 0f 76 c1 pcmpeqd %xmm1, %xmm0`], + ['i8x16.eq', `pcmpeqb %xmm1, %xmm0`], + ['i16x8.eq', `pcmpeqw %xmm1, %xmm0`], + ['i32x4.eq', `pcmpeqd %xmm1, %xmm0`], ['i8x16.ne', ` -66 0f 74 c1 pcmpeqb %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0 +pcmpeqb %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0 `], ['i16x8.ne', ` -66 0f 75 c1 pcmpeqw %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 
41 0f ef c7 pxor %xmm15, %xmm0 +pcmpeqw %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0 `], ['i32x4.ne', ` -66 0f 76 c1 pcmpeqd %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0 +pcmpeqd %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0 `], - ['f32x4.eq', `0f c2 c1 00 cmpps \\$0x00, %xmm1, %xmm0`], - ['f32x4.ne', `0f c2 c1 04 cmpps \\$0x04, %xmm1, %xmm0`], - ['f32x4.lt', `0f c2 c1 01 cmpps \\$0x01, %xmm1, %xmm0`], - ['f32x4.le', `0f c2 c1 02 cmpps \\$0x02, %xmm1, %xmm0`], - ['f64x2.eq', `66 0f c2 c1 00 cmppd \\$0x00, %xmm1, %xmm0`], - ['f64x2.ne', `66 0f c2 c1 04 cmppd \\$0x04, %xmm1, %xmm0`], - ['f64x2.lt', `66 0f c2 c1 01 cmppd \\$0x01, %xmm1, %xmm0`], - ['f64x2.le', `66 0f c2 c1 02 cmppd \\$0x02, %xmm1, %xmm0`]] ); + ['f32x4.eq', `cmpps \\$0x00, %xmm1, %xmm0`], + ['f32x4.ne', `cmpps \\$0x04, %xmm1, %xmm0`], + ['f32x4.lt', `cmpps \\$0x01, %xmm1, %xmm0`], + ['f32x4.le', `cmpps \\$0x02, %xmm1, %xmm0`], + ['f64x2.eq', `cmppd \\$0x00, %xmm1, %xmm0`], + ['f64x2.ne', `cmppd \\$0x04, %xmm1, %xmm0`], + ['f64x2.lt', `cmppd \\$0x01, %xmm1, %xmm0`], + ['f64x2.le', `cmppd \\$0x02, %xmm1, %xmm0`]] ); // Inputs (xmm1, xmm0) because the operation reverses its arguments. 
codegenTestX64_v128xv128_v128_reversed( [['i8x16.ge_s', ` -66 0f 64 c1 pcmpgtb %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpgtb %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i16x8.ge_s', ` -66 0f 65 c1 pcmpgtw %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], +pcmpgtw %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], ['i32x4.ge_s', ` -66 0f 66 c1 pcmpgtd %xmm1, %xmm0 -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`], - ['i8x16.lt_s', `66 0f 64 c1 pcmpgtb %xmm1, %xmm0`], - ['i16x8.lt_s', `66 0f 65 c1 pcmpgtw %xmm1, %xmm0`], - ['i32x4.lt_s', `66 0f 66 c1 pcmpgtd %xmm1, %xmm0`], - ['f32x4.gt', `0f c2 c1 01 cmpps \\$0x01, %xmm1, %xmm0`], - ['f32x4.ge', `0f c2 c1 02 cmpps \\$0x02, %xmm1, %xmm0`], - ['f64x2.gt', `66 0f c2 c1 01 cmppd \\$0x01, %xmm1, %xmm0`], - ['f64x2.ge', `66 0f c2 c1 02 cmppd \\$0x02, %xmm1, %xmm0`]] ); +pcmpgtd %xmm1, %xmm0 +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`], + ['i8x16.lt_s', `pcmpgtb %xmm1, %xmm0`], + ['i16x8.lt_s', `pcmpgtw %xmm1, %xmm0`], + ['i32x4.lt_s', `pcmpgtd %xmm1, %xmm0`], + ['f32x4.gt', `cmpps \\$0x01, %xmm1, %xmm0`], + ['f32x4.ge', `cmpps \\$0x02, %xmm1, %xmm0`], + ['f64x2.gt', `cmppd \\$0x01, %xmm1, %xmm0`], + ['f64x2.ge', `cmppd \\$0x02, %xmm1, %xmm0`]] ); diff --git a/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js @@ -5,24 +5,24 @@ codegenTestX64_unit_v128( [['v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0', - `66 0f ef c0 pxor %xmm0, %xmm0`], + `pxor %xmm0, %xmm0`], ['v128.const i8x16 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1', - `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`], + `pcmpeqw %xmm0, %xmm0`], ['v128.const i16x8 0 0 0 0 0 0 0 0', - `66 0f ef c0 pxor %xmm0, %xmm0`], + `pxor %xmm0, %xmm0`], ['v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1', - `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`], + `pcmpeqw %xmm0, %xmm0`], ['v128.const 
i32x4 0 0 0 0', - `66 0f ef c0 pxor %xmm0, %xmm0`], + `pxor %xmm0, %xmm0`], ['v128.const i32x4 -1 -1 -1 -1', - `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`], + `pcmpeqw %xmm0, %xmm0`], ['v128.const i64x2 0 0', - `66 0f ef c0 pxor %xmm0, %xmm0`], + `pxor %xmm0, %xmm0`], ['v128.const i64x2 -1 -1', - `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`], + `pcmpeqw %xmm0, %xmm0`], ['v128.const f32x4 0 0 0 0', // Arguably this should be xorps but that's for later - `66 0f ef c0 pxor %xmm0, %xmm0`], + `pxor %xmm0, %xmm0`], ['v128.const f64x2 0 0', // Arguably this should be xorpd but that's for later - `66 0f ef c0 pxor %xmm0, %xmm0`]] ); + `pxor %xmm0, %xmm0`]] ); diff --git a/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js @@ -11,17 +11,17 @@ codegenTestX64_v128_v128( // The movaps is dest -> scratch and needs to be here. The test is // asserting that there is not an additional (redundant) move here. ` -44 0f 28 f8 movaps %xmm0, %xmm15 -45 0f c2 ff 00 cmpps \\$0x00, %xmm15, %xmm15 -66 41 0f db c7 pand %xmm15, %xmm0`], +movaps %xmm0, %xmm15 +cmpps \\$0x00, %xmm15, %xmm15 +pand %xmm15, %xmm0`], ['i32x4.trunc_sat_f32x4_u', ` -45 0f 57 ff xorps %xmm15, %xmm15 -41 0f 5f c7 maxps %xmm15, %xmm0`], +xorps %xmm15, %xmm15 +maxps %xmm15, %xmm0`], ['f32x4.convert_i32x4_u', ` -66 45 0f ef ff pxor %xmm15, %xmm15 -66 44 0f 3a 0e f8 55 pblendw \\$0x55, %xmm0, %xmm15 -66 41 0f fa c7 psubd %xmm15, %xmm0 -45 0f 5b ff cvtdq2ps %xmm15, %xmm15`]], +pxor %xmm15, %xmm15 +pblendw \\$0x55, %xmm0, %xmm15 +psubd %xmm15, %xmm0 +cvtdq2ps %xmm15, %xmm15`]], {no_suffix:true}); diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js b/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js @@ -20,5 +20,5 @@ codegenTestX64_adhoc( (func $f (export "f") (param i32) (param i32) (param i32) (param i32) (param i32) (result v128) (i32x4.add (v128.load8x8_s (local.get 4)) (v128.load8x8_s (local.get 2)))))`, 'f', - `66 43 0f 38 20 .. .. 
pmovsxbwq \\(%r15,%r(8|9|10|11|12|13),1\\), %xmm[0-9]+`, + `pmovsxbwq \\(%r15,%r(8|9|10|11|12|13),1\\), %xmm[0-9]+`, {no_prefix: true, no_suffix: true, log:true}); diff --git a/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js @@ -9,26 +9,26 @@ codegenTestX64_IGNOREDxv128_v128( [['i8x16.neg', ` -66 0f ef c0 pxor %xmm0, %xmm0 -66 0f f8 c1 psubb %xmm1, %xmm0`], +pxor %xmm0, %xmm0 +psubb %xmm1, %xmm0`], ['i16x8.neg', ` -66 0f ef c0 pxor %xmm0, %xmm0 -66 0f f9 c1 psubw %xmm1, %xmm0`], +pxor %xmm0, %xmm0 +psubw %xmm1, %xmm0`], ['i32x4.neg', ` -66 0f ef c0 pxor %xmm0, %xmm0 -66 0f fa c1 psubd %xmm1, %xmm0`], +pxor %xmm0, %xmm0 +psubd %xmm1, %xmm0`], ['i64x2.neg', ` -66 0f ef c0 pxor %xmm0, %xmm0 -66 0f fb c1 psubq %xmm1, %xmm0`]] ); +pxor %xmm0, %xmm0 +psubq %xmm1, %xmm0`]] ); // Floating point negate and absolute value, and bitwise not, prefer for the // registers to be the same and guarantee that no move is inserted if so. 
codegenTestX64_v128_v128( - [['f32x4.neg', `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`], - ['f64x2.neg', `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`], - ['f32x4.abs', `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], - ['f64x2.abs', `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + [['f32x4.neg', `pxorx ${RIPR}, %xmm0`], + ['f64x2.neg', `pxorx ${RIPR}, %xmm0`], + ['f32x4.abs', `pandx ${RIPR}, %xmm0`], + ['f64x2.abs', `pandx ${RIPR}, %xmm0`], ['v128.not', ` -66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 -66 41 0f ef c7 pxor %xmm15, %xmm0`]] ); +pcmpeqw %xmm15, %xmm15 +pxor %xmm15, %xmm0`]] ); diff --git a/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js @@ -6,33 +6,33 @@ if (!isAvxPresent()) { codegenTestX64_IGNOREDxv128_v128( [['i16x8.extadd_pairwise_i8x16_s', ` -66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0 -66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`], +movdqax ${RIPR}, %xmm0 +pmaddubsw %xmm1, %xmm0`], ['i16x8.extadd_pairwise_i8x16_u', ` -66 0f 6f c1 movdqa %xmm1, %xmm0 -66 0f 38 04 05 ${RIPRADDR} pmaddubswx ${RIPR}, %xmm0`], +movdqa %xmm1, %xmm0 +pmaddubswx ${RIPR}, %xmm0`], ['i32x4.extadd_pairwise_i16x8_s', ` -66 0f 6f c1 movdqa %xmm1, %xmm0 -66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`], +movdqa %xmm1, %xmm0 +pmaddwdx ${RIPR}, %xmm0`], ['i32x4.extadd_pairwise_i16x8_u', ` -66 0f 6f c1 movdqa %xmm1, %xmm0 -66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0 -66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0 -66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]); +movdqa %xmm1, %xmm0 +pxorx ${RIPR}, %xmm0 +pmaddwdx ${RIPR}, %xmm0 +padddx ${RIPR}, %xmm0`]]); } else { codegenTestX64_IGNOREDxv128_v128( [['i16x8.extadd_pairwise_i8x16_s', ` -66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0 -66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`], +movdqax ${RIPR}, %xmm0 +pmaddubsw %xmm1, %xmm0`], ['i16x8.extadd_pairwise_i8x16_u', ` -c4 e2 71 04 05 ${RIPRADDR} vpmaddubswx ${RIPR}, %xmm1, %xmm0`], +vpmaddubswx ${RIPR}, %xmm1, 
%xmm0`], ['i32x4.extadd_pairwise_i16x8_s', ` -c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`], +vpmaddwdx ${RIPR}, %xmm1, %xmm0`], ['i32x4.extadd_pairwise_i16x8_u', ` -c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0 -66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0 -66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]); +vpxorx ${RIPR}, %xmm1, %xmm0 +pmaddwdx ${RIPR}, %xmm0 +padddx ${RIPR}, %xmm0`]]); } diff --git a/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js @@ -4,33 +4,33 @@ codegenTestX64_v128_i32( [['v128.any_true', ` -33 c0 xor %eax, %eax -66 0f 38 17 c0 ptest %xmm0, %xmm0 -0f 95 c0 setnz %al`], +xor %eax, %eax +ptest %xmm0, %xmm0 +setnz %al`], ['i8x16.all_true', ` -33 c0 xor %eax, %eax -66 45 0f ef ff pxor %xmm15, %xmm15 -66 44 0f 74 f8 pcmpeqb %xmm0, %xmm15 -66 45 0f 38 17 ff ptest %xmm15, %xmm15 -0f 94 c0 setz %al`], +xor %eax, %eax +pxor %xmm15, %xmm15 +pcmpeqb %xmm0, %xmm15 +ptest %xmm15, %xmm15 +setz %al`], ['i16x8.all_true', ` -33 c0 xor %eax, %eax -66 45 0f ef ff pxor %xmm15, %xmm15 -66 44 0f 75 f8 pcmpeqw %xmm0, %xmm15 -66 45 0f 38 17 ff ptest %xmm15, %xmm15 -0f 94 c0 setz %al`], +xor %eax, %eax +pxor %xmm15, %xmm15 +pcmpeqw %xmm0, %xmm15 +ptest %xmm15, %xmm15 +setz %al`], ['i32x4.all_true', ` -33 c0 xor %eax, %eax -66 45 0f ef ff pxor %xmm15, %xmm15 -66 44 0f 76 f8 pcmpeqd %xmm0, %xmm15 -66 45 0f 38 17 ff ptest %xmm15, %xmm15 -0f 94 c0 setz %al`], +xor %eax, %eax +pxor %xmm15, %xmm15 +pcmpeqd %xmm0, %xmm15 +ptest %xmm15, %xmm15 +setz %al`], ['i64x2.all_true', ` -33 c0 xor %eax, %eax -66 45 0f ef ff pxor %xmm15, %xmm15 -66 44 0f 38 29 f8 pcmpeqq %xmm0, %xmm15 -66 45 0f 38 17 ff ptest %xmm15, %xmm15 -0f 94 c0 setz %al`]], {} +xor %eax, %eax +pxor %xmm15, %xmm15 +pcmpeqq %xmm0, %xmm15 +ptest %xmm15, %xmm15 +setz %al`]], {} ) // Utils. 
diff --git a/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js @@ -9,18 +9,18 @@ codegenTestX64_v128xLITERAL_v128( [['i8x16.shl', '(i32.const 2)', ` -66 0f fc c0 paddb %xmm0, %xmm0 -66 0f fc c0 paddb %xmm0, %xmm0`], - ['i16x8.shl', '(i32.const 2)', `66 0f 71 f0 02 psllw \\$0x02, %xmm0`], - ['i32x4.shl', '(i32.const 2)', `66 0f 72 f0 02 pslld \\$0x02, %xmm0`], - ['i64x2.shl', '(i32.const 2)', `66 0f 73 f0 02 psllq \\$0x02, %xmm0`], +paddb %xmm0, %xmm0 +paddb %xmm0, %xmm0`], + ['i16x8.shl', '(i32.const 2)', `psllw \\$0x02, %xmm0`], + ['i32x4.shl', '(i32.const 2)', `pslld \\$0x02, %xmm0`], + ['i64x2.shl', '(i32.const 2)', `psllq \\$0x02, %xmm0`], ['i8x16.shr_u', '(i32.const 2)', ` -66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0 -66 0f 71 d0 02 psrlw \\$0x02, %xmm0`], - ['i16x8.shr_s', '(i32.const 2)', `66 0f 71 e0 02 psraw \\$0x02, %xmm0`], - ['i16x8.shr_u', '(i32.const 2)', `66 0f 71 d0 02 psrlw \\$0x02, %xmm0`], - ['i32x4.shr_s', '(i32.const 2)', `66 0f 72 e0 02 psrad \\$0x02, %xmm0`], - ['i32x4.shr_u', '(i32.const 2)', `66 0f 72 d0 02 psrld \\$0x02, %xmm0`], - ['i64x2.shr_u', '(i32.const 2)', `66 0f 73 d0 02 psrlq \\$0x02, %xmm0`]] ); +pandx ${RIPR}, %xmm0 +psrlw \\$0x02, %xmm0`], + ['i16x8.shr_s', '(i32.const 2)', `psraw \\$0x02, %xmm0`], + ['i16x8.shr_u', '(i32.const 2)', `psrlw \\$0x02, %xmm0`], + ['i32x4.shr_s', '(i32.const 2)', `psrad \\$0x02, %xmm0`], + ['i32x4.shr_u', '(i32.const 2)', `psrld \\$0x02, %xmm0`], + ['i64x2.shr_u', '(i32.const 2)', `psrlq \\$0x02, %xmm0`]] ); diff --git a/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js @@ -11,48 +11,48 @@ codegenTestX64_v128xv128_v128([ // Identity op on second argument should generate a move ['i8x16.shuffle 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31', - `66 0f 6f c1 movdqa %xmm1, %xmm0`], + `movdqa %xmm1, %xmm0`], // Broadcast a byte from first argument ['i8x16.shuffle 5 5 
5 5 5 5 5 5 5 5 5 5 5 5 5 5', ` -66 0f 60 c0 punpcklbw %xmm0, %xmm0 -f3 0f 70 c0 55 pshufhw \\$0x55, %xmm0, %xmm0 -66 0f 70 c0 aa pshufd \\$0xAA, %xmm0, %xmm0`], +punpcklbw %xmm0, %xmm0 +pshufhw \\$0x55, %xmm0, %xmm0 +pshufd \\$0xAA, %xmm0, %xmm0`], // Broadcast a word from first argument ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5', ` -f2 0f 70 c0 aa pshuflw \\$0xAA, %xmm0, %xmm0 -66 0f 70 c0 00 pshufd \\$0x00, %xmm0, %xmm0`], +pshuflw \\$0xAA, %xmm0, %xmm0 +pshufd \\$0x00, %xmm0, %xmm0`], // Permute bytes ['i8x16.shuffle 2 1 4 3 6 5 8 7 10 9 12 11 14 13 0 15', ` -66 0f 38 00 05 ${RIPRADDR} pshufbx ${RIPR}, %xmm0`], +pshufbx ${RIPR}, %xmm0`], // Permute words ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13', ` -f2 0f 70 c0 b1 pshuflw \\$0xB1, %xmm0, %xmm0 -f3 0f 70 c0 b1 pshufhw \\$0xB1, %xmm0, %xmm0`], +pshuflw \\$0xB1, %xmm0, %xmm0 +pshufhw \\$0xB1, %xmm0, %xmm0`], // Permute doublewords ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11', - `66 0f 70 c0 b1 pshufd \\$0xB1, %xmm0, %xmm0`], + `pshufd \\$0xB1, %xmm0, %xmm0`], // Rotate right ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12', - `66 0f 3a 0f c0 0d palignr \\$0x0D, %xmm0, %xmm0`], + `palignr \\$0x0D, %xmm0, %xmm0`], // General shuffle + blend. The initial movdqa to scratch is unavoidable // unless we can convince the compiler that it's OK to destroy xmm1. 
['i8x16.shuffle 15 29 0 1 2 1 2 0 3 4 7 8 16 8 17 9', ` -66 44 0f 6f f9 movdqa %xmm1, %xmm15 -66 44 0f 38 00 3d ${RIPRADDR} pshufbx ${RIPR}, %xmm15 -66 0f 38 00 05 ${RIPRADDR} pshufbx ${RIPR}, %xmm0 -66 41 0f eb c7 por %xmm15, %xmm0`]]); +movdqa %xmm1, %xmm15 +pshufbx ${RIPR}, %xmm15 +pshufbx ${RIPR}, %xmm0 +por %xmm15, %xmm0`]]); codegenTestX64_v128xLITERAL_v128( [// Shift left bytes, shifting in zeroes @@ -66,13 +66,13 @@ codegenTestX64_v128xLITERAL_v128( ['i8x16.shuffle 16 16 16 0 1 2 3 4 5 6 7 8 9 10 11 12', '(v128.const i32x4 0 0 0 0)', ` -66 0f 73 f8 03 pslldq \\$0x03, %xmm0`], +pslldq \\$0x03, %xmm0`], // Shift right bytes, shifting in zeroes. See above. ['i8x16.shuffle 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18', '(v128.const i32x4 0 0 0 0)', ` -66 0f 73 d8 03 psrldq \\$0x03, %xmm0`]]); +psrldq \\$0x03, %xmm0`]]); // SSE4.1 PBLENDVB instruction is using XMM0, checking if blend // operation generated as expected. @@ -82,7 +82,7 @@ codegenTestX64_adhoc( (local.get 2)(local.get 3)))`, 'f', ` -66 0f 6f ca movdqa %xmm2, %xmm1 -66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0 -66 0f 38 10 cb pblendvb %xmm3, %xmm1 -66 0f 6f c1 movdqa %xmm1, %xmm0`); +movdqa %xmm2, %xmm1 +movdqax ${RIPR}, %xmm0 +pblendvb %xmm3, %xmm1 +movdqa %xmm1, %xmm0`); diff --git a/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js @@ -6,8 +6,8 @@ // about this type of test case. 
codegenTestX64_PTYPE_v128( - [['f32x4.splat', 'f32', `0f c6 c0 00 shufps \\$0x00, %xmm0, %xmm0`], - ['f64x2.splat', 'f64', `f2 0f 12 c0 movddup %xmm0, %xmm0`]] , {log:true}); + [['f32x4.splat', 'f32', `shufps \\$0x00, %xmm0, %xmm0`], + ['f64x2.splat', 'f64', `movddup %xmm0, %xmm0`]] , {log:true}); // Skip these on Win64 because the ABI differs and there's a different parameter // register, this changes not just the name slightly but the binary encoding in @@ -16,14 +16,14 @@ codegenTestX64_PTYPE_v128( if (!getBuildConfiguration("windows")) { codegenTestX64_PTYPE_v128( [['v128.load32_splat', 'i32', ` -f3 41 0f 10 04 3f movssl \\(%r15,%rdi,1\\), %xmm0 -0f c6 c0 00 shufps \\$0x00, %xmm0, %xmm0`], - ['v128.load64_splat', 'i32', `f2 41 0f 12 04 3f movddupq \\(%r15,%rdi,1\\), %xmm0`], - ['v128.load8x8_s', 'i32', `66 41 0f 38 20 04 3f pmovsxbwq \\(%r15,%rdi,1\\), %xmm0`], - ['v128.load8x8_u', 'i32', `66 41 0f 38 30 04 3f pmovzxbwq \\(%r15,%rdi,1\\), %xmm0`], - ['v128.load16x4_s', 'i32', `66 41 0f 38 23 04 3f pmovsxwdq \\(%r15,%rdi,1\\), %xmm0`], - ['v128.load16x4_u', 'i32', `66 41 0f 38 33 04 3f pmovzxwdq \\(%r15,%rdi,1\\), %xmm0`], - ['v128.load32x2_s', 'i32', `66 41 0f 38 25 04 3f pmovsxdqq \\(%r15,%rdi,1\\), %xmm0`], - ['v128.load32x2_u', 'i32', `66 41 0f 38 35 04 3f pmovzxdqq \\(%r15,%rdi,1\\), %xmm0`]], +movssl \\(%r15,%rdi,1\\), %xmm0 +shufps \\$0x00, %xmm0, %xmm0`], + ['v128.load64_splat', 'i32', `movddupq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load8x8_s', 'i32', `pmovsxbwq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load8x8_u', 'i32', `pmovzxbwq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load16x4_s', 'i32', `pmovsxwdq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load16x4_u', 'i32', `pmovzxwdq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load32x2_s', 'i32', `pmovsxdqq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load32x2_u', 'i32', `pmovzxdqq \\(%r15,%rdi,1\\), %xmm0`]], {memory:1}); }