diff --git a/js/src/jit-test/tests/wasm/binary.js b/js/src/jit-test/tests/wasm/binary.js index 5dad7f6542be..9793ae230e16 100644 --- a/js/src/jit-test/tests/wasm/binary.js +++ b/js/src/jit-test/tests/wasm/binary.js @@ -306,7 +306,7 @@ if (!wasmSimdEnabled()) { 0x9a, 0xa2, 0xa5, 0xa6, 0xaf, 0xb0, 0xb2, 0xb3, 0xb4, 0xbb, 0xc0, 0xc2, 0xc5, 0xc6, 0xcf, - 0xd0, 0xd2, 0xd3, 0xd4, 0xd8, 0xd9, 0xda, 0xdb, + 0xd0, 0xd2, 0xd3, 0xd4, 0xe2, 0xee, ]; for (let i of reservedSimd) { diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-non-cranelift.js b/js/src/jit-test/tests/wasm/simd/ad-hack-non-cranelift.js index d5bc532da943..f376a891531c 100644 --- a/js/src/jit-test/tests/wasm/simd/ad-hack-non-cranelift.js +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-non-cranelift.js @@ -270,6 +270,67 @@ ins.exports.i64_ne(); assertSame(get(mem64, 0, 2), [0n, -1n]); +// i64x2.lt_s, i64x2.gt_s, i64x2.le_s, and i64x2.ge_s + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "i64_lt_s") + (v128.store (i32.const 0) + (i64x2.lt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) + (func (export "i64_gt_s") + (v128.store (i32.const 0) + (i64x2.gt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) + (func (export "i64_le_s") + (v128.store (i32.const 0) + (i64x2.le_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) + (func (export "i64_ge_s") + (v128.store (i32.const 0) + (i64x2.ge_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) )`); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); + +set(mem64, 2, [0n, 1n, 1n, 0n]); +ins.exports.i64_lt_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); +ins.exports.i64_gt_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); +ins.exports.i64_le_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); +ins.exports.i64_ge_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); + +set(mem64, 2, [0n, -1n, -1n, 0n]); +ins.exports.i64_lt_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); +ins.exports.i64_gt_s(); +assertSame(get(mem64, 0, 
2), [-1n, 0n]); +ins.exports.i64_le_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); +ins.exports.i64_ge_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); + +set(mem64, 2, [-2n, 2n, -1n, 1n]); +ins.exports.i64_lt_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); +ins.exports.i64_gt_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); +ins.exports.i64_le_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); +ins.exports.i64_ge_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); + +set(mem64, 2, [-2n, 1n, -2n, 1n]); +ins.exports.i64_lt_s(); +assertSame(get(mem64, 0, 2), [0n, 0n]); +ins.exports.i64_gt_s(); +assertSame(get(mem64, 0, 2), [0n, 0n]); +ins.exports.i64_le_s(); +assertSame(get(mem64, 0, 2), [-1n, -1n]); +ins.exports.i64_ge_s(); +assertSame(get(mem64, 0, 2), [-1n, -1n]); + + function wasmCompile(text) { return new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(text))) } diff --git a/js/src/jit/MacroAssembler.h b/js/src/jit/MacroAssembler.h index 09f926969fca..30f909755b9e 100644 --- a/js/src/jit/MacroAssembler.h +++ b/js/src/jit/MacroAssembler.h @@ -2633,10 +2633,17 @@ class MacroAssembler : public MacroAssemblerSpecific { FloatRegister temp1, FloatRegister temp2) DEFINED_ON(x86_shared); - // On x86_shared, limited to !=, == + inline void compareForEqualityInt64x2(Assembler::Condition cond, + FloatRegister rhs, + FloatRegister lhsDest) + DEFINED_ON(x86_shared); + + inline void compareForOrderingInt64x2( + Assembler::Condition cond, FloatRegister rhs, FloatRegister lhsDest, + FloatRegister temp1, FloatRegister temp2) DEFINED_ON(x86_shared); + inline void compareInt64x2(Assembler::Condition cond, FloatRegister rhs, - FloatRegister lhsDest) - DEFINED_ON(x86_shared, arm64); + FloatRegister lhsDest) DEFINED_ON(arm64); inline void compareFloat32x4(Assembler::Condition cond, FloatRegister rhs, FloatRegister lhsDest) diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp index ef84273e52ef..45e19e83128c 100644 --- 
a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -2528,10 +2528,26 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) { temp2); break; case wasm::SimdOp::I64x2Eq: - masm.compareInt64x2(Assembler::Equal, rhs, lhsDest); + masm.compareForEqualityInt64x2(Assembler::Equal, rhs, lhsDest); break; case wasm::SimdOp::I64x2Ne: - masm.compareInt64x2(Assembler::NotEqual, rhs, lhsDest); + masm.compareForEqualityInt64x2(Assembler::NotEqual, rhs, lhsDest); + break; + case wasm::SimdOp::I64x2LtS: + masm.compareForOrderingInt64x2(Assembler::LessThan, rhs, lhsDest, temp1, + temp2); + break; + case wasm::SimdOp::I64x2GtS: + masm.compareForOrderingInt64x2(Assembler::GreaterThan, rhs, lhsDest, + temp1, temp2); + break; + case wasm::SimdOp::I64x2LeS: + masm.compareForOrderingInt64x2(Assembler::LessThanOrEqual, rhs, lhsDest, + temp1, temp2); + break; + case wasm::SimdOp::I64x2GeS: + masm.compareForOrderingInt64x2(Assembler::GreaterThanOrEqual, rhs, + lhsDest, temp1, temp2); break; case wasm::SimdOp::F32x4Eq: masm.compareFloat32x4(Assembler::Equal, rhs, lhsDest); diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp index 792e98271e11..b94339061c20 100644 --- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp +++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp @@ -914,6 +914,10 @@ void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) { case wasm::SimdOp::I32x4GtU: case wasm::SimdOp::I32x4LeU: case wasm::SimdOp::I32x4GeU: + case wasm::SimdOp::I64x2LtS: + case wasm::SimdOp::I64x2GtS: + case wasm::SimdOp::I64x2LeS: + case wasm::SimdOp::I64x2GeS: tempReg0 = tempSimd128(); tempReg1 = tempSimd128(); break; diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp b/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp index 584cf0155686..e82762efcfc1 100644 --- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp +++ 
b/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp @@ -677,9 +677,9 @@ void MacroAssemblerX86Shared::unsignedCompareInt32x4( } } -void MacroAssemblerX86Shared::compareInt64x2(FloatRegister lhs, Operand rhs, - Assembler::Condition cond, - FloatRegister output) { +void MacroAssemblerX86Shared::compareForEqualityInt64x2( + FloatRegister lhs, Operand rhs, Assembler::Condition cond, + FloatRegister output) { static const SimdConstant allOnes = SimdConstant::SplatX4(-1); switch (cond) { case Assembler::Condition::Equal: @@ -694,6 +694,69 @@ void MacroAssemblerX86Shared::compareInt64x2(FloatRegister lhs, Operand rhs, } } +void MacroAssemblerX86Shared::compareForOrderingInt64x2( + FloatRegister lhs, Operand rhs, Assembler::Condition cond, + FloatRegister temp1, FloatRegister temp2, FloatRegister output) { + static const SimdConstant allOnes = SimdConstant::SplatX4(-1); + // The pseudo code is for (e.g. > comparison): + // __m128i pcmpgtq_sse2 (__m128i a, __m128i b) { + // __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + // r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + // return _mm_shuffle_epi32(r, _MM_SHUFFLE(3,3,1,1)); + // } + // Credits to https://stackoverflow.com/a/65175746 + switch (cond) { + case Assembler::Condition::GreaterThan: + vmovdqa(rhs, temp1); + vmovdqa(Operand(lhs), temp2); + vpsubq(Operand(lhs), temp1, temp1); + vpcmpeqd(rhs, temp2, temp2); + vandpd(temp2, temp1, temp1); + asMasm().moveSimd128(lhs, output); + vpcmpgtd(rhs, output, output); + vpor(Operand(temp1), output, output); + vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output); + break; + case Assembler::Condition::LessThan: + vmovdqa(rhs, temp1); + vmovdqa(Operand(lhs), temp2); + vpcmpgtd(Operand(lhs), temp1, temp1); + vpcmpeqd(Operand(rhs), temp2, temp2); + asMasm().moveSimd128(lhs, output); + vpsubq(rhs, output, output); + vandpd(temp2, output, output); + vpor(Operand(temp1), output, output); + vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 
3, 3), output, output); + break; + case Assembler::Condition::GreaterThanOrEqual: + vmovdqa(rhs, temp1); + vmovdqa(Operand(lhs), temp2); + vpcmpgtd(Operand(lhs), temp1, temp1); + vpcmpeqd(Operand(rhs), temp2, temp2); + asMasm().moveSimd128(lhs, output); + vpsubq(rhs, output, output); + vandpd(temp2, output, output); + vpor(Operand(temp1), output, output); + vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output); + asMasm().bitwiseXorSimd128(allOnes, output); // a >= b is !(a < b) + break; + case Assembler::Condition::LessThanOrEqual: + vmovdqa(rhs, temp1); + vmovdqa(Operand(lhs), temp2); + vpsubq(Operand(lhs), temp1, temp1); + vpcmpeqd(rhs, temp2, temp2); + vandpd(temp2, temp1, temp1); + asMasm().moveSimd128(lhs, output); + vpcmpgtd(rhs, output, output); + vpor(Operand(temp1), output, output); + vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output); + asMasm().bitwiseXorSimd128(allOnes, output); // a <= b is !(a > b) + break; + default: + MOZ_CRASH("unexpected condition op"); + } +} + void MacroAssemblerX86Shared::compareFloat32x4(FloatRegister lhs, Operand rhs, Assembler::Condition cond, FloatRegister output) { diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h index 83272aeaa92d..e00387b21cbd 100644 --- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h @@ -2281,11 +2281,20 @@ void MacroAssembler::unsignedCompareInt32x4(Assembler::Condition cond, lhsDest, temp1, temp2); } -void MacroAssembler::compareInt64x2(Assembler::Condition cond, - FloatRegister rhs, FloatRegister lhsDest) { - MOZ_ASSERT(cond == Assembler::Condition::Equal || - cond == Assembler::Condition::NotEqual); - MacroAssemblerX86Shared::compareInt64x2(lhsDest, Operand(rhs), cond, lhsDest); +void MacroAssembler::compareForEqualityInt64x2(Assembler::Condition cond, + FloatRegister rhs, + FloatRegister lhsDest) { + MacroAssemblerX86Shared::compareForEqualityInt64x2(lhsDest, 
Operand(rhs), + cond, lhsDest); +} + +void MacroAssembler::compareForOrderingInt64x2(Assembler::Condition cond, + FloatRegister rhs, + FloatRegister lhsDest, + FloatRegister temp1, + FloatRegister temp2) { + MacroAssemblerX86Shared::compareForOrderingInt64x2( + lhsDest, Operand(rhs), cond, temp1, temp2, lhsDest); } void MacroAssembler::compareFloat32x4(Assembler::Condition cond, diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h index 041c34687d87..22d2a81944f0 100644 --- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.h +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h @@ -466,10 +466,12 @@ class MacroAssemblerX86Shared : public Assembler { void unsignedCompareInt32x4(FloatRegister lhs, Operand rhs, Assembler::Condition cond, FloatRegister output, FloatRegister tmp1, FloatRegister tmp2); - void compareInt64x2(FloatRegister lhs, Operand rhs, Assembler::Condition cond, - FloatRegister output); - void compareInt64x2(Assembler::Condition cond, const SimdConstant& rhs, - FloatRegister lhsDest); + void compareForEqualityInt64x2(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, + FloatRegister output); + void compareForOrderingInt64x2(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, FloatRegister temp1, + FloatRegister temp2, FloatRegister output); void compareFloat32x4(FloatRegister lhs, Operand rhs, Assembler::Condition cond, FloatRegister output); void compareFloat32x4(Assembler::Condition cond, const SimdConstant& rhs, diff --git a/js/src/wasm/WasmBaselineCompile.cpp b/js/src/wasm/WasmBaselineCompile.cpp index 74bc3177a7bb..f00a3e314662 100644 --- a/js/src/wasm/WasmBaselineCompile.cpp +++ b/js/src/wasm/WasmBaselineCompile.cpp @@ -14515,11 +14515,24 @@ static void CmpI32x4(MacroAssembler& masm, Assembler::Condition cond, masm.compareInt32x4(cond, rs, rsd); } -static void CmpI64x2(MacroAssembler& masm, Assembler::Condition cond, - RegV128 rs, RegV128 rsd) { - 
masm.compareInt64x2(cond, rs, rsd); +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) +static void CmpI64x2ForEquality(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd) { + masm.compareForEqualityInt64x2(cond, rs, rsd); } +static void CmpI64x2ForOrdering(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd, RegV128 temp1, + RegV128 temp2) { + masm.compareForOrderingInt64x2(cond, rs, rsd, temp1, temp2); +} +# else +static void CmpI64x2(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { + masm.compareInt64x2(cond, rs, rsd, temp1, temp2); +} +# endif // JS_CODEGEN_X86 || JS_CODEGEN_X64 + # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) static void CmpUI8x16(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { @@ -16581,10 +16594,42 @@ bool BaseCompiler::emitBody() { case uint32_t(SimdOp::I32x4GeU): CHECK_NEXT( dispatchVectorComparison(CmpUI32x4, Assembler::AboveOrEqual)); +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + case uint32_t(SimdOp::I64x2Eq): + CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForEquality, + Assembler::Equal)); + case uint32_t(SimdOp::I64x2Ne): + CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForEquality, + Assembler::NotEqual)); + case uint32_t(SimdOp::I64x2LtS): + CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForOrdering, + Assembler::LessThan)); + case uint32_t(SimdOp::I64x2GtS): + CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForOrdering, + Assembler::GreaterThan)); + case uint32_t(SimdOp::I64x2LeS): + CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForOrdering, + Assembler::LessThanOrEqual)); + case uint32_t(SimdOp::I64x2GeS): + CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForOrdering, + Assembler::GreaterThanOrEqual)); +# else case uint32_t(SimdOp::I64x2Eq): CHECK_NEXT(dispatchVectorComparison(CmpI64x2, Assembler::Equal)); case uint32_t(SimdOp::I64x2Ne): 
CHECK_NEXT(dispatchVectorComparison(CmpI64x2, Assembler::NotEqual)); + case uint32_t(SimdOp::I64x2LtS): + CHECK_NEXT(dispatchVectorComparison(CmpI64x2, Assembler::LessThan)); + case uint32_t(SimdOp::I64x2GtS): + CHECK_NEXT( + dispatchVectorComparison(CmpI64x2, Assembler::GreaterThan)); + case uint32_t(SimdOp::I64x2LeS): + CHECK_NEXT( + dispatchVectorComparison(CmpI64x2, Assembler::LessThanOrEqual)); + case uint32_t(SimdOp::I64x2GeS): + CHECK_NEXT(dispatchVectorComparison(CmpI64x2, + Assembler::GreaterThanOrEqual)); +# endif // JS_CODEGEN_X86 || JS_CODEGEN_X64 case uint32_t(SimdOp::F32x4Eq): CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::Equal)); case uint32_t(SimdOp::F32x4Ne): diff --git a/js/src/wasm/WasmConstants.h b/js/src/wasm/WasmConstants.h index b000d5c861e1..3f8954ff9e77 100644 --- a/js/src/wasm/WasmConstants.h +++ b/js/src/wasm/WasmConstants.h @@ -693,10 +693,10 @@ enum class SimdOp { I64x2Mul = 0xd5, I64x2Eq = 0xd6, I64x2Ne = 0xd7, - // Unused = 0xd8 - // Unused = 0xd9 - // Unused = 0xda - // Unused = 0xdb + I64x2LtS = 0xd8, + I64x2GtS = 0xd9, + I64x2LeS = 0xda, + I64x2GeS = 0xdb, I64x2ExtMulLowSI32x4 = 0xdc, I64x2ExtMulHighSI32x4 = 0xdd, I64x2ExtMulLowUI32x4 = 0xde, diff --git a/js/src/wasm/WasmIonCompile.cpp b/js/src/wasm/WasmIonCompile.cpp index 83e2203af9aa..d8acb88b4c4a 100644 --- a/js/src/wasm/WasmIonCompile.cpp +++ b/js/src/wasm/WasmIonCompile.cpp @@ -5060,6 +5060,10 @@ static bool EmitBodyExprs(FunctionCompiler& f) { case uint32_t(SimdOp::I32x4LeU): case uint32_t(SimdOp::I32x4GeS): case uint32_t(SimdOp::I32x4GeU): + case uint32_t(SimdOp::I64x2LtS): + case uint32_t(SimdOp::I64x2GtS): + case uint32_t(SimdOp::I64x2LeS): + case uint32_t(SimdOp::I64x2GeS): case uint32_t(SimdOp::F32x4Lt): case uint32_t(SimdOp::F32x4Gt): case uint32_t(SimdOp::F32x4Le): diff --git a/js/src/wasm/WasmOpIter.cpp b/js/src/wasm/WasmOpIter.cpp index c8e8cf3731a8..e6b787c3d6f5 100644 --- a/js/src/wasm/WasmOpIter.cpp +++ b/js/src/wasm/WasmOpIter.cpp @@ -403,6 +403,10 @@ 
OpKind wasm::Classify(OpBytes op) { case SimdOp::I32x4GeU: case SimdOp::I64x2Eq: case SimdOp::I64x2Ne: + case SimdOp::I64x2LtS: + case SimdOp::I64x2GtS: + case SimdOp::I64x2LeS: + case SimdOp::I64x2GeS: case SimdOp::F32x4Eq: case SimdOp::F32x4Ne: case SimdOp::F32x4Lt: diff --git a/js/src/wasm/WasmValidate.cpp b/js/src/wasm/WasmValidate.cpp index 3cae0595060a..8a27bca1eb58 100644 --- a/js/src/wasm/WasmValidate.cpp +++ b/js/src/wasm/WasmValidate.cpp @@ -1078,6 +1078,10 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env, case uint32_t(SimdOp::I32x4GeU): case uint32_t(SimdOp::I64x2Eq): case uint32_t(SimdOp::I64x2Ne): + case uint32_t(SimdOp::I64x2LtS): + case uint32_t(SimdOp::I64x2GtS): + case uint32_t(SimdOp::I64x2LeS): + case uint32_t(SimdOp::I64x2GeS): case uint32_t(SimdOp::F32x4Eq): case uint32_t(SimdOp::F32x4Ne): case uint32_t(SimdOp::F32x4Lt):