Bug 1691490 - Implement SIMD i64x2.{gt,lt,ge,le}_s instructions. r=lth

Differential Revision: https://phabricator.services.mozilla.com/D106915
This commit is contained in:
Yury Delendik 2021-03-17 20:07:51 +00:00
parent 1e840180a0
commit e0eec4f887
13 changed files with 244 additions and 25 deletions

View File

@ -306,7 +306,7 @@ if (!wasmSimdEnabled()) {
0x9a, 0xa2, 0xa5, 0xa6, 0xaf,
0xb0, 0xb2, 0xb3, 0xb4, 0xbb,
0xc0, 0xc2, 0xc5, 0xc6, 0xcf,
0xd0, 0xd2, 0xd3, 0xd4, 0xd8, 0xd9, 0xda, 0xdb,
0xd0, 0xd2, 0xd3, 0xd4,
0xe2, 0xee,
];
for (let i of reservedSimd) {

View File

@ -270,6 +270,67 @@ ins.exports.i64_ne();
assertSame(get(mem64, 0, 2), [0n, -1n]);
// Signed lane-wise ordering on i64x2: i64x2.lt_s, i64x2.gt_s, i64x2.le_s, and
// i64x2.ge_s.  Each export compares the vector loaded from byte 16 with the
// vector loaded from byte 32 and stores the resulting mask at byte 0.
var ins = wasmEvalText(`
(module
(memory (export "mem") 1 1)
(func (export "i64_lt_s")
(v128.store (i32.const 0)
(i64x2.lt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
(func (export "i64_gt_s")
(v128.store (i32.const 0)
(i64x2.gt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
(func (export "i64_le_s")
(v128.store (i32.const 0)
(i64x2.le_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
(func (export "i64_ge_s")
(v128.store (i32.const 0)
(i64x2.ge_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) )`);
var mem64 = new BigInt64Array(ins.exports.mem.buffer);
// Each row is: the four values handed to set(mem64, 2, ...) (presumably the two
// lanes of the first operand followed by the two lanes of the second, given the
// loads at bytes 16 and 32), then the expected result lanes for lt_s, gt_s,
// le_s and ge_s, in that order.
for (let [lanes, ltMask, gtMask, leMask, geMask] of [
    // Small positive values, operands swapped lane by lane.
    [[0n, 1n, 1n, 0n], [-1n, 0n], [0n, -1n], [-1n, 0n], [0n, -1n]],
    // Zero against minus one: the comparison has to be signed.
    [[0n, -1n, -1n, 0n], [0n, -1n], [-1n, 0n], [0n, -1n], [-1n, 0n]],
    // Mixed signs with different magnitudes.
    [[-2n, 2n, -1n, 1n], [-1n, 0n], [0n, -1n], [-1n, 0n], [0n, -1n]],
    // Identical operands: strict comparisons fail, non-strict ones hold.
    [[-2n, 1n, -2n, 1n], [0n, 0n], [0n, 0n], [-1n, -1n], [-1n, -1n]],
]) {
    set(mem64, 2, lanes);
    ins.exports.i64_lt_s();
    assertSame(get(mem64, 0, 2), ltMask);
    ins.exports.i64_gt_s();
    assertSame(get(mem64, 0, 2), gtMask);
    ins.exports.i64_le_s();
    assertSame(get(mem64, 0, 2), leMask);
    ins.exports.i64_ge_s();
    assertSame(get(mem64, 0, 2), geMask);
}
// Converts wasm text to its binary encoding, compiles that into a
// WebAssembly.Module, and returns a fresh instance of the module (created
// without an import object).
function wasmCompile(text) {
    const bytes = wasmTextToBinary(text);
    const compiled = new WebAssembly.Module(bytes);
    return new WebAssembly.Instance(compiled);
}

View File

@ -2633,10 +2633,17 @@ class MacroAssembler : public MacroAssemblerSpecific {
FloatRegister temp1, FloatRegister temp2)
DEFINED_ON(x86_shared);
// On x86_shared, limited to !=, ==
inline void compareForEqualityInt64x2(Assembler::Condition cond,
FloatRegister rhs,
FloatRegister lhsDest)
DEFINED_ON(x86_shared);
inline void compareForOrderingInt64x2(
Assembler::Condition cond, FloatRegister rhs, FloatRegister lhsDest,
FloatRegister temp1, FloatRegister temp2) DEFINED_ON(x86_shared);
inline void compareInt64x2(Assembler::Condition cond, FloatRegister rhs,
FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64);
FloatRegister lhsDest) DEFINED_ON(arm64);
inline void compareFloat32x4(Assembler::Condition cond, FloatRegister rhs,
FloatRegister lhsDest)

View File

@ -2528,10 +2528,26 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
temp2);
break;
case wasm::SimdOp::I64x2Eq:
masm.compareInt64x2(Assembler::Equal, rhs, lhsDest);
masm.compareForEqualityInt64x2(Assembler::Equal, rhs, lhsDest);
break;
case wasm::SimdOp::I64x2Ne:
masm.compareInt64x2(Assembler::NotEqual, rhs, lhsDest);
masm.compareForEqualityInt64x2(Assembler::NotEqual, rhs, lhsDest);
break;
case wasm::SimdOp::I64x2LtS:
masm.compareForOrderingInt64x2(Assembler::LessThan, rhs, lhsDest, temp1,
temp2);
break;
case wasm::SimdOp::I64x2GtS:
masm.compareForOrderingInt64x2(Assembler::GreaterThan, rhs, lhsDest,
temp1, temp2);
break;
case wasm::SimdOp::I64x2LeS:
masm.compareForOrderingInt64x2(Assembler::LessThanOrEqual, rhs, lhsDest,
temp1, temp2);
break;
case wasm::SimdOp::I64x2GeS:
masm.compareForOrderingInt64x2(Assembler::GreaterThanOrEqual, rhs,
lhsDest, temp1, temp2);
break;
case wasm::SimdOp::F32x4Eq:
masm.compareFloat32x4(Assembler::Equal, rhs, lhsDest);

View File

@ -914,6 +914,10 @@ void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
case wasm::SimdOp::I32x4GtU:
case wasm::SimdOp::I32x4LeU:
case wasm::SimdOp::I32x4GeU:
case wasm::SimdOp::I64x2LtS:
case wasm::SimdOp::I64x2GtS:
case wasm::SimdOp::I64x2LeS:
case wasm::SimdOp::I64x2GeS:
tempReg0 = tempSimd128();
tempReg1 = tempSimd128();
break;

View File

@ -677,9 +677,9 @@ void MacroAssemblerX86Shared::unsignedCompareInt32x4(
}
}
void MacroAssemblerX86Shared::compareInt64x2(FloatRegister lhs, Operand rhs,
Assembler::Condition cond,
FloatRegister output) {
void MacroAssemblerX86Shared::compareForEqualityInt64x2(
FloatRegister lhs, Operand rhs, Assembler::Condition cond,
FloatRegister output) {
static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
switch (cond) {
case Assembler::Condition::Equal:
@ -694,6 +694,69 @@ void MacroAssemblerX86Shared::compareInt64x2(FloatRegister lhs, Operand rhs,
}
}
// Emits a signed, lane-wise ordering comparison of two i64x2 vectors built
// from 32-bit compares and a 64-bit subtract, following the pcmpgtq emulation
// sketched below.
//
//   lhs, rhs      - the operands; `lhs` is copied into `output` before being
//                   combined, so the two may name the same register.
//   cond          - GreaterThan, LessThan, GreaterThanOrEqual or
//                   LessThanOrEqual; any other condition crashes.
//   temp1, temp2  - scratch vector registers, clobbered.
//   output        - receives the comparison mask.
//
// Pseudo code for the ">" comparison:
//
//   __m128i pcmpgtq_sse2(__m128i a, __m128i b) {
//     __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a));
//     r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b));
//     return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
//   }
//
// Credits to https://stackoverflow.com/a/65175746
//
// "<" uses the same sequence with the operand roles swapped, and the
// non-strict conditions are produced by inverting the opposite strict result
// (a >= b is !(a < b), a <= b is !(a > b)).
void MacroAssemblerX86Shared::compareForOrderingInt64x2(
    FloatRegister lhs, Operand rhs, Assembler::Condition cond,
    FloatRegister temp1, FloatRegister temp2, FloatRegister output) {
  static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
  switch (cond) {
    case Assembler::Condition::GreaterThan:
      vmovdqa(rhs, temp1);
      vmovdqa(Operand(lhs), temp2);
      // temp1 = rhs - lhs (64-bit), temp2 = (lhs == rhs) per 32-bit element.
      vpsubq(Operand(lhs), temp1, temp1);
      vpcmpeqd(rhs, temp2, temp2);
      vandpd(temp2, temp1, temp1);
      // output = (lhs > rhs) per 32-bit element, merged with the masked
      // difference.
      asMasm().moveSimd128(lhs, output);
      vpcmpgtd(rhs, output, output);
      vpor(Operand(temp1), output, output);
      // Replicate 32-bit elements 1 and 3 over their 64-bit lanes, matching
      // the final shuffle of the pseudo code.
      vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output);
      break;
    case Assembler::Condition::LessThan:
      vmovdqa(rhs, temp1);
      vmovdqa(Operand(lhs), temp2);
      // temp1 = (rhs > lhs), temp2 = (lhs == rhs), both per 32-bit element.
      vpcmpgtd(Operand(lhs), temp1, temp1);
      vpcmpeqd(Operand(rhs), temp2, temp2);
      // output = (lhs - rhs) masked by the equality result, merged with temp1.
      asMasm().moveSimd128(lhs, output);
      vpsubq(rhs, output, output);
      vandpd(temp2, output, output);
      vpor(Operand(temp1), output, output);
      vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output);
      break;
    case Assembler::Condition::GreaterThanOrEqual:
      // Compute lhs < rhs exactly as above ...
      vmovdqa(rhs, temp1);
      vmovdqa(Operand(lhs), temp2);
      vpcmpgtd(Operand(lhs), temp1, temp1);
      vpcmpeqd(Operand(rhs), temp2, temp2);
      asMasm().moveSimd128(lhs, output);
      vpsubq(rhs, output, output);
      vandpd(temp2, output, output);
      vpor(Operand(temp1), output, output);
      vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output);
      // ... then invert it.  The mask lives in `output`, so that is the
      // register that must be flipped, even when `lhs` is a different one.
      asMasm().bitwiseXorSimd128(allOnes, output);
      break;
    case Assembler::Condition::LessThanOrEqual:
      // Compute lhs > rhs exactly as above ...
      vmovdqa(rhs, temp1);
      vmovdqa(Operand(lhs), temp2);
      vpsubq(Operand(lhs), temp1, temp1);
      vpcmpeqd(rhs, temp2, temp2);
      vandpd(temp2, temp1, temp1);
      asMasm().moveSimd128(lhs, output);
      vpcmpgtd(rhs, output, output);
      vpor(Operand(temp1), output, output);
      vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output);
      // ... then invert the mask held in `output`.
      asMasm().bitwiseXorSimd128(allOnes, output);
      break;
    default:
      MOZ_CRASH("unexpected condition op");
  }
}
void MacroAssemblerX86Shared::compareFloat32x4(FloatRegister lhs, Operand rhs,
Assembler::Condition cond,
FloatRegister output) {

View File

@ -2281,11 +2281,20 @@ void MacroAssembler::unsignedCompareInt32x4(Assembler::Condition cond,
lhsDest, temp1, temp2);
}
void MacroAssembler::compareInt64x2(Assembler::Condition cond,
FloatRegister rhs, FloatRegister lhsDest) {
MOZ_ASSERT(cond == Assembler::Condition::Equal ||
cond == Assembler::Condition::NotEqual);
MacroAssemblerX86Shared::compareInt64x2(lhsDest, Operand(rhs), cond, lhsDest);
// i64x2 equality-style comparison in the two-operand form: `lhsDest` is passed
// to MacroAssemblerX86Shared::compareForEqualityInt64x2 as both the left-hand
// operand and the output register, with `rhs` wrapped as an Operand.
void MacroAssembler::compareForEqualityInt64x2(Assembler::Condition cond,
FloatRegister rhs,
FloatRegister lhsDest) {
MacroAssemblerX86Shared::compareForEqualityInt64x2(lhsDest, Operand(rhs),
cond, lhsDest);
}
// i64x2 ordering comparison in the two-operand form: `lhsDest` is passed to
// MacroAssemblerX86Shared::compareForOrderingInt64x2 as both the left-hand
// operand and the output register; `temp1` and `temp2` are forwarded as the
// scratch registers that implementation requires.
void MacroAssembler::compareForOrderingInt64x2(Assembler::Condition cond,
FloatRegister rhs,
FloatRegister lhsDest,
FloatRegister temp1,
FloatRegister temp2) {
MacroAssemblerX86Shared::compareForOrderingInt64x2(
lhsDest, Operand(rhs), cond, temp1, temp2, lhsDest);
}
void MacroAssembler::compareFloat32x4(Assembler::Condition cond,

View File

@ -466,10 +466,12 @@ class MacroAssemblerX86Shared : public Assembler {
void unsignedCompareInt32x4(FloatRegister lhs, Operand rhs,
Assembler::Condition cond, FloatRegister output,
FloatRegister tmp1, FloatRegister tmp2);
void compareInt64x2(FloatRegister lhs, Operand rhs, Assembler::Condition cond,
FloatRegister output);
void compareInt64x2(Assembler::Condition cond, const SimdConstant& rhs,
FloatRegister lhsDest);
void compareForEqualityInt64x2(FloatRegister lhs, Operand rhs,
Assembler::Condition cond,
FloatRegister output);
void compareForOrderingInt64x2(FloatRegister lhs, Operand rhs,
Assembler::Condition cond, FloatRegister temp1,
FloatRegister temp2, FloatRegister output);
void compareFloat32x4(FloatRegister lhs, Operand rhs,
Assembler::Condition cond, FloatRegister output);
void compareFloat32x4(Assembler::Condition cond, const SimdConstant& rhs,

View File

@ -14515,11 +14515,24 @@ static void CmpI32x4(MacroAssembler& masm, Assembler::Condition cond,
masm.compareInt32x4(cond, rs, rsd);
}
static void CmpI64x2(MacroAssembler& masm, Assembler::Condition cond,
RegV128 rs, RegV128 rsd) {
masm.compareInt64x2(cond, rs, rsd);
# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
// Baseline-compiler emitter used for the i64x2 eq/ne opcodes on x86 and x64:
// forwards to MacroAssembler::compareForEqualityInt64x2 with `rs` as the
// right-hand operand and `rsd` as the combined left-hand operand/destination.
static void CmpI64x2ForEquality(MacroAssembler& masm, Assembler::Condition cond,
RegV128 rs, RegV128 rsd) {
masm.compareForEqualityInt64x2(cond, rs, rsd);
}
// Baseline-compiler emitter used for the signed i64x2 lt/gt/le/ge opcodes on
// x86 and x64: forwards to MacroAssembler::compareForOrderingInt64x2 with `rs`
// as the right-hand operand, `rsd` as the combined left-hand
// operand/destination, and `temp1`/`temp2` as the two scratch vector registers
// that routine takes.
static void CmpI64x2ForOrdering(MacroAssembler& masm, Assembler::Condition cond,
RegV128 rs, RegV128 rsd, RegV128 temp1,
RegV128 temp2) {
masm.compareForOrderingInt64x2(cond, rs, rsd, temp1, temp2);
}
# else
// Baseline-compiler emitter for every i64x2 comparison (eq, ne and the signed
// orderings) on targets other than x86/x64.  MacroAssembler::compareInt64x2 is
// declared for these targets as (cond, rhs, lhsDest) with no scratch
// registers, so this helper takes and forwards none: `rs` is the right-hand
// operand and `rsd` the combined left-hand operand/destination.
static void CmpI64x2(MacroAssembler& masm, Assembler::Condition cond,
                     RegV128 rs, RegV128 rsd) {
  masm.compareInt64x2(cond, rs, rsd);
}
# endif // JS_CODEGEN_X86 || JS_CODEGEN_X64
# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
static void CmpUI8x16(MacroAssembler& masm, Assembler::Condition cond,
RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) {
@ -16581,10 +16594,42 @@ bool BaseCompiler::emitBody() {
case uint32_t(SimdOp::I32x4GeU):
CHECK_NEXT(
dispatchVectorComparison(CmpUI32x4, Assembler::AboveOrEqual));
# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
case uint32_t(SimdOp::I64x2Eq):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForEquality,
Assembler::Equal));
case uint32_t(SimdOp::I64x2Ne):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForEquality,
Assembler::NotEqual));
case uint32_t(SimdOp::I64x2LtS):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForOrdering,
Assembler::LessThan));
case uint32_t(SimdOp::I64x2GtS):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForOrdering,
Assembler::GreaterThan));
case uint32_t(SimdOp::I64x2LeS):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForOrdering,
Assembler::LessThanOrEqual));
case uint32_t(SimdOp::I64x2GeS):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2ForOrdering,
Assembler::GreaterThanOrEqual));
# else
case uint32_t(SimdOp::I64x2Eq):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2, Assembler::Equal));
case uint32_t(SimdOp::I64x2Ne):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2, Assembler::NotEqual));
case uint32_t(SimdOp::I64x2LtS):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2, Assembler::LessThan));
case uint32_t(SimdOp::I64x2GtS):
CHECK_NEXT(
dispatchVectorComparison(CmpI64x2, Assembler::GreaterThan));
case uint32_t(SimdOp::I64x2LeS):
CHECK_NEXT(
dispatchVectorComparison(CmpI64x2, Assembler::LessThanOrEqual));
case uint32_t(SimdOp::I64x2GeS):
CHECK_NEXT(dispatchVectorComparison(CmpI64x2,
Assembler::GreaterThanOrEqual));
# endif // JS_CODEGEN_X86 || JS_CODEGEN_X64
case uint32_t(SimdOp::F32x4Eq):
CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::Equal));
case uint32_t(SimdOp::F32x4Ne):

View File

@ -693,10 +693,10 @@ enum class SimdOp {
I64x2Mul = 0xd5,
I64x2Eq = 0xd6,
I64x2Ne = 0xd7,
// Unused = 0xd8
// Unused = 0xd9
// Unused = 0xda
// Unused = 0xdb
I64x2LtS = 0xd8,
I64x2GtS = 0xd9,
I64x2LeS = 0xda,
I64x2GeS = 0xdb,
I64x2ExtMulLowSI32x4 = 0xdc,
I64x2ExtMulHighSI32x4 = 0xdd,
I64x2ExtMulLowUI32x4 = 0xde,

View File

@ -5060,6 +5060,10 @@ static bool EmitBodyExprs(FunctionCompiler& f) {
case uint32_t(SimdOp::I32x4LeU):
case uint32_t(SimdOp::I32x4GeS):
case uint32_t(SimdOp::I32x4GeU):
case uint32_t(SimdOp::I64x2LtS):
case uint32_t(SimdOp::I64x2GtS):
case uint32_t(SimdOp::I64x2LeS):
case uint32_t(SimdOp::I64x2GeS):
case uint32_t(SimdOp::F32x4Lt):
case uint32_t(SimdOp::F32x4Gt):
case uint32_t(SimdOp::F32x4Le):

View File

@ -403,6 +403,10 @@ OpKind wasm::Classify(OpBytes op) {
case SimdOp::I32x4GeU:
case SimdOp::I64x2Eq:
case SimdOp::I64x2Ne:
case SimdOp::I64x2LtS:
case SimdOp::I64x2GtS:
case SimdOp::I64x2LeS:
case SimdOp::I64x2GeS:
case SimdOp::F32x4Eq:
case SimdOp::F32x4Ne:
case SimdOp::F32x4Lt:

View File

@ -1078,6 +1078,10 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env,
case uint32_t(SimdOp::I32x4GeU):
case uint32_t(SimdOp::I64x2Eq):
case uint32_t(SimdOp::I64x2Ne):
case uint32_t(SimdOp::I64x2LtS):
case uint32_t(SimdOp::I64x2GtS):
case uint32_t(SimdOp::I64x2LeS):
case uint32_t(SimdOp::I64x2GeS):
case uint32_t(SimdOp::F32x4Eq):
case uint32_t(SimdOp::F32x4Ne):
case uint32_t(SimdOp::F32x4Lt):