Bug 1731856 - Prototype relaxed-SIMD min/max instructions. r=lth

See https://github.com/WebAssembly/relaxed-simd/issues/33

Differential Revision: https://phabricator.services.mozilla.com/D126390
This commit is contained in:
Yury Delendik 2021-09-27 20:31:41 +00:00
parent 99ad911a95
commit aefede4552
12 changed files with 259 additions and 5 deletions

View File

@ -137,6 +137,10 @@ const F32x4RelaxedFmaCode = 0xaf;
const F32x4RelaxedFmsCode = 0xb0;
const F64x2RelaxedFmaCode = 0xcf;
const F64x2RelaxedFmsCode = 0xd0;
// Relaxed-SIMD min/max opcodes.  The values are the same as the
// wasm::SimdOp enumerators of the same names.
const F32x4RelaxedMin = 0xb4;
const F32x4RelaxedMax = 0xe2;
const F64x2RelaxedMin = 0xd4;
const F64x2RelaxedMax = 0xee;
// SIMD wormhole opcodes.
const WORMHOLE_SELFTEST = 0;

View File

@ -10,6 +10,11 @@ function wasmEval(bytes, imports) {
return new WebAssembly.Instance(new WebAssembly.Module(bytes), imports);
}
// Check that `bytes` passes WebAssembly.validate() before compiling and
// instantiating it with `imports` via wasmEval().  Returns whatever
// wasmEval() returns.
function wasmValidateAndEval(bytes, imports) {
    const isValid = WebAssembly.validate(bytes);
    assertEq(isValid, true, "test of WasmValidate.cpp");
    const instance = wasmEval(bytes, imports);
    return instance;
}
function get(arr, loc, len) {
let res = [];
for ( let i=0; i < len; i++ ) {
@ -82,7 +87,7 @@ for ( let [opcode, as, xs, ys, operator] of [[F32x4RelaxedFmaCode, fas, fxs, fys
var k = xs.length;
var ans = iota(k).map((i) => operator(as[i], xs[i], ys[i]))
var ins = wasmEval(moduleWithSections([
var ins = wasmValidateAndEval(moduleWithSections([
sigSection([v2vSig]),
declSection([0]),
memorySection(1),
@ -102,4 +107,82 @@ for ( let [opcode, as, xs, ys, operator] of [[F32x4RelaxedFmaCode, fas, fxs, fys
ins.exports.run();
var result = get(mem, 0, k);
assertSame(result, ans);
assertEq(false, WebAssembly.validate(moduleWithSections([
sigSection([v2vSig]),
declSection([0]),
memorySection(1),
exportSection([{funcIndex: 0, name: "run"},
{memIndex: 0, name: "mem"}]),
bodySection([
funcBody({locals:[],
body: [...V128StoreExpr(0, [...V128Load(0),
...V128Load(0),
SimdPrefix, varU32(opcode)])]})])])));
}
// Relaxed MIN/MAX, https://github.com/WebAssembly/relaxed-simd/issues/33
// -1/Infinity evaluates to negative zero.
const Neg0 = -1/Infinity;
// Scalar test vectors: operands `a` and `b` together with the expected
// per-lane `min` and `max` results.
// NOTE(review): no entry uses NaN or pairs +0 with -0 -- presumably because
// relaxed min/max results may differ between platforms for those inputs;
// confirm against the issue linked above.
var minMaxTests = [
{a: 0, b: 0, min: 0, max: 0, },
{a: Neg0, b: Neg0, min: Neg0, max: Neg0, },
{a: 1/3, b: 2/3, min: 1/3, max: 2/3, },
{a: -1/3, b: -2/3, min: -2/3, max: -1/3, },
{a: -1000, b: 1, min: -1000, max: 1, },
{a: 10, b: -2, min: -2, max: 10, },
];
// Exercise the relaxed min/max opcodes for both lane shapes: k == 4 selects
// the f32x4 opcodes and a Float32Array view of memory, k == 2 selects the
// f64x2 opcodes and a Float64Array view.
for (let k of [4, 2]) {
    const minOpcode = k == 4 ? F32x4RelaxedMin : F64x2RelaxedMin;
    const maxOpcode = k == 4 ? F32x4RelaxedMax : F64x2RelaxedMax;

    // "min" and "max" each load one v128 from byte offset 16 and one from
    // byte offset 32, apply the opcode, and store the result at offset 0.
    var ins = wasmValidateAndEval(moduleWithSections([
        sigSection([v2vSig]),
        declSection([0, 0]),
        memorySection(1),
        exportSection([{funcIndex: 0, name: "min"},
                       {funcIndex: 1, name: "max"},
                       {memIndex: 0, name: "mem"}]),
        bodySection([
            funcBody({locals:[],
                      body: [...V128StoreExpr(0, [...V128Load(16),
                                                  ...V128Load(32),
                                                  SimdPrefix, varU32(minOpcode)])]}),
            funcBody({locals:[],
                      body: [...V128StoreExpr(0, [...V128Load(16),
                                                  ...V128Load(32),
                                                  SimdPrefix, varU32(maxOpcode)])]})])]));

    // The element type and the memory view depend only on k, so build them
    // once per lane shape rather than once per test rotation.
    var Ty = k == 4 ? Float32Array : Float64Array;
    var mem = new Ty(ins.exports.mem.buffer);

    for (let i = 0; i < minMaxTests.length; i++) {
        var minResult = new Ty(k);
        var maxResult = new Ty(k);
        // Rotate through the table so every test vector visits every lane.
        // For both lane widths, elements [k, 2k) cover byte offsets 16..31
        // and elements [2k, 3k) cover byte offsets 32..47.
        for (let j = 0; j < k; j++) {
            const {a, b, min, max} = minMaxTests[(j + i) % minMaxTests.length];
            mem[j + k] = a;
            mem[j + k * 2] = b;
            minResult[j] = min;
            maxResult[j] = max;
        }
        ins.exports.min();
        var result = get(mem, 0, k);
        assertSame(result, minResult);
        ins.exports.max();
        result = get(mem, 0, k);
        assertSame(result, maxResult);
    }

    // Negative test: a binary opcode given a single v128 operand must not
    // validate.  Exactly one function is declared to match the single body;
    // declaring two would make the module invalid because of the
    // declaration/body count mismatch, and the opcode's operand check would
    // never be what the assertion observes.
    for (let op of [minOpcode, maxOpcode]) {
        assertEq(false, WebAssembly.validate(moduleWithSections([
            sigSection([v2vSig]),
            declSection([0]),
            memorySection(1),
            exportSection([]),
            bodySection([
                funcBody({locals:[],
                          body: [...V128StoreExpr(0, [...V128Load(0),
                                                      SimdPrefix, varU32(op)])]})])])));
    }
}

View File

@ -3485,6 +3485,30 @@ class MacroAssembler : public MacroAssemblerSpecific {
inline void fmsFloat64x2(FloatRegister src1, FloatRegister src2,
FloatRegister srcDest) DEFINED_ON(x86_shared, arm64);
// Relaxed floating-point min/max for f32x4 and f64x2.
//
// The two-register forms take `src` and the combined source/destination
// `srcDest`.  The three-register forms take `lhs`, `rhs` and a separate
// `dest`, and are declared for arm64 only.
//
// NOTE(review): results for NaN or differently-signed zero inputs presumably
// may differ between platforms -- confirm against the relaxed-simd proposal.
inline void minFloat32x4Relaxed(FloatRegister src, FloatRegister srcDest)
DEFINED_ON(x86_shared, arm64);
inline void minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void maxFloat32x4Relaxed(FloatRegister src, FloatRegister srcDest)
DEFINED_ON(x86_shared, arm64);
inline void maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void minFloat64x2Relaxed(FloatRegister src, FloatRegister srcDest)
DEFINED_ON(x86_shared, arm64);
inline void minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void maxFloat64x2Relaxed(FloatRegister src, FloatRegister srcDest)
DEFINED_ON(x86_shared, arm64);
inline void maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
public:
// ========================================================================
// Truncate floating point.

View File

@ -3372,6 +3372,18 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
case wasm::SimdOp::I16x8Q15MulrSatS:
masm.q15MulrSatInt16x8(lhs, rhs, dest);
break;
case wasm::SimdOp::F32x4RelaxedMin:
masm.minFloat32x4Relaxed(lhs, rhs, dest);
break;
case wasm::SimdOp::F32x4RelaxedMax:
masm.maxFloat32x4Relaxed(lhs, rhs, dest);
break;
case wasm::SimdOp::F64x2RelaxedMin:
masm.minFloat64x2Relaxed(lhs, rhs, dest);
break;
case wasm::SimdOp::F64x2RelaxedMax:
masm.maxFloat64x2Relaxed(lhs, rhs, dest);
break;
default:
MOZ_CRASH("Binary SimdOp not implemented");
}

View File

@ -3849,6 +3849,46 @@ void MacroAssembler::fmsFloat64x2(FloatRegister src1, FloatRegister src2,
Fmls(Simd2D(srcDest), Simd2D(src1), Simd2D(src2));
}
// arm64 relaxed f32x4 minimum, two-register form: emits Fmin over the 4S
// views of `src` and `srcDest`, writing the result back to `srcDest`.
void MacroAssembler::minFloat32x4Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  const auto accumulator = Simd4S(srcDest);
  Fmin(accumulator, Simd4S(src), accumulator);
}
// arm64 relaxed f32x4 minimum, three-register form: emits Fmin over the 4S
// views of `rhs` and `lhs` (in that operand order), writing to `dest`.
void MacroAssembler::minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
                                         FloatRegister dest) {
  const auto output = Simd4S(dest);
  const auto firstOperand = Simd4S(rhs);
  const auto secondOperand = Simd4S(lhs);
  Fmin(output, firstOperand, secondOperand);
}
// arm64 relaxed f32x4 maximum, two-register form: emits Fmax over the 4S
// views of `src` and `srcDest`, writing the result back to `srcDest`.
void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  const auto accumulator = Simd4S(srcDest);
  Fmax(accumulator, Simd4S(src), accumulator);
}
// arm64 relaxed f32x4 maximum, three-register form: emits Fmax over the 4S
// views of `rhs` and `lhs` (in that operand order), writing to `dest`.
void MacroAssembler::maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
                                         FloatRegister dest) {
  const auto output = Simd4S(dest);
  const auto firstOperand = Simd4S(rhs);
  const auto secondOperand = Simd4S(lhs);
  Fmax(output, firstOperand, secondOperand);
}
// arm64 relaxed f64x2 minimum, two-register form: emits Fmin over the 2D
// views of `src` and `srcDest`, writing the result back to `srcDest`.
void MacroAssembler::minFloat64x2Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  const auto accumulator = Simd2D(srcDest);
  Fmin(accumulator, Simd2D(src), accumulator);
}
// arm64 relaxed f64x2 minimum, three-register form: emits Fmin over the 2D
// views of `rhs` and `lhs` (in that operand order), writing to `dest`.
void MacroAssembler::minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
                                         FloatRegister dest) {
  const auto output = Simd2D(dest);
  const auto firstOperand = Simd2D(rhs);
  const auto secondOperand = Simd2D(lhs);
  Fmin(output, firstOperand, secondOperand);
}
// arm64 relaxed f64x2 maximum, two-register form: emits Fmax over the 2D
// views of `src` and `srcDest`, writing the result back to `srcDest`.
void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  const auto accumulator = Simd2D(srcDest);
  Fmax(accumulator, Simd2D(src), accumulator);
}
// arm64 relaxed f64x2 maximum, three-register form: emits Fmax over the 2D
// views of `rhs` and `lhs` (in that operand order), writing to `dest`.
void MacroAssembler::maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
                                         FloatRegister dest) {
  const auto output = Simd2D(dest);
  const auto firstOperand = Simd2D(rhs);
  const auto secondOperand = Simd2D(lhs);
  Fmax(output, firstOperand, secondOperand);
}
//}}} check_macroassembler_style
// ===============================================================

View File

@ -2662,6 +2662,18 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
case wasm::SimdOp::I16x8Q15MulrSatS:
masm.q15MulrSatInt16x8(rhs, lhsDest);
break;
case wasm::SimdOp::F32x4RelaxedMin:
masm.minFloat32x4Relaxed(rhs, lhsDest);
break;
case wasm::SimdOp::F32x4RelaxedMax:
masm.maxFloat32x4Relaxed(rhs, lhsDest);
break;
case wasm::SimdOp::F64x2RelaxedMin:
masm.minFloat64x2Relaxed(rhs, lhsDest);
break;
case wasm::SimdOp::F64x2RelaxedMax:
masm.maxFloat64x2Relaxed(rhs, lhsDest);
break;
# ifdef ENABLE_WASM_SIMD_WORMHOLE
case wasm::SimdOp::MozWHSELFTEST:
masm.loadConstantSimd128(wasm::WormholeSignature(), lhsDest);

View File

@ -2872,6 +2872,26 @@ void MacroAssembler::fmsFloat64x2(FloatRegister src1, FloatRegister src2,
subFloat64x2(scratch, srcDest);
}
// x86-shared relaxed f32x4 minimum: emits a single vminps with `src` as the
// Operand and `srcDest` as both the register source and the destination.
// NOTE(review): presumably relies on the relaxed-SIMD proposal permitting the
// native instruction's NaN / signed-zero behaviour -- confirm.
void MacroAssembler::minFloat32x4Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  const Operand source(src);
  vminps(source, srcDest, srcDest);
}
// x86-shared relaxed f32x4 maximum: emits a single vmaxps with `src` as the
// Operand and `srcDest` as both the register source and the destination.
// NOTE(review): presumably relies on the relaxed-SIMD proposal permitting the
// native instruction's NaN / signed-zero behaviour -- confirm.
void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  const Operand source(src);
  vmaxps(source, srcDest, srcDest);
}
// x86-shared relaxed f64x2 minimum: emits a single vminpd with `src` as the
// Operand and `srcDest` as both the register source and the destination.
// NOTE(review): presumably relies on the relaxed-SIMD proposal permitting the
// native instruction's NaN / signed-zero behaviour -- confirm.
void MacroAssembler::minFloat64x2Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  const Operand source(src);
  vminpd(source, srcDest, srcDest);
}
// x86-shared relaxed f64x2 maximum: emits a single vmaxpd with `src` as the
// Operand and `srcDest` as both the register source and the destination.
// NOTE(review): presumably relies on the relaxed-SIMD proposal permitting the
// native instruction's NaN / signed-zero behaviour -- confirm.
void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  const Operand source(src);
  vmaxpd(source, srcDest, srcDest);
}
// ========================================================================
// Truncate floating point.

View File

@ -7389,6 +7389,23 @@ static void RelaxedFmsF64x2(MacroAssembler& masm, RegV128 rs1, RegV128 rs2,
RegV128 rsd) {
masm.fmsFloat64x2(rs1, rs2, rsd);
}
// Baseline emitter for the relaxed f32x4 min: forwards to the two-register
// MacroAssembler::minFloat32x4Relaxed with `rs` as the source and `rsd` as
// the combined source/destination.
static void RelaxedMinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.minFloat32x4Relaxed(rs, rsd);
}
// Baseline emitter for the relaxed f32x4 max: forwards to the two-register
// MacroAssembler::maxFloat32x4Relaxed with `rs` as the source and `rsd` as
// the combined source/destination.
static void RelaxedMaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.maxFloat32x4Relaxed(rs, rsd);
}
// Baseline emitter for the relaxed f64x2 min: forwards to the two-register
// MacroAssembler::minFloat64x2Relaxed with `rs` as the source and `rsd` as
// the combined source/destination.
static void RelaxedMinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.minFloat64x2Relaxed(rs, rsd);
}
// Baseline emitter for the relaxed f64x2 max: forwards to the two-register
// MacroAssembler::maxFloat64x2Relaxed with `rs` as the source and `rsd` as
// the combined source/destination.
static void RelaxedMaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.maxFloat64x2Relaxed(rs, rsd);
}
# endif
void BaseCompiler::emitVectorAndNot() {
@ -9008,6 +9025,26 @@ bool BaseCompiler::emitBody() {
}
CHECK_NEXT(dispatchTernary1(RelaxedFmsF64x2, ValType::V128));
break;
case uint32_t(SimdOp::F32x4RelaxedMin):
if (!moduleEnv_.v128RelaxedEnabled()) {
return iter_.unrecognizedOpcode(&op);
}
CHECK_NEXT(dispatchVectorBinary(RelaxedMinF32x4));
case uint32_t(SimdOp::F32x4RelaxedMax):
if (!moduleEnv_.v128RelaxedEnabled()) {
return iter_.unrecognizedOpcode(&op);
}
CHECK_NEXT(dispatchVectorBinary(RelaxedMaxF32x4));
case uint32_t(SimdOp::F64x2RelaxedMin):
if (!moduleEnv_.v128RelaxedEnabled()) {
return iter_.unrecognizedOpcode(&op);
}
CHECK_NEXT(dispatchVectorBinary(RelaxedMinF64x2));
case uint32_t(SimdOp::F64x2RelaxedMax):
if (!moduleEnv_.v128RelaxedEnabled()) {
return iter_.unrecognizedOpcode(&op);
}
CHECK_NEXT(dispatchVectorBinary(RelaxedMaxF64x2));
# endif
default:
break;

View File

@ -683,7 +683,7 @@ enum class SimdOp {
I32x4Sub = 0xb1,
// SubSatS = 0xb2
// SubSatU = 0xb3
// Dot = 0xb4
F32x4RelaxedMin = 0xb4,
I32x4Mul = 0xb5,
I32x4MinS = 0xb6,
I32x4MinU = 0xb7,
@ -715,7 +715,7 @@ enum class SimdOp {
I64x2Sub = 0xd1,
// Unused = 0xd2
// Unused = 0xd3
// Dot = 0xd4
F64x2RelaxedMin = 0xd4,
I64x2Mul = 0xd5,
I64x2Eq = 0xd6,
I64x2Ne = 0xd7,
@ -729,7 +729,7 @@ enum class SimdOp {
I64x2ExtMulHighUI32x4 = 0xdf,
F32x4Abs = 0xe0,
F32x4Neg = 0xe1,
// Round = 0xe2
F32x4RelaxedMax = 0xe2,
F32x4Sqrt = 0xe3,
F32x4Add = 0xe4,
F32x4Sub = 0xe5,
@ -741,7 +741,7 @@ enum class SimdOp {
F32x4PMax = 0xeb,
F64x2Abs = 0xec,
F64x2Neg = 0xed,
// Round = 0xee
F64x2RelaxedMax = 0xee,
F64x2Sqrt = 0xef,
F64x2Add = 0xf0,
F64x2Sub = 0xf1,

View File

@ -5463,6 +5463,15 @@ static bool EmitBodyExprs(FunctionCompiler& f) {
}
CHECK(EmitTernarySimd128(f, SimdOp(op.b1)));
}
case uint32_t(SimdOp::F32x4RelaxedMin):
case uint32_t(SimdOp::F32x4RelaxedMax):
case uint32_t(SimdOp::F64x2RelaxedMin):
case uint32_t(SimdOp::F64x2RelaxedMax): {
if (!f.moduleEnv().v128RelaxedEnabled()) {
return f.iter().unrecognizedOpcode(&op);
}
CHECK(EmitBinarySimd128(f, /* commutative= */ true, SimdOp(op.b1)));
}
# endif
default:

View File

@ -496,6 +496,10 @@ OpKind wasm::Classify(OpBytes op) {
case SimdOp::I64x2ExtMulLowUI32x4:
case SimdOp::I64x2ExtMulHighUI32x4:
case SimdOp::I16x8Q15MulrSatS:
case SimdOp::F32x4RelaxedMin:
case SimdOp::F32x4RelaxedMax:
case SimdOp::F64x2RelaxedMin:
case SimdOp::F64x2RelaxedMax:
WASM_SIMD_OP(OpKind::Binary);
case SimdOp::I8x16Neg:
case SimdOp::I16x8Neg:

View File

@ -1017,6 +1017,15 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env,
CHECK(
iter.readTernary(ValType::V128, &nothing, &nothing, &nothing));
}
case uint32_t(SimdOp::F32x4RelaxedMin):
case uint32_t(SimdOp::F32x4RelaxedMax):
case uint32_t(SimdOp::F64x2RelaxedMin):
case uint32_t(SimdOp::F64x2RelaxedMax): {
if (!env.v128RelaxedEnabled()) {
return iter.unrecognizedOpcode(&op);
}
CHECK(iter.readBinary(ValType::V128, &nothing, &nothing));
}
# endif
default: