Bug 1757244 - Support VEX encoding for misc multiplication operations. r=jseward

Differential Revision: https://phabricator.services.mozilla.com/D140112
This commit is contained in:
Yury Delendik 2022-03-15 21:55:51 +00:00
parent 2af2e9f582
commit 2f1eb45418
7 changed files with 148 additions and 203 deletions

View File

@ -170,6 +170,24 @@ c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0
['f32x4.pmax', `c5 e8 5f c1 vmaxps %xmm1, %xmm2, %xmm0`], ['f32x4.pmax', `c5 e8 5f c1 vmaxps %xmm1, %xmm2, %xmm0`],
['f64x2.pmin', `c5 e9 5d c1 vminpd %xmm1, %xmm2, %xmm0`], ['f64x2.pmin', `c5 e9 5d c1 vminpd %xmm1, %xmm2, %xmm0`],
['f64x2.pmax', `c5 e9 5f c1 vmaxpd %xmm1, %xmm2, %xmm0`], ['f64x2.pmax', `c5 e9 5f c1 vmaxpd %xmm1, %xmm2, %xmm0`],
['i16x8.extmul_high_i8x16_s', `
66 44 0f 3a 0f fa 08 palignr \\$0x08, %xmm2, %xmm15
c4 42 79 20 ff vpmovsxbw %xmm15, %xmm15
66 0f 3a 0f c1 08 palignr \\$0x08, %xmm1, %xmm0
c4 e2 79 20 c0 vpmovsxbw %xmm0, %xmm0
66 41 0f d5 c7 pmullw %xmm15, %xmm0`],
['i32x4.extmul_low_i16x8_u', `
c5 71 e4 fa vpmulhuw %xmm2, %xmm1, %xmm15
c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0
66 41 0f 61 c7 punpcklwd %xmm15, %xmm0`],
['i64x2.extmul_low_i32x4_s', `
c5 79 70 f9 10 vpshufd \\$0x10, %xmm1, %xmm15
c5 f9 70 c2 10 vpshufd \\$0x10, %xmm2, %xmm0
66 41 0f 38 28 c7 pmuldq %xmm15, %xmm0`],
['i16x8.q15mulr_sat_s', `
c4 e2 71 0b c2 vpmulhrsw %xmm2, %xmm1, %xmm0
c5 79 75 3d ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm0, %xmm15
66 41 0f ef c7 pxor %xmm15, %xmm0`],
]); ]);
// Bitwise binary ops // Bitwise binary ops

View File

@ -2485,86 +2485,57 @@ class MacroAssembler : public MacroAssemblerSpecific {
// Note for the extMul opcodes, the NxM designation is for the input lanes; // Note for the extMul opcodes, the NxM designation is for the input lanes;
// the output lanes are twice as wide. // the output lanes are twice as wide.
inline void extMulLowInt8x16(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64);
inline void extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs, inline void extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void extMulHighInt8x16(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs, inline void extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void unsignedExtMulLowInt8x16(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulLowInt8x16(FloatRegister lhs, FloatRegister rhs, inline void unsignedExtMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void unsignedExtMulHighInt8x16(FloatRegister rhs,
FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulHighInt8x16(FloatRegister lhs, FloatRegister rhs, inline void unsignedExtMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void extMulLowInt16x8(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs, inline void extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void extMulHighInt16x8(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs, inline void extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void unsignedExtMulLowInt16x8(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulLowInt16x8(FloatRegister lhs, FloatRegister rhs, inline void unsignedExtMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void unsignedExtMulHighInt16x8(FloatRegister rhs,
FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulHighInt16x8(FloatRegister lhs, FloatRegister rhs, inline void unsignedExtMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void extMulLowInt32x4(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs, inline void extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void extMulHighInt32x4(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs, inline void extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void unsignedExtMulLowInt32x4(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulLowInt32x4(FloatRegister lhs, FloatRegister rhs, inline void unsignedExtMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void unsignedExtMulHighInt32x4(FloatRegister rhs,
FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulHighInt32x4(FloatRegister lhs, FloatRegister rhs, inline void unsignedExtMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
inline void q15MulrSatInt16x8(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
inline void q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs, inline void q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64); FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
// Integer Negate // Integer Negate

View File

@ -2659,137 +2659,72 @@ void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
Umlal(Simd2D(dest), Simd2S(scratch), Simd2S(temp1)); Umlal(Simd2D(dest), Simd2S(scratch), Simd2S(temp1));
} }
void MacroAssembler::extMulLowInt8x16(FloatRegister rhs,
FloatRegister lhsDest) {
Smull(Simd8H(lhsDest), Simd8B(lhsDest), Simd8B(rhs));
}
void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs, void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Smull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs)); Smull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs));
} }
void MacroAssembler::extMulHighInt8x16(FloatRegister rhs,
FloatRegister lhsDest) {
Smull2(Simd8H(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
}
void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs, void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Smull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs)); Smull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
} }
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister rhs,
FloatRegister lhsDest) {
Umull(Simd8H(lhsDest), Simd8B(lhsDest), Simd8B(rhs));
}
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs, void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
FloatRegister rhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Umull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs)); Umull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs));
} }
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister rhs,
FloatRegister lhsDest) {
Umull2(Simd8H(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
}
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs, void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
FloatRegister rhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Umull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs)); Umull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
} }
void MacroAssembler::extMulLowInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
Smull(Simd4S(lhsDest), Simd4H(lhsDest), Simd4H(rhs));
}
void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs, void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Smull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs)); Smull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs));
} }
void MacroAssembler::extMulHighInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
Smull2(Simd4S(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
}
void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs, void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Smull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs)); Smull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
} }
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
Umull(Simd4S(lhsDest), Simd4H(lhsDest), Simd4H(rhs));
}
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs, void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
FloatRegister rhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Umull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs)); Umull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs));
} }
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
Umull2(Simd4S(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
}
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs, void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
FloatRegister rhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Umull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs)); Umull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
} }
void MacroAssembler::extMulLowInt32x4(FloatRegister rhs,
FloatRegister lhsDest) {
Smull(Simd2D(lhsDest), Simd2S(lhsDest), Simd2S(rhs));
}
void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs, void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Smull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs)); Smull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs));
} }
void MacroAssembler::extMulHighInt32x4(FloatRegister rhs,
FloatRegister lhsDest) {
Smull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
}
void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs, void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Smull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs)); Smull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs));
} }
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister rhs,
FloatRegister lhsDest) {
Umull(Simd2D(lhsDest), Simd2S(lhsDest), Simd2S(rhs));
}
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs, void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
FloatRegister rhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Umull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs)); Umull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs));
} }
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
FloatRegister lhsDest) {
Umull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
}
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs, void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
FloatRegister rhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Umull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs)); Umull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs));
} }
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
Sqrdmulh(Simd8H(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
}
void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs, void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) { FloatRegister dest) {
Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));

View File

@ -2641,43 +2641,43 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
masm.widenDotInt16x8(lhs, rhs, dest); masm.widenDotInt16x8(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I16x8ExtmulLowI8x16S: case wasm::SimdOp::I16x8ExtmulLowI8x16S:
masm.extMulLowInt8x16(rhs, lhsDest); masm.extMulLowInt8x16(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I16x8ExtmulHighI8x16S: case wasm::SimdOp::I16x8ExtmulHighI8x16S:
masm.extMulHighInt8x16(rhs, lhsDest); masm.extMulHighInt8x16(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I16x8ExtmulLowI8x16U: case wasm::SimdOp::I16x8ExtmulLowI8x16U:
masm.unsignedExtMulLowInt8x16(rhs, lhsDest); masm.unsignedExtMulLowInt8x16(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I16x8ExtmulHighI8x16U: case wasm::SimdOp::I16x8ExtmulHighI8x16U:
masm.unsignedExtMulHighInt8x16(rhs, lhsDest); masm.unsignedExtMulHighInt8x16(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I32x4ExtmulLowI16x8S: case wasm::SimdOp::I32x4ExtmulLowI16x8S:
masm.extMulLowInt16x8(rhs, lhsDest); masm.extMulLowInt16x8(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I32x4ExtmulHighI16x8S: case wasm::SimdOp::I32x4ExtmulHighI16x8S:
masm.extMulHighInt16x8(rhs, lhsDest); masm.extMulHighInt16x8(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I32x4ExtmulLowI16x8U: case wasm::SimdOp::I32x4ExtmulLowI16x8U:
masm.unsignedExtMulLowInt16x8(rhs, lhsDest); masm.unsignedExtMulLowInt16x8(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I32x4ExtmulHighI16x8U: case wasm::SimdOp::I32x4ExtmulHighI16x8U:
masm.unsignedExtMulHighInt16x8(rhs, lhsDest); masm.unsignedExtMulHighInt16x8(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I64x2ExtmulLowI32x4S: case wasm::SimdOp::I64x2ExtmulLowI32x4S:
masm.extMulLowInt32x4(rhs, lhsDest); masm.extMulLowInt32x4(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I64x2ExtmulHighI32x4S: case wasm::SimdOp::I64x2ExtmulHighI32x4S:
masm.extMulHighInt32x4(rhs, lhsDest); masm.extMulHighInt32x4(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I64x2ExtmulLowI32x4U: case wasm::SimdOp::I64x2ExtmulLowI32x4U:
masm.unsignedExtMulLowInt32x4(rhs, lhsDest); masm.unsignedExtMulLowInt32x4(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I64x2ExtmulHighI32x4U: case wasm::SimdOp::I64x2ExtmulHighI32x4U:
masm.unsignedExtMulHighInt32x4(rhs, lhsDest); masm.unsignedExtMulHighInt32x4(lhs, rhs, dest);
break; break;
case wasm::SimdOp::I16x8Q15MulrSatS: case wasm::SimdOp::I16x8Q15MulrSatS:
masm.q15MulrSatInt16x8(rhs, lhsDest); masm.q15MulrSatInt16x8(lhs, rhs, dest);
break; break;
case wasm::SimdOp::F32x4RelaxedMin: case wasm::SimdOp::F32x4RelaxedMin:
masm.minFloat32x4Relaxed(lhs, rhs, dest); masm.minFloat32x4Relaxed(lhs, rhs, dest);
@ -2693,13 +2693,13 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
break; break;
# ifdef ENABLE_WASM_SIMD_WORMHOLE # ifdef ENABLE_WASM_SIMD_WORMHOLE
case wasm::SimdOp::MozWHSELFTEST: case wasm::SimdOp::MozWHSELFTEST:
masm.loadConstantSimd128(wasm::WormholeSignature(), lhsDest); masm.loadConstantSimd128(wasm::WormholeSignature(), dest);
break; break;
case wasm::SimdOp::MozWHPMADDUBSW: case wasm::SimdOp::MozWHPMADDUBSW:
masm.vpmaddubsw(rhs, lhsDest, lhsDest); masm.vpmaddubsw(rhs, lhs, dest);
break; break;
case wasm::SimdOp::MozWHPMADDWD: case wasm::SimdOp::MozWHPMADDWD:
masm.vpmaddwd(Operand(rhs), lhsDest, lhsDest); masm.vpmaddwd(Operand(rhs), lhs, dest);
break; break;
# endif # endif
default: default:

View File

@ -1089,10 +1089,25 @@ void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
case wasm::SimdOp::I16x8NarrowI32x4S: case wasm::SimdOp::I16x8NarrowI32x4S:
case wasm::SimdOp::I16x8NarrowI32x4U: case wasm::SimdOp::I16x8NarrowI32x4U:
case wasm::SimdOp::I32x4DotI16x8S: case wasm::SimdOp::I32x4DotI16x8S:
case wasm::SimdOp::I16x8ExtmulLowI8x16S:
case wasm::SimdOp::I16x8ExtmulHighI8x16S:
case wasm::SimdOp::I16x8ExtmulLowI8x16U:
case wasm::SimdOp::I16x8ExtmulHighI8x16U:
case wasm::SimdOp::I32x4ExtmulLowI16x8S:
case wasm::SimdOp::I32x4ExtmulHighI16x8S:
case wasm::SimdOp::I32x4ExtmulLowI16x8U:
case wasm::SimdOp::I32x4ExtmulHighI16x8U:
case wasm::SimdOp::I64x2ExtmulLowI32x4S:
case wasm::SimdOp::I64x2ExtmulHighI32x4S:
case wasm::SimdOp::I64x2ExtmulLowI32x4U:
case wasm::SimdOp::I64x2ExtmulHighI32x4U:
case wasm::SimdOp::I16x8Q15MulrSatS:
case wasm::SimdOp::F32x4RelaxedMin: case wasm::SimdOp::F32x4RelaxedMin:
case wasm::SimdOp::F32x4RelaxedMax: case wasm::SimdOp::F32x4RelaxedMax:
case wasm::SimdOp::F64x2RelaxedMin: case wasm::SimdOp::F64x2RelaxedMin:
case wasm::SimdOp::F64x2RelaxedMax: case wasm::SimdOp::F64x2RelaxedMax:
case wasm::SimdOp::MozWHPMADDUBSW:
case wasm::SimdOp::MozWHPMADDWD:
if (isThreeOpAllowed()) { if (isThreeOpAllowed()) {
auto* lir = new (alloc()) auto* lir = new (alloc())
LWasmBinarySimd128(op, useRegisterAtStart(lhs), LWasmBinarySimd128(op, useRegisterAtStart(lhs),

View File

@ -1756,113 +1756,119 @@ void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
// discussion on the PR (scroll down far enough) on how to avoid one of them, // discussion on the PR (scroll down far enough) on how to avoid one of them,
// but we need benchmarking + correctness proofs. // but we need benchmarking + correctness proofs.
void MacroAssembler::extMulLowInt8x16(FloatRegister rhs, void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister lhsDest) { FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
widenLowInt8x16(rhs, scratch); widenLowInt8x16(rhs, scratch);
widenLowInt8x16(lhsDest, lhsDest); widenLowInt8x16(lhs, dest);
mulInt16x8(lhsDest, scratch, lhsDest); mulInt16x8(dest, scratch, dest);
} }
void MacroAssembler::extMulHighInt8x16(FloatRegister rhs, void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister lhsDest) { FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
widenHighInt8x16(rhs, scratch); widenHighInt8x16(rhs, scratch);
widenHighInt8x16(lhsDest, lhsDest); widenHighInt8x16(lhs, dest);
mulInt16x8(lhsDest, scratch, lhsDest); mulInt16x8(dest, scratch, dest);
} }
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister rhs, void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
FloatRegister lhsDest) { FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
unsignedWidenLowInt8x16(rhs, scratch); unsignedWidenLowInt8x16(rhs, scratch);
unsignedWidenLowInt8x16(lhsDest, lhsDest); unsignedWidenLowInt8x16(lhs, dest);
mulInt16x8(lhsDest, scratch, lhsDest); mulInt16x8(dest, scratch, dest);
} }
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister rhs, void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
FloatRegister lhsDest) { FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
unsignedWidenHighInt8x16(rhs, scratch); unsignedWidenHighInt8x16(rhs, scratch);
unsignedWidenHighInt8x16(lhsDest, lhsDest); unsignedWidenHighInt8x16(lhs, dest);
mulInt16x8(lhsDest, scratch, lhsDest); mulInt16x8(dest, scratch, dest);
} }
void MacroAssembler::extMulLowInt16x8(FloatRegister rhs, void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister lhsDest) { FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
vmovdqa(lhsDest, scratch); FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
vpmullw(Operand(rhs), lhsDest, lhsDest); vpmulhw(Operand(rhs), lhsCopy, scratch);
vpmulhw(Operand(rhs), scratch, scratch); vpmullw(Operand(rhs), lhs, dest);
vpunpcklwd(scratch, lhsDest, lhsDest); vpunpcklwd(scratch, dest, dest);
} }
void MacroAssembler::extMulHighInt16x8(FloatRegister rhs, void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister lhsDest) { FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
vmovdqa(lhsDest, scratch); FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
vpmullw(Operand(rhs), lhsDest, lhsDest); vpmulhw(Operand(rhs), lhsCopy, scratch);
vpmulhw(Operand(rhs), scratch, scratch); vpmullw(Operand(rhs), lhs, dest);
vpunpckhwd(scratch, lhsDest, lhsDest); vpunpckhwd(scratch, dest, dest);
} }
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister rhs, void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
FloatRegister lhsDest) { FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
vmovdqa(lhsDest, scratch); FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
vpmullw(Operand(rhs), lhsDest, lhsDest); vpmulhuw(Operand(rhs), lhsCopy, scratch);
vpmulhuw(Operand(rhs), scratch, scratch); vpmullw(Operand(rhs), lhs, dest);
vpunpcklwd(scratch, lhsDest, lhsDest); vpunpcklwd(scratch, dest, dest);
} }
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister rhs, void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
FloatRegister lhsDest) { FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
vmovdqa(lhsDest, scratch); FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
vpmullw(Operand(rhs), lhsDest, lhsDest); vpmulhuw(Operand(rhs), lhsCopy, scratch);
vpmulhuw(Operand(rhs), scratch, scratch); vpmullw(Operand(rhs), lhs, dest);
vpunpckhwd(scratch, lhsDest, lhsDest); vpunpckhwd(scratch, dest, dest);
} }
void MacroAssembler::extMulLowInt32x4(FloatRegister rhs, void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister lhsDest) { FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhsDest, scratch); vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, lhsDest); vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
vpmuldq(scratch, lhsDest, lhsDest); vpmuldq(scratch, dest, dest);
} }
void MacroAssembler::extMulHighInt32x4(FloatRegister rhs, void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister lhsDest) { FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhsDest, scratch); vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, lhsDest); vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
vpmuldq(scratch, lhsDest, lhsDest); vpmuldq(scratch, dest, dest);
} }
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister rhs, void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
FloatRegister lhsDest) { FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhsDest, scratch); vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, lhsDest); vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
vpmuludq(Operand(scratch), lhsDest, lhsDest); vpmuludq(Operand(scratch), dest, dest);
} }
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs, void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
FloatRegister lhsDest) { FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhsDest, scratch); vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, lhsDest); vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
vpmuludq(Operand(scratch), lhsDest, lhsDest); vpmuludq(Operand(scratch), dest, dest);
} }
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs, void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister lhsDest) { FloatRegister dest) {
ScratchSimd128Scope scratch(*this); ScratchSimd128Scope scratch(*this);
vpmulhrsw(Operand(rhs), lhsDest, lhsDest); vpmulhrsw(Operand(rhs), lhs, dest);
vmovdqa(lhsDest, scratch); FloatRegister destCopy = moveSimd128IntIfNotAVX(dest, scratch);
vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), scratch, scratch); vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), destCopy, scratch);
vpxor(scratch, lhsDest, lhsDest); vpxor(scratch, dest, dest);
} }
// Integer negate // Integer negate

View File

@ -6814,55 +6814,55 @@ static void DotI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
} }
static void ExtMulLowI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulLowI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulLowInt8x16(rs, rsd); masm.extMulLowInt8x16(rsd, rs, rsd);
} }
static void ExtMulHighI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulHighI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulHighInt8x16(rs, rsd); masm.extMulHighInt8x16(rsd, rs, rsd);
} }
static void ExtMulLowUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulLowUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulLowInt8x16(rs, rsd); masm.unsignedExtMulLowInt8x16(rsd, rs, rsd);
} }
static void ExtMulHighUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulHighUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulHighInt8x16(rs, rsd); masm.unsignedExtMulHighInt8x16(rsd, rs, rsd);
} }
static void ExtMulLowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulLowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulLowInt16x8(rs, rsd); masm.extMulLowInt16x8(rsd, rs, rsd);
} }
static void ExtMulHighI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulHighI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulHighInt16x8(rs, rsd); masm.extMulHighInt16x8(rsd, rs, rsd);
} }
static void ExtMulLowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulLowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulLowInt16x8(rs, rsd); masm.unsignedExtMulLowInt16x8(rsd, rs, rsd);
} }
static void ExtMulHighUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulHighUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulHighInt16x8(rs, rsd); masm.unsignedExtMulHighInt16x8(rsd, rs, rsd);
} }
static void ExtMulLowI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulLowI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulLowInt32x4(rs, rsd); masm.extMulLowInt32x4(rsd, rs, rsd);
} }
static void ExtMulHighI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulHighI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulHighInt32x4(rs, rsd); masm.extMulHighInt32x4(rsd, rs, rsd);
} }
static void ExtMulLowUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulLowUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulLowInt32x4(rs, rsd); masm.unsignedExtMulLowInt32x4(rsd, rs, rsd);
} }
static void ExtMulHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void ExtMulHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulHighInt32x4(rs, rsd); masm.unsignedExtMulHighInt32x4(rsd, rs, rsd);
} }
static void Q15MulrSatS(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { static void Q15MulrSatS(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.q15MulrSatInt16x8(rs, rsd); masm.q15MulrSatInt16x8(rsd, rs, rsd);
} }
static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond, static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond,