Bug 1757244 - Support VEX encoding for misc multiplication operations. r=jseward

Differential Revision: https://phabricator.services.mozilla.com/D140112
This commit is contained in:
Yury Delendik 2022-03-15 21:55:51 +00:00
parent 2af2e9f582
commit 2f1eb45418
7 changed files with 148 additions and 203 deletions

View File

@ -170,6 +170,24 @@ c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0
['f32x4.pmax', `c5 e8 5f c1 vmaxps %xmm1, %xmm2, %xmm0`],
['f64x2.pmin', `c5 e9 5d c1 vminpd %xmm1, %xmm2, %xmm0`],
['f64x2.pmax', `c5 e9 5f c1 vmaxpd %xmm1, %xmm2, %xmm0`],
['i16x8.extmul_high_i8x16_s', `
66 44 0f 3a 0f fa 08 palignr \\$0x08, %xmm2, %xmm15
c4 42 79 20 ff vpmovsxbw %xmm15, %xmm15
66 0f 3a 0f c1 08 palignr \\$0x08, %xmm1, %xmm0
c4 e2 79 20 c0 vpmovsxbw %xmm0, %xmm0
66 41 0f d5 c7 pmullw %xmm15, %xmm0`],
['i32x4.extmul_low_i16x8_u', `
c5 71 e4 fa vpmulhuw %xmm2, %xmm1, %xmm15
c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0
66 41 0f 61 c7 punpcklwd %xmm15, %xmm0`],
['i64x2.extmul_low_i32x4_s', `
c5 79 70 f9 10 vpshufd \\$0x10, %xmm1, %xmm15
c5 f9 70 c2 10 vpshufd \\$0x10, %xmm2, %xmm0
66 41 0f 38 28 c7 pmuldq %xmm15, %xmm0`],
['i16x8.q15mulr_sat_s', `
c4 e2 71 0b c2 vpmulhrsw %xmm2, %xmm1, %xmm0
c5 79 75 3d ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm0, %xmm15
66 41 0f ef c7 pxor %xmm15, %xmm0`],
]);
// Bitwise binary ops

View File

@ -2485,86 +2485,57 @@ class MacroAssembler : public MacroAssemblerSpecific {
// Note for the extMul opcodes, the NxM designation is for the input lanes;
// the output lanes are twice as wide.
//
// NOTE(review): the declarations below read like a diff hunk whose +/- markers
// were lost: two-operand (rhs, lhsDest) and three-operand (lhs, rhs, dest)
// signatures are interleaved and several parameter lists are closed twice.
// Presumably only the three-operand forms with DEFINED_ON(x86_shared, arm64)
// are meant to remain -- confirm against the real header.
inline void extMulLowInt8x16(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64);
inline void extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void extMulHighInt8x16(FloatRegister rhs, FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void unsignedExtMulLowInt8x16(FloatRegister rhs, FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void unsignedExtMulHighInt8x16(FloatRegister rhs,
FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void extMulLowInt16x8(FloatRegister rhs, FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void extMulHighInt16x8(FloatRegister rhs, FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void unsignedExtMulLowInt16x8(FloatRegister rhs, FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void unsignedExtMulHighInt16x8(FloatRegister rhs,
FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void extMulLowInt32x4(FloatRegister rhs, FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void extMulHighInt32x4(FloatRegister rhs, FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void unsignedExtMulLowInt32x4(FloatRegister rhs, FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void unsignedExtMulHighInt32x4(FloatRegister rhs,
FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void unsignedExtMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
inline void q15MulrSatInt16x8(FloatRegister rhs, FloatRegister lhsDest)
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
inline void q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) DEFINED_ON(arm64);
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
// Integer Negate

View File

@ -2659,137 +2659,72 @@ void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
Umlal(Simd2D(dest), Simd2S(scratch), Simd2S(temp1));
}
// In-place form: lhsDest supplies the left input (8B view) and receives the
// Smull result (8H view); rhs supplies the right input (8B view).
void MacroAssembler::extMulLowInt8x16(FloatRegister rhs,
                                      FloatRegister lhsDest) {
  const auto wide = Simd8H(lhsDest);
  const auto left = Simd8B(lhsDest);
  const auto right = Simd8B(rhs);
  Smull(wide, left, right);
}
// Three-register form: Smull of the 8B views of lhs and rhs, written to the
// 8H view of dest.
void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  const auto wide = Simd8H(dest);
  const auto left = Simd8B(lhs);
  const auto right = Simd8B(rhs);
  Smull(wide, left, right);
}
// In-place form: lhsDest supplies the left input (16B view) and receives the
// Smull2 result (8H view); rhs supplies the right input (16B view).
void MacroAssembler::extMulHighInt8x16(FloatRegister rhs,
                                       FloatRegister lhsDest) {
  const auto wide = Simd8H(lhsDest);
  const auto left = Simd16B(lhsDest);
  const auto right = Simd16B(rhs);
  Smull2(wide, left, right);
}
// Three-register form: Smull2 of the 16B views of lhs and rhs, written to the
// 8H view of dest.
void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  const auto wide = Simd8H(dest);
  const auto left = Simd16B(lhs);
  const auto right = Simd16B(rhs);
  Smull2(wide, left, right);
}
// In-place form: lhsDest supplies the left input (8B view) and receives the
// Umull result (8H view); rhs supplies the right input (8B view).
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister rhs,
                                              FloatRegister lhsDest) {
  const auto wide = Simd8H(lhsDest);
  const auto left = Simd8B(lhsDest);
  const auto right = Simd8B(rhs);
  Umull(wide, left, right);
}
// Three-register form: Umull of the 8B views of lhs and rhs, written to the
// 8H view of dest.
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
                                              FloatRegister rhs,
                                              FloatRegister dest) {
  const auto wide = Simd8H(dest);
  const auto left = Simd8B(lhs);
  const auto right = Simd8B(rhs);
  Umull(wide, left, right);
}
// In-place form: lhsDest supplies the left input (16B view) and receives the
// Umull2 result (8H view); rhs supplies the right input (16B view).
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister rhs,
                                               FloatRegister lhsDest) {
  const auto wide = Simd8H(lhsDest);
  const auto left = Simd16B(lhsDest);
  const auto right = Simd16B(rhs);
  Umull2(wide, left, right);
}
// Three-register form: Umull2 of the 16B views of lhs and rhs, written to the
// 8H view of dest.
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  const auto wide = Simd8H(dest);
  const auto left = Simd16B(lhs);
  const auto right = Simd16B(rhs);
  Umull2(wide, left, right);
}
// In-place form: lhsDest supplies the left input (4H view) and receives the
// Smull result (4S view); rhs supplies the right input (4H view).
void MacroAssembler::extMulLowInt16x8(FloatRegister rhs,
                                      FloatRegister lhsDest) {
  const auto wide = Simd4S(lhsDest);
  const auto left = Simd4H(lhsDest);
  const auto right = Simd4H(rhs);
  Smull(wide, left, right);
}
// Three-register form: Smull of the 4H views of lhs and rhs, written to the
// 4S view of dest.
void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  const auto wide = Simd4S(dest);
  const auto left = Simd4H(lhs);
  const auto right = Simd4H(rhs);
  Smull(wide, left, right);
}
// In-place form: lhsDest supplies the left input (8H view) and receives the
// Smull2 result (4S view); rhs supplies the right input (8H view).
void MacroAssembler::extMulHighInt16x8(FloatRegister rhs,
                                       FloatRegister lhsDest) {
  const auto wide = Simd4S(lhsDest);
  const auto left = Simd8H(lhsDest);
  const auto right = Simd8H(rhs);
  Smull2(wide, left, right);
}
// Three-register form: Smull2 of the 8H views of lhs and rhs, written to the
// 4S view of dest.
void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  const auto wide = Simd4S(dest);
  const auto left = Simd8H(lhs);
  const auto right = Simd8H(rhs);
  Smull2(wide, left, right);
}
// In-place form: lhsDest supplies the left input (4H view) and receives the
// Umull result (4S view); rhs supplies the right input (4H view).
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister rhs,
                                              FloatRegister lhsDest) {
  const auto wide = Simd4S(lhsDest);
  const auto left = Simd4H(lhsDest);
  const auto right = Simd4H(rhs);
  Umull(wide, left, right);
}
// Three-register form: Umull of the 4H views of lhs and rhs, written to the
// 4S view of dest.
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
                                              FloatRegister rhs,
                                              FloatRegister dest) {
  const auto wide = Simd4S(dest);
  const auto left = Simd4H(lhs);
  const auto right = Simd4H(rhs);
  Umull(wide, left, right);
}
// In-place form: lhsDest supplies the left input (8H view) and receives the
// Umull2 result (4S view); rhs supplies the right input (8H view).
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister rhs,
                                               FloatRegister lhsDest) {
  const auto wide = Simd4S(lhsDest);
  const auto left = Simd8H(lhsDest);
  const auto right = Simd8H(rhs);
  Umull2(wide, left, right);
}
// Three-register form: Umull2 of the 8H views of lhs and rhs, written to the
// 4S view of dest.
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  const auto wide = Simd4S(dest);
  const auto left = Simd8H(lhs);
  const auto right = Simd8H(rhs);
  Umull2(wide, left, right);
}
// In-place form: lhsDest supplies the left input (2S view) and receives the
// Smull result (2D view); rhs supplies the right input (2S view).
void MacroAssembler::extMulLowInt32x4(FloatRegister rhs,
                                      FloatRegister lhsDest) {
  const auto wide = Simd2D(lhsDest);
  const auto left = Simd2S(lhsDest);
  const auto right = Simd2S(rhs);
  Smull(wide, left, right);
}
// Three-register form: Smull of the 2S views of lhs and rhs, written to the
// 2D view of dest.
void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  const auto wide = Simd2D(dest);
  const auto left = Simd2S(lhs);
  const auto right = Simd2S(rhs);
  Smull(wide, left, right);
}
// In-place form: lhsDest supplies the left input (4S view) and receives the
// Smull2 result (2D view); rhs supplies the right input (4S view).
void MacroAssembler::extMulHighInt32x4(FloatRegister rhs,
                                       FloatRegister lhsDest) {
  const auto wide = Simd2D(lhsDest);
  const auto left = Simd4S(lhsDest);
  const auto right = Simd4S(rhs);
  Smull2(wide, left, right);
}
// Three-register form: Smull2 of the 4S views of lhs and rhs, written to the
// 2D view of dest.
void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  const auto wide = Simd2D(dest);
  const auto left = Simd4S(lhs);
  const auto right = Simd4S(rhs);
  Smull2(wide, left, right);
}
// In-place form: lhsDest supplies the left input (2S view) and receives the
// Umull result (2D view); rhs supplies the right input (2S view).
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister rhs,
                                              FloatRegister lhsDest) {
  const auto wide = Simd2D(lhsDest);
  const auto left = Simd2S(lhsDest);
  const auto right = Simd2S(rhs);
  Umull(wide, left, right);
}
// Three-register form: Umull of the 2S views of lhs and rhs, written to the
// 2D view of dest.
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
                                              FloatRegister rhs,
                                              FloatRegister dest) {
  const auto wide = Simd2D(dest);
  const auto left = Simd2S(lhs);
  const auto right = Simd2S(rhs);
  Umull(wide, left, right);
}
// In-place form: lhsDest supplies the left input (4S view) and receives the
// Umull2 result (2D view); rhs supplies the right input (4S view).
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
                                               FloatRegister lhsDest) {
  const auto wide = Simd2D(lhsDest);
  const auto left = Simd4S(lhsDest);
  const auto right = Simd4S(rhs);
  Umull2(wide, left, right);
}
// Three-register form: Umull2 of the 4S views of lhs and rhs, written to the
// 2D view of dest.
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  const auto wide = Simd2D(dest);
  const auto left = Simd4S(lhs);
  const auto right = Simd4S(rhs);
  Umull2(wide, left, right);
}
// In-place form: Sqrdmulh over the 8H views of lhsDest and rhs, with the
// result written back to the 8H view of lhsDest.
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
                                       FloatRegister lhsDest) {
  const auto inOut = Simd8H(lhsDest);
  const auto right = Simd8H(rhs);
  Sqrdmulh(inOut, inOut, right);
}
void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) {
Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));

View File

@ -2641,43 +2641,43 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
masm.widenDotInt16x8(lhs, rhs, dest);
break;
case wasm::SimdOp::I16x8ExtmulLowI8x16S:
masm.extMulLowInt8x16(rhs, lhsDest);
masm.extMulLowInt8x16(lhs, rhs, dest);
break;
case wasm::SimdOp::I16x8ExtmulHighI8x16S:
masm.extMulHighInt8x16(rhs, lhsDest);
masm.extMulHighInt8x16(lhs, rhs, dest);
break;
case wasm::SimdOp::I16x8ExtmulLowI8x16U:
masm.unsignedExtMulLowInt8x16(rhs, lhsDest);
masm.unsignedExtMulLowInt8x16(lhs, rhs, dest);
break;
case wasm::SimdOp::I16x8ExtmulHighI8x16U:
masm.unsignedExtMulHighInt8x16(rhs, lhsDest);
masm.unsignedExtMulHighInt8x16(lhs, rhs, dest);
break;
case wasm::SimdOp::I32x4ExtmulLowI16x8S:
masm.extMulLowInt16x8(rhs, lhsDest);
masm.extMulLowInt16x8(lhs, rhs, dest);
break;
case wasm::SimdOp::I32x4ExtmulHighI16x8S:
masm.extMulHighInt16x8(rhs, lhsDest);
masm.extMulHighInt16x8(lhs, rhs, dest);
break;
case wasm::SimdOp::I32x4ExtmulLowI16x8U:
masm.unsignedExtMulLowInt16x8(rhs, lhsDest);
masm.unsignedExtMulLowInt16x8(lhs, rhs, dest);
break;
case wasm::SimdOp::I32x4ExtmulHighI16x8U:
masm.unsignedExtMulHighInt16x8(rhs, lhsDest);
masm.unsignedExtMulHighInt16x8(lhs, rhs, dest);
break;
case wasm::SimdOp::I64x2ExtmulLowI32x4S:
masm.extMulLowInt32x4(rhs, lhsDest);
masm.extMulLowInt32x4(lhs, rhs, dest);
break;
case wasm::SimdOp::I64x2ExtmulHighI32x4S:
masm.extMulHighInt32x4(rhs, lhsDest);
masm.extMulHighInt32x4(lhs, rhs, dest);
break;
case wasm::SimdOp::I64x2ExtmulLowI32x4U:
masm.unsignedExtMulLowInt32x4(rhs, lhsDest);
masm.unsignedExtMulLowInt32x4(lhs, rhs, dest);
break;
case wasm::SimdOp::I64x2ExtmulHighI32x4U:
masm.unsignedExtMulHighInt32x4(rhs, lhsDest);
masm.unsignedExtMulHighInt32x4(lhs, rhs, dest);
break;
case wasm::SimdOp::I16x8Q15MulrSatS:
masm.q15MulrSatInt16x8(rhs, lhsDest);
masm.q15MulrSatInt16x8(lhs, rhs, dest);
break;
case wasm::SimdOp::F32x4RelaxedMin:
masm.minFloat32x4Relaxed(lhs, rhs, dest);
@ -2693,13 +2693,13 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
break;
# ifdef ENABLE_WASM_SIMD_WORMHOLE
case wasm::SimdOp::MozWHSELFTEST:
masm.loadConstantSimd128(wasm::WormholeSignature(), lhsDest);
masm.loadConstantSimd128(wasm::WormholeSignature(), dest);
break;
case wasm::SimdOp::MozWHPMADDUBSW:
masm.vpmaddubsw(rhs, lhsDest, lhsDest);
masm.vpmaddubsw(rhs, lhs, dest);
break;
case wasm::SimdOp::MozWHPMADDWD:
masm.vpmaddwd(Operand(rhs), lhsDest, lhsDest);
masm.vpmaddwd(Operand(rhs), lhs, dest);
break;
# endif
default:

View File

@ -1089,10 +1089,25 @@ void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
case wasm::SimdOp::I16x8NarrowI32x4S:
case wasm::SimdOp::I16x8NarrowI32x4U:
case wasm::SimdOp::I32x4DotI16x8S:
case wasm::SimdOp::I16x8ExtmulLowI8x16S:
case wasm::SimdOp::I16x8ExtmulHighI8x16S:
case wasm::SimdOp::I16x8ExtmulLowI8x16U:
case wasm::SimdOp::I16x8ExtmulHighI8x16U:
case wasm::SimdOp::I32x4ExtmulLowI16x8S:
case wasm::SimdOp::I32x4ExtmulHighI16x8S:
case wasm::SimdOp::I32x4ExtmulLowI16x8U:
case wasm::SimdOp::I32x4ExtmulHighI16x8U:
case wasm::SimdOp::I64x2ExtmulLowI32x4S:
case wasm::SimdOp::I64x2ExtmulHighI32x4S:
case wasm::SimdOp::I64x2ExtmulLowI32x4U:
case wasm::SimdOp::I64x2ExtmulHighI32x4U:
case wasm::SimdOp::I16x8Q15MulrSatS:
case wasm::SimdOp::F32x4RelaxedMin:
case wasm::SimdOp::F32x4RelaxedMax:
case wasm::SimdOp::F64x2RelaxedMin:
case wasm::SimdOp::F64x2RelaxedMax:
case wasm::SimdOp::MozWHPMADDUBSW:
case wasm::SimdOp::MozWHPMADDWD:
if (isThreeOpAllowed()) {
auto* lir = new (alloc())
LWasmBinarySimd128(op, useRegisterAtStart(lhs),

View File

@ -1756,113 +1756,119 @@ void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
// discussion on the PR (scroll down far enough) on how to avoid one of them,
// but we need benchmarking + correctness proofs.
// Extended multiply of the low i8 lanes: rhs is widened into the scratch
// register with widenLowInt8x16, the left input is widened into the
// destination, and the widened lanes are multiplied with mulInt16x8.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::extMulLowInt8x16(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
widenLowInt8x16(rhs, scratch);
widenLowInt8x16(lhsDest, lhsDest);
mulInt16x8(lhsDest, scratch, lhsDest);
widenLowInt8x16(lhs, dest);
mulInt16x8(dest, scratch, dest);
}
// Extended multiply of the high i8 lanes: rhs is widened into the scratch
// register with widenHighInt8x16, the left input is widened into the
// destination, and the widened lanes are multiplied with mulInt16x8.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::extMulHighInt8x16(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
widenHighInt8x16(rhs, scratch);
widenHighInt8x16(lhsDest, lhsDest);
mulInt16x8(lhsDest, scratch, lhsDest);
widenHighInt8x16(lhs, dest);
mulInt16x8(dest, scratch, dest);
}
// Unsigned extended multiply of the low i8 lanes: rhs is widened into the
// scratch register with unsignedWidenLowInt8x16, the left input is widened
// into the destination, and the widened lanes are multiplied with mulInt16x8.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
unsignedWidenLowInt8x16(rhs, scratch);
unsignedWidenLowInt8x16(lhsDest, lhsDest);
mulInt16x8(lhsDest, scratch, lhsDest);
unsignedWidenLowInt8x16(lhs, dest);
mulInt16x8(dest, scratch, dest);
}
// Unsigned extended multiply of the high i8 lanes: rhs is widened into the
// scratch register with unsignedWidenHighInt8x16, the left input is widened
// into the destination, and the widened lanes are multiplied with mulInt16x8.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
unsignedWidenHighInt8x16(rhs, scratch);
unsignedWidenHighInt8x16(lhsDest, lhsDest);
mulInt16x8(lhsDest, scratch, lhsDest);
unsignedWidenHighInt8x16(lhs, dest);
mulInt16x8(dest, scratch, dest);
}
// Extended multiply of the low i16 lanes: vpmulhw leaves the high halves of
// the 16-bit products in scratch, vpmullw leaves the low halves in the
// destination, and vpunpcklwd interleaves the two into 32-bit lanes.
// moveSimd128IntIfNotAVX presumably returns lhs itself when AVX is available
// and a copy in scratch otherwise -- confirm against its definition.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::extMulLowInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
vmovdqa(lhsDest, scratch);
vpmullw(Operand(rhs), lhsDest, lhsDest);
vpmulhw(Operand(rhs), scratch, scratch);
vpunpcklwd(scratch, lhsDest, lhsDest);
FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
vpmulhw(Operand(rhs), lhsCopy, scratch);
vpmullw(Operand(rhs), lhs, dest);
vpunpcklwd(scratch, dest, dest);
}
// Extended multiply of the high i16 lanes: vpmulhw leaves the high halves of
// the 16-bit products in scratch, vpmullw leaves the low halves in the
// destination, and vpunpckhwd interleaves the two into 32-bit lanes.
// moveSimd128IntIfNotAVX presumably returns lhs itself when AVX is available
// and a copy in scratch otherwise -- confirm against its definition.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::extMulHighInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
vmovdqa(lhsDest, scratch);
vpmullw(Operand(rhs), lhsDest, lhsDest);
vpmulhw(Operand(rhs), scratch, scratch);
vpunpckhwd(scratch, lhsDest, lhsDest);
FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
vpmulhw(Operand(rhs), lhsCopy, scratch);
vpmullw(Operand(rhs), lhs, dest);
vpunpckhwd(scratch, dest, dest);
}
// Unsigned extended multiply of the low i16 lanes: vpmulhuw leaves the high
// halves of the 16-bit products in scratch, vpmullw leaves the low halves in
// the destination, and vpunpcklwd interleaves the two into 32-bit lanes.
// moveSimd128IntIfNotAVX presumably returns lhs itself when AVX is available
// and a copy in scratch otherwise -- confirm against its definition.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
vmovdqa(lhsDest, scratch);
vpmullw(Operand(rhs), lhsDest, lhsDest);
vpmulhuw(Operand(rhs), scratch, scratch);
vpunpcklwd(scratch, lhsDest, lhsDest);
FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
vpmulhuw(Operand(rhs), lhsCopy, scratch);
vpmullw(Operand(rhs), lhs, dest);
vpunpcklwd(scratch, dest, dest);
}
// Unsigned extended multiply of the high i16 lanes: vpmulhuw leaves the high
// halves of the 16-bit products in scratch, vpmullw leaves the low halves in
// the destination, and vpunpckhwd interleaves the two into 32-bit lanes.
// moveSimd128IntIfNotAVX presumably returns lhs itself when AVX is available
// and a copy in scratch otherwise -- confirm against its definition.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
vmovdqa(lhsDest, scratch);
vpmullw(Operand(rhs), lhsDest, lhsDest);
vpmulhuw(Operand(rhs), scratch, scratch);
vpunpckhwd(scratch, lhsDest, lhsDest);
FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
vpmulhuw(Operand(rhs), lhsCopy, scratch);
vpmullw(Operand(rhs), lhs, dest);
vpunpckhwd(scratch, dest, dest);
}
// Extended multiply of the low i32 lanes: both inputs are rearranged with
// vpshufd using ComputeShuffleMask(0, 0, 1, 0) (the left input into scratch,
// rhs into the destination) and then multiplied with vpmuldq.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::extMulLowInt32x4(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhsDest, scratch);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, lhsDest);
vpmuldq(scratch, lhsDest, lhsDest);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
vpmuldq(scratch, dest, dest);
}
// Extended multiply of the high i32 lanes: both inputs are rearranged with
// vpshufd using ComputeShuffleMask(2, 0, 3, 0) (the left input into scratch,
// rhs into the destination) and then multiplied with vpmuldq.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::extMulHighInt32x4(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhsDest, scratch);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, lhsDest);
vpmuldq(scratch, lhsDest, lhsDest);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
vpmuldq(scratch, dest, dest);
}
// Unsigned extended multiply of the low i32 lanes: both inputs are rearranged
// with vpshufd using ComputeShuffleMask(0, 0, 1, 0) (the left input into
// scratch, rhs into the destination) and then multiplied with vpmuludq.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhsDest, scratch);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, lhsDest);
vpmuludq(Operand(scratch), lhsDest, lhsDest);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
vpmuludq(Operand(scratch), dest, dest);
}
// Unsigned extended multiply of the high i32 lanes: both inputs are rearranged
// with vpshufd using ComputeShuffleMask(2, 0, 3, 0) (the left input into
// scratch, rhs into the destination) and then multiplied with vpmuludq.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhsDest, scratch);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, lhsDest);
vpmuludq(Operand(scratch), lhsDest, lhsDest);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
vpmuludq(Operand(scratch), dest, dest);
}
// Q15 rounding multiply: vpmulhrsw produces the product, a mask of the lanes
// equal to the splatted constant 0x8000 is built in scratch with
// vpcmpeqwSimd128, and that mask is xor-ed into the result.
// Presumably the xor turns the lone 0x8000 overflow result into 0x7FFF to give
// saturating behaviour -- confirm against the wasm q15mulr_sat_s definition.
// NOTE(review): this span carries two function headers and both
// (rhs, lhsDest) and (lhs, rhs, dest) body lines -- apparently the old and new
// sides of a diff without +/- markers. Confirm which lines are live.
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
vpmulhrsw(Operand(rhs), lhsDest, lhsDest);
vmovdqa(lhsDest, scratch);
vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), scratch, scratch);
vpxor(scratch, lhsDest, lhsDest);
vpmulhrsw(Operand(rhs), lhs, dest);
FloatRegister destCopy = moveSimd128IntIfNotAVX(dest, scratch);
vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), destCopy, scratch);
vpxor(scratch, dest, dest);
}
// Integer negate

View File

@ -6814,55 +6814,55 @@ static void DotI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
}
// Emits masm.extMulLowInt8x16 with rs as the right operand and rsd as both the
// left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulLowI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulLowInt8x16(rs, rsd);
masm.extMulLowInt8x16(rsd, rs, rsd);
}
// Emits masm.extMulHighInt8x16 with rs as the right operand and rsd as both
// the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulHighI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulHighInt8x16(rs, rsd);
masm.extMulHighInt8x16(rsd, rs, rsd);
}
// Emits masm.unsignedExtMulLowInt8x16 with rs as the right operand and rsd as
// both the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulLowUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulLowInt8x16(rs, rsd);
masm.unsignedExtMulLowInt8x16(rsd, rs, rsd);
}
// Emits masm.unsignedExtMulHighInt8x16 with rs as the right operand and rsd as
// both the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulHighUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulHighInt8x16(rs, rsd);
masm.unsignedExtMulHighInt8x16(rsd, rs, rsd);
}
// Emits masm.extMulLowInt16x8 with rs as the right operand and rsd as both the
// left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulLowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulLowInt16x8(rs, rsd);
masm.extMulLowInt16x8(rsd, rs, rsd);
}
// Emits masm.extMulHighInt16x8 with rs as the right operand and rsd as both
// the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulHighI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulHighInt16x8(rs, rsd);
masm.extMulHighInt16x8(rsd, rs, rsd);
}
// Emits masm.unsignedExtMulLowInt16x8 with rs as the right operand and rsd as
// both the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulLowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulLowInt16x8(rs, rsd);
masm.unsignedExtMulLowInt16x8(rsd, rs, rsd);
}
// Emits masm.unsignedExtMulHighInt16x8 with rs as the right operand and rsd as
// both the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulHighUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulHighInt16x8(rs, rsd);
masm.unsignedExtMulHighInt16x8(rsd, rs, rsd);
}
// Emits masm.extMulLowInt32x4 with rs as the right operand and rsd as both the
// left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulLowI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulLowInt32x4(rs, rsd);
masm.extMulLowInt32x4(rsd, rs, rsd);
}
// Emits masm.extMulHighInt32x4 with rs as the right operand and rsd as both
// the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulHighI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.extMulHighInt32x4(rs, rsd);
masm.extMulHighInt32x4(rsd, rs, rsd);
}
// Emits masm.unsignedExtMulLowInt32x4 with rs as the right operand and rsd as
// both the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulLowUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulLowInt32x4(rs, rsd);
masm.unsignedExtMulLowInt32x4(rsd, rs, rsd);
}
// Emits masm.unsignedExtMulHighInt32x4 with rs as the right operand and rsd as
// both the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void ExtMulHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulHighInt32x4(rs, rsd);
masm.unsignedExtMulHighInt32x4(rsd, rs, rsd);
}
// Emits masm.q15MulrSatInt16x8 with rs as the right operand and rsd as both
// the left operand and the destination.
// NOTE(review): the two calls below look like the old (two-operand) and new
// (three-operand) line of a diff; presumably only the second is meant to
// run -- confirm.
static void Q15MulrSatS(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.q15MulrSatInt16x8(rs, rsd);
masm.q15MulrSatInt16x8(rsd, rs, rsd);
}
static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond,