mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-12 00:50:40 +00:00
Bug 1757244 - Support VEX encoding for misc multiplication operations. r=jseward
Differential Revision: https://phabricator.services.mozilla.com/D140112
This commit is contained in:
parent
2af2e9f582
commit
2f1eb45418
@ -170,6 +170,24 @@ c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0
|
||||
['f32x4.pmax', `c5 e8 5f c1 vmaxps %xmm1, %xmm2, %xmm0`],
|
||||
['f64x2.pmin', `c5 e9 5d c1 vminpd %xmm1, %xmm2, %xmm0`],
|
||||
['f64x2.pmax', `c5 e9 5f c1 vmaxpd %xmm1, %xmm2, %xmm0`],
|
||||
['i16x8.extmul_high_i8x16_s', `
|
||||
66 44 0f 3a 0f fa 08 palignr \\$0x08, %xmm2, %xmm15
|
||||
c4 42 79 20 ff vpmovsxbw %xmm15, %xmm15
|
||||
66 0f 3a 0f c1 08 palignr \\$0x08, %xmm1, %xmm0
|
||||
c4 e2 79 20 c0 vpmovsxbw %xmm0, %xmm0
|
||||
66 41 0f d5 c7 pmullw %xmm15, %xmm0`],
|
||||
['i32x4.extmul_low_i16x8_u', `
|
||||
c5 71 e4 fa vpmulhuw %xmm2, %xmm1, %xmm15
|
||||
c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0
|
||||
66 41 0f 61 c7 punpcklwd %xmm15, %xmm0`],
|
||||
['i64x2.extmul_low_i32x4_s', `
|
||||
c5 79 70 f9 10 vpshufd \\$0x10, %xmm1, %xmm15
|
||||
c5 f9 70 c2 10 vpshufd \\$0x10, %xmm2, %xmm0
|
||||
66 41 0f 38 28 c7 pmuldq %xmm15, %xmm0`],
|
||||
['i16x8.q15mulr_sat_s', `
|
||||
c4 e2 71 0b c2 vpmulhrsw %xmm2, %xmm1, %xmm0
|
||||
c5 79 75 3d ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm0, %xmm15
|
||||
66 41 0f ef c7 pxor %xmm15, %xmm0`],
|
||||
]);
|
||||
|
||||
// Bitwise binary ops
|
||||
|
@ -2485,86 +2485,57 @@ class MacroAssembler : public MacroAssemblerSpecific {
|
||||
|
||||
// Note for the extMul opcodes, the NxM designation is for the input lanes;
|
||||
// the output lanes are twice as wide.
|
||||
inline void extMulLowInt8x16(FloatRegister rhs, FloatRegister lhsDest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void extMulHighInt8x16(FloatRegister rhs, FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void unsignedExtMulLowInt8x16(FloatRegister rhs, FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void unsignedExtMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void unsignedExtMulHighInt8x16(FloatRegister rhs,
|
||||
FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void unsignedExtMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void extMulLowInt16x8(FloatRegister rhs, FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void extMulHighInt16x8(FloatRegister rhs, FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void unsignedExtMulLowInt16x8(FloatRegister rhs, FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void unsignedExtMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void unsignedExtMulHighInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void unsignedExtMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void extMulLowInt32x4(FloatRegister rhs, FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void extMulHighInt32x4(FloatRegister rhs, FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void unsignedExtMulLowInt32x4(FloatRegister rhs, FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void unsignedExtMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void unsignedExtMulHighInt32x4(FloatRegister rhs,
|
||||
FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void unsignedExtMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
|
||||
inline void q15MulrSatInt16x8(FloatRegister rhs, FloatRegister lhsDest)
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
inline void q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) DEFINED_ON(arm64);
|
||||
FloatRegister dest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
// Integer Negate
|
||||
|
||||
|
@ -2659,137 +2659,72 @@ void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
|
||||
Umlal(Simd2D(dest), Simd2S(scratch), Simd2S(temp1));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulLowInt8x16(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Smull(Simd8H(lhsDest), Simd8B(lhsDest), Simd8B(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Smull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulHighInt8x16(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Smull2(Simd8H(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Smull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Umull(Simd8H(lhsDest), Simd8B(lhsDest), Simd8B(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Umull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Umull2(Simd8H(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Umull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulLowInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Smull(Simd4S(lhsDest), Simd4H(lhsDest), Simd4H(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Smull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulHighInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Smull2(Simd4S(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Smull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Umull(Simd4S(lhsDest), Simd4H(lhsDest), Simd4H(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Umull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Umull2(Simd4S(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Umull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulLowInt32x4(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Smull(Simd2D(lhsDest), Simd2S(lhsDest), Simd2S(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Smull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulHighInt32x4(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Smull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Smull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Umull(Simd2D(lhsDest), Simd2S(lhsDest), Simd2S(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Umull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Umull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Umull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Sqrdmulh(Simd8H(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
|
||||
}
|
||||
|
||||
void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
|
||||
|
@ -2641,43 +2641,43 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
|
||||
masm.widenDotInt16x8(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I16x8ExtmulLowI8x16S:
|
||||
masm.extMulLowInt8x16(rhs, lhsDest);
|
||||
masm.extMulLowInt8x16(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I16x8ExtmulHighI8x16S:
|
||||
masm.extMulHighInt8x16(rhs, lhsDest);
|
||||
masm.extMulHighInt8x16(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I16x8ExtmulLowI8x16U:
|
||||
masm.unsignedExtMulLowInt8x16(rhs, lhsDest);
|
||||
masm.unsignedExtMulLowInt8x16(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I16x8ExtmulHighI8x16U:
|
||||
masm.unsignedExtMulHighInt8x16(rhs, lhsDest);
|
||||
masm.unsignedExtMulHighInt8x16(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I32x4ExtmulLowI16x8S:
|
||||
masm.extMulLowInt16x8(rhs, lhsDest);
|
||||
masm.extMulLowInt16x8(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I32x4ExtmulHighI16x8S:
|
||||
masm.extMulHighInt16x8(rhs, lhsDest);
|
||||
masm.extMulHighInt16x8(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I32x4ExtmulLowI16x8U:
|
||||
masm.unsignedExtMulLowInt16x8(rhs, lhsDest);
|
||||
masm.unsignedExtMulLowInt16x8(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I32x4ExtmulHighI16x8U:
|
||||
masm.unsignedExtMulHighInt16x8(rhs, lhsDest);
|
||||
masm.unsignedExtMulHighInt16x8(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I64x2ExtmulLowI32x4S:
|
||||
masm.extMulLowInt32x4(rhs, lhsDest);
|
||||
masm.extMulLowInt32x4(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I64x2ExtmulHighI32x4S:
|
||||
masm.extMulHighInt32x4(rhs, lhsDest);
|
||||
masm.extMulHighInt32x4(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I64x2ExtmulLowI32x4U:
|
||||
masm.unsignedExtMulLowInt32x4(rhs, lhsDest);
|
||||
masm.unsignedExtMulLowInt32x4(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I64x2ExtmulHighI32x4U:
|
||||
masm.unsignedExtMulHighInt32x4(rhs, lhsDest);
|
||||
masm.unsignedExtMulHighInt32x4(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I16x8Q15MulrSatS:
|
||||
masm.q15MulrSatInt16x8(rhs, lhsDest);
|
||||
masm.q15MulrSatInt16x8(lhs, rhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::F32x4RelaxedMin:
|
||||
masm.minFloat32x4Relaxed(lhs, rhs, dest);
|
||||
@ -2693,13 +2693,13 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
|
||||
break;
|
||||
# ifdef ENABLE_WASM_SIMD_WORMHOLE
|
||||
case wasm::SimdOp::MozWHSELFTEST:
|
||||
masm.loadConstantSimd128(wasm::WormholeSignature(), lhsDest);
|
||||
masm.loadConstantSimd128(wasm::WormholeSignature(), dest);
|
||||
break;
|
||||
case wasm::SimdOp::MozWHPMADDUBSW:
|
||||
masm.vpmaddubsw(rhs, lhsDest, lhsDest);
|
||||
masm.vpmaddubsw(rhs, lhs, dest);
|
||||
break;
|
||||
case wasm::SimdOp::MozWHPMADDWD:
|
||||
masm.vpmaddwd(Operand(rhs), lhsDest, lhsDest);
|
||||
masm.vpmaddwd(Operand(rhs), lhs, dest);
|
||||
break;
|
||||
# endif
|
||||
default:
|
||||
|
@ -1089,10 +1089,25 @@ void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
|
||||
case wasm::SimdOp::I16x8NarrowI32x4S:
|
||||
case wasm::SimdOp::I16x8NarrowI32x4U:
|
||||
case wasm::SimdOp::I32x4DotI16x8S:
|
||||
case wasm::SimdOp::I16x8ExtmulLowI8x16S:
|
||||
case wasm::SimdOp::I16x8ExtmulHighI8x16S:
|
||||
case wasm::SimdOp::I16x8ExtmulLowI8x16U:
|
||||
case wasm::SimdOp::I16x8ExtmulHighI8x16U:
|
||||
case wasm::SimdOp::I32x4ExtmulLowI16x8S:
|
||||
case wasm::SimdOp::I32x4ExtmulHighI16x8S:
|
||||
case wasm::SimdOp::I32x4ExtmulLowI16x8U:
|
||||
case wasm::SimdOp::I32x4ExtmulHighI16x8U:
|
||||
case wasm::SimdOp::I64x2ExtmulLowI32x4S:
|
||||
case wasm::SimdOp::I64x2ExtmulHighI32x4S:
|
||||
case wasm::SimdOp::I64x2ExtmulLowI32x4U:
|
||||
case wasm::SimdOp::I64x2ExtmulHighI32x4U:
|
||||
case wasm::SimdOp::I16x8Q15MulrSatS:
|
||||
case wasm::SimdOp::F32x4RelaxedMin:
|
||||
case wasm::SimdOp::F32x4RelaxedMax:
|
||||
case wasm::SimdOp::F64x2RelaxedMin:
|
||||
case wasm::SimdOp::F64x2RelaxedMax:
|
||||
case wasm::SimdOp::MozWHPMADDUBSW:
|
||||
case wasm::SimdOp::MozWHPMADDWD:
|
||||
if (isThreeOpAllowed()) {
|
||||
auto* lir = new (alloc())
|
||||
LWasmBinarySimd128(op, useRegisterAtStart(lhs),
|
||||
|
@ -1756,113 +1756,119 @@ void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
|
||||
// discussion on the PR (scroll down far enough) on how to avoid one of them,
|
||||
// but we need benchmarking + correctness proofs.
|
||||
|
||||
void MacroAssembler::extMulLowInt8x16(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
widenLowInt8x16(rhs, scratch);
|
||||
widenLowInt8x16(lhsDest, lhsDest);
|
||||
mulInt16x8(lhsDest, scratch, lhsDest);
|
||||
widenLowInt8x16(lhs, dest);
|
||||
mulInt16x8(dest, scratch, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulHighInt8x16(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
widenHighInt8x16(rhs, scratch);
|
||||
widenHighInt8x16(lhsDest, lhsDest);
|
||||
mulInt16x8(lhsDest, scratch, lhsDest);
|
||||
widenHighInt8x16(lhs, dest);
|
||||
mulInt16x8(dest, scratch, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
unsignedWidenLowInt8x16(rhs, scratch);
|
||||
unsignedWidenLowInt8x16(lhsDest, lhsDest);
|
||||
mulInt16x8(lhsDest, scratch, lhsDest);
|
||||
unsignedWidenLowInt8x16(lhs, dest);
|
||||
mulInt16x8(dest, scratch, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
unsignedWidenHighInt8x16(rhs, scratch);
|
||||
unsignedWidenHighInt8x16(lhsDest, lhsDest);
|
||||
mulInt16x8(lhsDest, scratch, lhsDest);
|
||||
unsignedWidenHighInt8x16(lhs, dest);
|
||||
mulInt16x8(dest, scratch, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulLowInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
vmovdqa(lhsDest, scratch);
|
||||
vpmullw(Operand(rhs), lhsDest, lhsDest);
|
||||
vpmulhw(Operand(rhs), scratch, scratch);
|
||||
vpunpcklwd(scratch, lhsDest, lhsDest);
|
||||
FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
|
||||
vpmulhw(Operand(rhs), lhsCopy, scratch);
|
||||
vpmullw(Operand(rhs), lhs, dest);
|
||||
vpunpcklwd(scratch, dest, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulHighInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
vmovdqa(lhsDest, scratch);
|
||||
vpmullw(Operand(rhs), lhsDest, lhsDest);
|
||||
vpmulhw(Operand(rhs), scratch, scratch);
|
||||
vpunpckhwd(scratch, lhsDest, lhsDest);
|
||||
FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
|
||||
vpmulhw(Operand(rhs), lhsCopy, scratch);
|
||||
vpmullw(Operand(rhs), lhs, dest);
|
||||
vpunpckhwd(scratch, dest, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
vmovdqa(lhsDest, scratch);
|
||||
vpmullw(Operand(rhs), lhsDest, lhsDest);
|
||||
vpmulhuw(Operand(rhs), scratch, scratch);
|
||||
vpunpcklwd(scratch, lhsDest, lhsDest);
|
||||
FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
|
||||
vpmulhuw(Operand(rhs), lhsCopy, scratch);
|
||||
vpmullw(Operand(rhs), lhs, dest);
|
||||
vpunpcklwd(scratch, dest, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
vmovdqa(lhsDest, scratch);
|
||||
vpmullw(Operand(rhs), lhsDest, lhsDest);
|
||||
vpmulhuw(Operand(rhs), scratch, scratch);
|
||||
vpunpckhwd(scratch, lhsDest, lhsDest);
|
||||
FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
|
||||
vpmulhuw(Operand(rhs), lhsCopy, scratch);
|
||||
vpmullw(Operand(rhs), lhs, dest);
|
||||
vpunpckhwd(scratch, dest, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulLowInt32x4(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhsDest, scratch);
|
||||
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, lhsDest);
|
||||
vpmuldq(scratch, lhsDest, lhsDest);
|
||||
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
|
||||
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
|
||||
vpmuldq(scratch, dest, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::extMulHighInt32x4(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhsDest, scratch);
|
||||
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, lhsDest);
|
||||
vpmuldq(scratch, lhsDest, lhsDest);
|
||||
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
|
||||
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
|
||||
vpmuldq(scratch, dest, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhsDest, scratch);
|
||||
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, lhsDest);
|
||||
vpmuludq(Operand(scratch), lhsDest, lhsDest);
|
||||
vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
|
||||
vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
|
||||
vpmuludq(Operand(scratch), dest, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
|
||||
FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhsDest, scratch);
|
||||
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, lhsDest);
|
||||
vpmuludq(Operand(scratch), lhsDest, lhsDest);
|
||||
vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
|
||||
vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
|
||||
vpmuludq(Operand(scratch), dest, dest);
|
||||
}
|
||||
|
||||
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
|
||||
FloatRegister dest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
vpmulhrsw(Operand(rhs), lhsDest, lhsDest);
|
||||
vmovdqa(lhsDest, scratch);
|
||||
vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), scratch, scratch);
|
||||
vpxor(scratch, lhsDest, lhsDest);
|
||||
vpmulhrsw(Operand(rhs), lhs, dest);
|
||||
FloatRegister destCopy = moveSimd128IntIfNotAVX(dest, scratch);
|
||||
vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), destCopy, scratch);
|
||||
vpxor(scratch, dest, dest);
|
||||
}
|
||||
|
||||
// Integer negate
|
||||
|
@ -6814,55 +6814,55 @@ static void DotI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
}
|
||||
|
||||
static void ExtMulLowI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.extMulLowInt8x16(rs, rsd);
|
||||
masm.extMulLowInt8x16(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulHighI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.extMulHighInt8x16(rs, rsd);
|
||||
masm.extMulHighInt8x16(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulLowUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.unsignedExtMulLowInt8x16(rs, rsd);
|
||||
masm.unsignedExtMulLowInt8x16(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulHighUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.unsignedExtMulHighInt8x16(rs, rsd);
|
||||
masm.unsignedExtMulHighInt8x16(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulLowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.extMulLowInt16x8(rs, rsd);
|
||||
masm.extMulLowInt16x8(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulHighI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.extMulHighInt16x8(rs, rsd);
|
||||
masm.extMulHighInt16x8(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulLowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.unsignedExtMulLowInt16x8(rs, rsd);
|
||||
masm.unsignedExtMulLowInt16x8(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulHighUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.unsignedExtMulHighInt16x8(rs, rsd);
|
||||
masm.unsignedExtMulHighInt16x8(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulLowI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.extMulLowInt32x4(rs, rsd);
|
||||
masm.extMulLowInt32x4(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulHighI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.extMulHighInt32x4(rs, rsd);
|
||||
masm.extMulHighInt32x4(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulLowUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.unsignedExtMulLowInt32x4(rs, rsd);
|
||||
masm.unsignedExtMulLowInt32x4(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void ExtMulHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.unsignedExtMulHighInt32x4(rs, rsd);
|
||||
masm.unsignedExtMulHighInt32x4(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void Q15MulrSatS(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.q15MulrSatInt16x8(rs, rsd);
|
||||
masm.q15MulrSatInt16x8(rsd, rs, rsd);
|
||||
}
|
||||
|
||||
static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond,
|
||||
|
Loading…
Reference in New Issue
Block a user