VectorOps: Restructure DPPOpImpl. This will get reused by AVX128

This commit is contained in:
Ryan Houdek 2024-06-19 03:21:33 -07:00 committed by Alyssa Rosenzweig
parent 76bd22d279
commit cc168ce0fb
2 changed files with 12 additions and 12 deletions

View File

@ -1247,8 +1247,7 @@ private:
Ref CVTGPR_To_FPRImpl(OpcodeArgs, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op);
Ref DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
const X86Tables::DecodedOperand& Imm, size_t ElementSize);
Ref DPPOpImpl(size_t DstSize, Ref Src1, Ref Src2, uint8_t Mask, size_t ElementSize);
Ref VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm);

View File

@ -4099,9 +4099,7 @@ void OpDispatchBuilder::PHMINPOSUWOp(OpcodeArgs) {
StoreResult(FPRClass, Op, Result, -1);
}
Ref OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
const X86Tables::DecodedOperand& Imm, size_t ElementSize) {
const uint8_t Mask = Imm.Literal();
Ref OpDispatchBuilder::DPPOpImpl(size_t DstSize, Ref Src1, Ref Src2, uint8_t Mask, size_t ElementSize) {
const auto SizeMask = [ElementSize]() {
if (ElementSize == 4) {
return 0b1111;
@ -4119,7 +4117,6 @@ Ref OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Sr
return FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPD_MASK;
}();
const auto DstSize = GetDstSize(Op);
Ref ZeroVec = LoadZeroVector(DstSize);
if (SrcMask == 0 || DstMask == 0) {
@ -4127,11 +4124,8 @@ Ref OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Sr
return ZeroVec;
}
Ref Src1V = LoadSource(FPRClass, Op, Src1, Op->Flags);
Ref Src2V = LoadSource(FPRClass, Op, Src2, Op->Flags);
// First step is to do an FMUL
Ref Temp = _VFMul(DstSize, ElementSize, Src1V, Src2V);
Ref Temp = _VFMul(DstSize, ElementSize, Src1, Src2);
// Now mask results based on IndexMask.
if (SrcMask != SizeMask) {
@ -4283,7 +4277,11 @@ Ref OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Sr
template<size_t ElementSize>
void OpDispatchBuilder::DPPOp(OpcodeArgs) {
Ref Result = DPPOpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1], ElementSize);
Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
Ref Result = DPPOpImpl(GetDstSize(Op), Dest, Src, Op->Src[1].Literal(), ElementSize);
StoreResult(FPRClass, Op, Result, -1);
}
@ -4349,7 +4347,10 @@ void OpDispatchBuilder::VDPPOp(OpcodeArgs) {
// 256-bit DPPS isn't handled by the 128-bit solution.
Result = VDPPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]);
} else {
Result = DPPOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2], ElementSize);
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
Result = DPPOpImpl(GetDstSize(Op), Src1, Src2, Op->Src[2].Literal(), ElementSize);
}
// We don't need to emit a _VMov to clear the upper lane, since DPPOpImpl uses a zero vector