mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-14 09:28:34 +00:00
VectorOps: Restructure DPPOpImpl. This will get reused by AVX128
This commit is contained in:
parent
76bd22d279
commit
cc168ce0fb
@ -1247,8 +1247,7 @@ private:
|
||||
|
||||
Ref CVTGPR_To_FPRImpl(OpcodeArgs, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op);
|
||||
|
||||
Ref DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
|
||||
const X86Tables::DecodedOperand& Imm, size_t ElementSize);
|
||||
Ref DPPOpImpl(size_t DstSize, Ref Src1, Ref Src2, uint8_t Mask, size_t ElementSize);
|
||||
|
||||
Ref VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm);
|
||||
|
||||
|
@ -4099,9 +4099,7 @@ void OpDispatchBuilder::PHMINPOSUWOp(OpcodeArgs) {
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
Ref OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
|
||||
const X86Tables::DecodedOperand& Imm, size_t ElementSize) {
|
||||
const uint8_t Mask = Imm.Literal();
|
||||
Ref OpDispatchBuilder::DPPOpImpl(size_t DstSize, Ref Src1, Ref Src2, uint8_t Mask, size_t ElementSize) {
|
||||
const auto SizeMask = [ElementSize]() {
|
||||
if (ElementSize == 4) {
|
||||
return 0b1111;
|
||||
@ -4119,7 +4117,6 @@ Ref OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Sr
|
||||
|
||||
return FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPD_MASK;
|
||||
}();
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
|
||||
Ref ZeroVec = LoadZeroVector(DstSize);
|
||||
if (SrcMask == 0 || DstMask == 0) {
|
||||
@ -4127,11 +4124,8 @@ Ref OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Sr
|
||||
return ZeroVec;
|
||||
}
|
||||
|
||||
Ref Src1V = LoadSource(FPRClass, Op, Src1, Op->Flags);
|
||||
Ref Src2V = LoadSource(FPRClass, Op, Src2, Op->Flags);
|
||||
|
||||
// First step is to do an FMUL
|
||||
Ref Temp = _VFMul(DstSize, ElementSize, Src1V, Src2V);
|
||||
Ref Temp = _VFMul(DstSize, ElementSize, Src1, Src2);
|
||||
|
||||
// Now mask results based on IndexMask.
|
||||
if (SrcMask != SizeMask) {
|
||||
@ -4283,7 +4277,11 @@ Ref OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Sr
|
||||
|
||||
template<size_t ElementSize>
|
||||
void OpDispatchBuilder::DPPOp(OpcodeArgs) {
|
||||
Ref Result = DPPOpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1], ElementSize);
|
||||
|
||||
Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
|
||||
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
|
||||
Ref Result = DPPOpImpl(GetDstSize(Op), Dest, Src, Op->Src[1].Literal(), ElementSize);
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
@ -4349,7 +4347,10 @@ void OpDispatchBuilder::VDPPOp(OpcodeArgs) {
|
||||
// 256-bit DPPS isn't handled by the 128-bit solution.
|
||||
Result = VDPPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]);
|
||||
} else {
|
||||
Result = DPPOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2], ElementSize);
|
||||
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
|
||||
Result = DPPOpImpl(GetDstSize(Op), Src1, Src2, Op->Src[2].Literal(), ElementSize);
|
||||
}
|
||||
|
||||
// We don't need to emit a _VMov to clear the upper lane, since DPPOpImpl uses a zero vector
|
||||
|
Loading…
Reference in New Issue
Block a user