IR: Change VSToFGPRInsert to use IR::OpSize

This commit is contained in:
Ryan Houdek 2024-10-27 18:29:34 -07:00
parent 5626f4e50a
commit 37d092aab8
No known key found for this signature in database
3 changed files with 9 additions and 9 deletions

View File

@ -1006,8 +1006,8 @@ void OpDispatchBuilder::AVX128_MOVVectorUnaligned(OpcodeArgs) {
template<IR::OpSize DstElementSize>
void OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR(OpcodeArgs) {
const auto SrcSize = GetSrcSize(Op);
const auto DstSize = GetDstSize(Op);
const auto SrcSize = OpSizeFromSrc(Op);
const auto DstSize = OpSizeFromDst(Op);
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
@ -1022,13 +1022,13 @@ void OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR(OpcodeArgs) {
// then it is more efficient to load into a GPR and convert from GPR->FPR.
// ARM GPR->FPR conversion supports differing source and destination sizes, while FPR->FPR doesn't.
auto Src2 = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags);
Result.Low = _VSToFGPRInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcSize, Src1.Low, Src2, false);
Result.Low = _VSToFGPRInsert(DstSize, DstElementSize, SrcSize, Src1.Low, Src2, false);
} else {
// For cvtsi2s{s,d} where the source and destination are the same size,
// it is more efficient to load directly into the FPR register and convert there.
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
// Always signed
Result.Low = _VSToFVectorInsert(IR::SizeToOpSize(DstSize), DstElementSize, DstElementSize, Src1.Low, Src2.Low, false, false);
Result.Low = _VSToFVectorInsert(DstSize, DstElementSize, DstElementSize, Src1.Low, Src2.Low, false, false);
}
[[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;

View File

@ -428,27 +428,27 @@ Ref OpDispatchBuilder::InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, I
// We load the full vector width when dealing with a source vector,
// so that we don't do any unnecessary zero extension to the scalar
// element that we're going to operate on.
const auto SrcSize = GetSrcSize(Op);
const auto SrcSize = OpSizeFromSrc(Op);
Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags);
if (Src2Op.IsGPR()) {
// If the source is a GPR then convert directly from the GPR.
auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPROpSize(), Op->Flags);
return _VSToFGPRInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcSize, Src1, Src2, ZeroUpperBits);
return _VSToFGPRInsert(DstSize, DstElementSize, SrcSize, Src1, Src2, ZeroUpperBits);
} else if (SrcSize != DstElementSize) {
// If the source is from memory but the source size and destination element size differ,
// then it is more efficient to load into a GPR and convert from GPR->FPR.
// ARM GPR->FPR conversion supports differing source and destination sizes, while FPR->FPR doesn't.
auto Src2 = LoadSource(GPRClass, Op, Src2Op, Op->Flags);
return _VSToFGPRInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcSize, Src1, Src2, ZeroUpperBits);
return _VSToFGPRInsert(DstSize, DstElementSize, SrcSize, Src1, Src2, ZeroUpperBits);
}
// For cvtsi2s{s,d} where the source and destination are the same size,
// it is more efficient to load directly into the FPR register and convert there.
auto Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags);
// Always signed
return _VSToFVectorInsert(IR::SizeToOpSize(DstSize), DstElementSize, DstElementSize, Src1, Src2, false, ZeroUpperBits);
return _VSToFVectorInsert(DstSize, DstElementSize, DstElementSize, Src1, Src2, false, ZeroUpperBits);
}
template<IR::OpSize DstElementSize>

View File

@ -1797,7 +1797,7 @@
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / DstElementSize"
},
"FPR = VSToFGPRInsert OpSize:#RegisterSize, u8:#DstElementSize, u8:$SrcElementSize, FPR:$Vector, GPR:$Src, i1:$ZeroUpperBits": {
"FPR = VSToFGPRInsert OpSize:#RegisterSize, OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Vector, GPR:$Src, i1:$ZeroUpperBits": {
"Desc": ["Does a scalar 'cvt' between Vector1 and GPR.",
"Inserting the result in to the lower element of Vector1 and returning the results.",
"If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",