mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-02 12:37:14 +00:00
Arm64: Minor optimization to gather loads with no base addr register and SVE path
Arm64's SVE gather load can be slightly optimized when no base GPR is provided, because there is a form of the instruction that does not require one. The limitation of this form is that it does not support scaling at all, so it can only be used when the offset scale is 1.
This commit is contained in:
parent
e9a17b19c5
commit
58ea76eb24
@ -1008,16 +1008,6 @@ DEF_OP(VLoadVectorGatherMasked) {
|
||||
ModType = ARMEmitter::SVEModType::MOD_LSL;
|
||||
}
|
||||
|
||||
ARMEmitter::Register AddrReg = TMP1;
|
||||
|
||||
if (BaseAddr.has_value()) {
|
||||
AddrReg = GetReg(Op->AddrBase.ID());
|
||||
} else {
|
||||
///< OpcodeDispatcher didn't provide a Base address while SVE requires one.
|
||||
LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0);
|
||||
}
|
||||
|
||||
const auto MemDst = ARMEmitter::SVEMemOperand(AddrReg.X(), VectorIndexLow.Z(), ModType, SVEScale);
|
||||
const auto SubRegSize = ConvertSubRegSize8(IROp);
|
||||
|
||||
const auto CMPPredicate = ARMEmitter::PReg::p0;
|
||||
@ -1027,6 +1017,19 @@ DEF_OP(VLoadVectorGatherMasked) {
|
||||
cmplt(SubRegSize, CMPPredicate, GoverningPredicate.Zeroing(), MaskReg.Z(), 0);
|
||||
auto TempDst = VTMP1;
|
||||
|
||||
// No need to load a temporary register in the case that we weren't provided a base address and there is no scaling.
|
||||
ARMEmitter::SVEMemOperand MemDst {ARMEmitter::SVEMemOperand(VectorIndexLow.Z(), 0)};
|
||||
if (BaseAddr.has_value() || OffsetScale != 1) {
|
||||
ARMEmitter::Register AddrReg = TMP1;
|
||||
if (BaseAddr.has_value()) {
|
||||
AddrReg = GetReg(Op->AddrBase.ID());
|
||||
} else {
|
||||
///< OpcodeDispatcher didn't provide a Base address while SVE requires one.
|
||||
LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0);
|
||||
}
|
||||
MemDst = ARMEmitter::SVEMemOperand(AddrReg.X(), VectorIndexLow.Z(), ModType, SVEScale);
|
||||
}
|
||||
|
||||
switch (IROp->ElementSize) {
|
||||
case 1: {
|
||||
ld1b<ARMEmitter::SubRegSize::i8Bit>(TempDst.Z(), CMPPredicate.Zeroing(), MemDst);
|
||||
|
Loading…
x
Reference in New Issue
Block a user