Arm64: Minor optimization to gather loads with no base addr register and SVE path

Arm64's SVE gather load instruction can be slightly optimized when a
base GPR register isn't provided, because there is a form of the
instruction that doesn't require one.

The limitation of this form is that it doesn't support scaling at all,
so it can only be used when the offset scale is 1.
This commit is contained in:
Ryan Houdek 2024-06-29 13:14:32 -07:00
parent e9a17b19c5
commit 58ea76eb24
No known key found for this signature in database

View File

@ -1008,16 +1008,6 @@ DEF_OP(VLoadVectorGatherMasked) {
ModType = ARMEmitter::SVEModType::MOD_LSL;
}
ARMEmitter::Register AddrReg = TMP1;
if (BaseAddr.has_value()) {
AddrReg = GetReg(Op->AddrBase.ID());
} else {
///< OpcodeDispatcher didn't provide a Base address while SVE requires one.
LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0);
}
const auto MemDst = ARMEmitter::SVEMemOperand(AddrReg.X(), VectorIndexLow.Z(), ModType, SVEScale);
const auto SubRegSize = ConvertSubRegSize8(IROp);
const auto CMPPredicate = ARMEmitter::PReg::p0;
@ -1027,6 +1017,19 @@ DEF_OP(VLoadVectorGatherMasked) {
cmplt(SubRegSize, CMPPredicate, GoverningPredicate.Zeroing(), MaskReg.Z(), 0);
auto TempDst = VTMP1;
// No need to load a temporary register in the case that we weren't provided a base address and there is no scaling.
ARMEmitter::SVEMemOperand MemDst {ARMEmitter::SVEMemOperand(VectorIndexLow.Z(), 0)};
if (BaseAddr.has_value() || OffsetScale != 1) {
ARMEmitter::Register AddrReg = TMP1;
if (BaseAddr.has_value()) {
AddrReg = GetReg(Op->AddrBase.ID());
} else {
///< OpcodeDispatcher didn't provide a Base address while SVE requires one.
LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0);
}
MemDst = ARMEmitter::SVEMemOperand(AddrReg.X(), VectorIndexLow.Z(), ModType, SVEScale);
}
switch (IROp->ElementSize) {
case 1: {
ld1b<ARMEmitter::SubRegSize::i8Bit>(TempDst.Z(), CMPPredicate.Zeroing(), MemDst);