[AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.

Patch by Sanjin Sijaric <ssijaric@codeaurora.org>!
Phabricator Review: http://reviews.llvm.org/D5103

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217371 91177308-0d34-0410-b5e6-96231b3b80d8
Chad Rosier 2014-09-08 14:43:48 +00:00
parent 1ef487d463
commit b30d031de4
5 changed files with 194 additions and 0 deletions

include/llvm/Target/TargetInstrInfo.h

@@ -1192,6 +1192,20 @@ public:
    return nullptr;
  }

  // areMemAccessesTriviallyDisjoint - Sometimes, it is possible for the target
  // to tell, even without aliasing information, that two MIs access different
  // memory addresses. This function returns true if the two MIs are known to
  // access disjoint memory addresses, and false otherwise.
  virtual bool
  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
                                  AliasAnalysis *AA = nullptr) const {
    assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
           "MIa must load from or modify a memory location");
    assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
           "MIb must load from or modify a memory location");
    return false;
  }

private:
  int CallFrameSetupOpcode, CallFrameDestroyOpcode;
};

lib/CodeGen/ScheduleDAGInstrs.cpp

@@ -511,9 +511,18 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                             MachineInstr *MIa,
                             MachineInstr *MIb) {
  const MachineFunction *MF = MIa->getParent()->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  // Cover a trivial case - no edge is needed to itself.
  if (MIa == MIb)
    return false;

  // Let the target decide if memory accesses cannot possibly overlap.
  if ((MIa->mayLoad() || MIa->mayStore()) &&
      (MIb->mayLoad() || MIb->mayStore()))
    if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
      return false;

  // FIXME: Need to handle multiple memory operands to support all targets.
  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())

lib/Target/AArch64/AArch64InstrInfo.cpp

@@ -607,6 +607,42 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
  }
}

bool
AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int OffsetA = 0, OffsetB = 0;
  int WidthA = 0, WidthB = 0;

  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
         "MIa must be a store or a load");
  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
         "MIb must be a store or a load");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, the offset from the base register, and the
  // width, i.e. the number of bytes loaded or stored (e.g. 1, 2, 4, 8). If the
  // base registers are identical and the lower access's offset plus its width
  // does not reach past the higher access's offset, the two accesses cannot
  // overlap.
  if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

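The check above is a plain interval test on [Offset, Offset + Width) for two accesses off the same base register. A minimal standalone sketch of that logic, not part of the patch - the helper name and the sample offsets/widths are made up for illustration:

// Hypothetical illustration of the interval check used above: two accesses off
// the same base register are disjoint when the lower one ends at or before the
// point where the higher one begins.
#include <algorithm>
#include <cassert>

static bool accessesAreDisjoint(int OffsetA, int WidthA, int OffsetB, int WidthB) {
  int LowOffset = std::min(OffsetA, OffsetB);
  int HighOffset = std::max(OffsetA, OffsetB);
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

int main() {
  // e.g. ldr w1, [x0, #4] (offset 4, width 4) vs. str w2, [x0, #8] (offset 8, width 4):
  assert(accessesAreDisjoint(4, 4, 8, 4));   // [4,8) and [8,12) do not overlap
  // str w2, [x0, #6] (offset 6, width 4) overlaps [4,8):
  assert(!accessesAreDisjoint(4, 4, 6, 4));
  return 0;
}
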
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
@@ -1270,6 +1306,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
};
}
bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
    const TargetRegisterInfo *TRI) const {
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt->getNumOperands() != 3)
    return false;
  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
    return false;

  // Offset is calculated as the immediate operand multiplied by the scaling
  // factor. Unscaled instructions have scaling factor set to 1.
  int Scale = 0;
  switch (LdSt->getOpcode()) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    break;
  case AArch64::LDRXui:
  case AArch64::STRXui:
    Scale = Width = 8;
    break;
  case AArch64::LDRWui:
  case AArch64::STRWui:
    Scale = Width = 4;
    break;
  case AArch64::LDRBui:
  case AArch64::STRBui:
    Scale = Width = 1;
    break;
  case AArch64::LDRHui:
  case AArch64::STRHui:
    Scale = Width = 2;
    break;
  case AArch64::LDRSui:
  case AArch64::STRSui:
    Scale = Width = 4;
    break;
  case AArch64::LDRDui:
  case AArch64::STRDui:
    Scale = Width = 8;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    break;
  case AArch64::LDRBBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    break;
  case AArch64::LDRHHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    break;
  };

  BaseReg = LdSt->getOperand(1).getReg();
  Offset = LdSt->getOperand(2).getImm() * Scale;
  return true;
}

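For the scaled forms the immediate operand is an index in units of the access width, while the unscaled LDUR*/STUR* forms take a raw byte offset. A small self-contained sketch of that distinction - the enum and helper below are hypothetical stand-ins, not LLVM's opcode enum or API:

// Hypothetical sketch: scaled forms (e.g. LDRXui) multiply the immediate by
// the access width, unscaled forms (e.g. LDURXi) use it as a byte offset.
#include <cassert>

enum class SketchOpcode { LDRXui, LDURXi };  // stand-ins for the opcodes above

static int byteOffset(SketchOpcode Opc, int Imm) {
  int Scale = (Opc == SketchOpcode::LDRXui) ? 8 : 1;  // width is 8 either way
  return Imm * Scale;
}

int main() {
  assert(byteOffset(SketchOpcode::LDRXui, 3) == 24);  // ldr x0, [x1, #24]
  assert(byteOffset(SketchOpcode::LDURXi, 3) == 3);   // ldur x0, [x1, #3]
  return 0;
}
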
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.

lib/Target/AArch64/AArch64InstrInfo.h

@@ -52,6 +52,10 @@ public:
  bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
                             unsigned &DstReg, unsigned &SubIdx) const override;

  bool
  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
                                  AliasAnalysis *AA = nullptr) const override;

  unsigned isLoadFromStackSlot(const MachineInstr *MI,
                               int &FrameIndex) const override;
  unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -90,6 +94,10 @@ public:
                            unsigned &Offset,
                            const TargetRegisterInfo *TRI) const override;

  bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
                                 int &Offset, int &Width,
                                 const TargetRegisterInfo *TRI) const;

  bool enableClusterLoads() const override { return true; }

  bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,

test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll

@@ -0,0 +1,31 @@
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -enable-aa-sched-mi | FileCheck %s
; Check that the scheduler moves the load from a[1] past the store into a[2].

@a = common global i32* null, align 8
@m = common global i32 0, align 4

; Function Attrs: nounwind
define i32 @func(i32 %i, i32 %j, i32 %k) #0 {
entry:
; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
  %0 = load i32** @a, align 8, !tbaa !1
  %arrayidx = getelementptr inbounds i32* %0, i64 2
  store i32 %i, i32* %arrayidx, align 4, !tbaa !5
  %arrayidx1 = getelementptr inbounds i32* %0, i64 1
  %1 = load i32* %arrayidx1, align 4, !tbaa !5
  %add = add nsw i32 %k, %i
  store i32 %add, i32* @m, align 4, !tbaa !5
  ret i32 %1
}

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = metadata !{metadata !"clang version 3.6.0 "}
!1 = metadata !{metadata !2, metadata !2, i64 0}
!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
!5 = metadata !{metadata !6, metadata !6, i64 0}
!6 = metadata !{metadata !"int", metadata !3, i64 0}