mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-23 20:57:21 +00:00
We are missing opportunities to use ldm. Take code like this:
void t(int *cp0, int *cp1, int *dp, int fmd) { int c0, c1, d0, d1, d2, d3; c0 = (*cp0++ & 0xffff) | ((*cp1++ << 16) & 0xffff0000); c1 = (*cp0++ & 0xffff) | ((*cp1++ << 16) & 0xffff0000); /* ... */ } It is code-generated into something pretty bad. But with this change (analogous to the X86 back-end), it will use ldm and generate fewer instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106693 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8ff72b5344
commit
4b722108e2
@ -1306,6 +1306,107 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
|
||||
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
|
||||
}
|
||||
|
||||
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
|
||||
/// determine if two loads are loading from the same base address. It should
|
||||
/// only return true if the base pointers are the same and the only differences
|
||||
/// between the two addresses is the offset. It also returns the offsets by
|
||||
/// reference.
|
||||
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
||||
int64_t &Offset1,
|
||||
int64_t &Offset2) const {
|
||||
// Don't worry about Thumb: just ARM and Thumb2.
|
||||
if (Subtarget.isThumb1Only()) return false;
|
||||
|
||||
if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
|
||||
return false;
|
||||
|
||||
switch (Load1->getMachineOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case ARM::LDR:
|
||||
case ARM::LDRB:
|
||||
case ARM::LDRD:
|
||||
case ARM::LDRH:
|
||||
case ARM::LDRSB:
|
||||
case ARM::LDRSH:
|
||||
case ARM::VLDRD:
|
||||
case ARM::VLDRS:
|
||||
case ARM::t2LDRi8:
|
||||
case ARM::t2LDRDi8:
|
||||
case ARM::t2LDRSHi8:
|
||||
case ARM::t2LDRi12:
|
||||
case ARM::t2LDRSHi12:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (Load2->getMachineOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case ARM::LDR:
|
||||
case ARM::LDRB:
|
||||
case ARM::LDRD:
|
||||
case ARM::LDRH:
|
||||
case ARM::LDRSB:
|
||||
case ARM::LDRSH:
|
||||
case ARM::VLDRD:
|
||||
case ARM::VLDRS:
|
||||
case ARM::t2LDRi8:
|
||||
case ARM::t2LDRDi8:
|
||||
case ARM::t2LDRSHi8:
|
||||
case ARM::t2LDRi12:
|
||||
case ARM::t2LDRSHi12:
|
||||
break;
|
||||
}
|
||||
|
||||
// Check if base addresses and chain operands match.
|
||||
if (Load1->getOperand(0) != Load2->getOperand(0) ||
|
||||
Load1->getOperand(4) != Load2->getOperand(4))
|
||||
return false;
|
||||
|
||||
// Index should be Reg0.
|
||||
if (Load1->getOperand(3) != Load2->getOperand(3))
|
||||
return false;
|
||||
|
||||
// Determine the offsets.
|
||||
if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
|
||||
isa<ConstantSDNode>(Load2->getOperand(1))) {
|
||||
Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
|
||||
Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
|
||||
/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
|
||||
/// be scheduled togther. On some targets if two loads are loading from
|
||||
/// addresses in the same cache line, it's better if they are scheduled
|
||||
/// together. This function takes two integers that represent the load offsets
|
||||
/// from the common base address. It returns true if it decides it's desirable
|
||||
/// to schedule the two loads together. "NumLoads" is the number of loads that
|
||||
/// have already been scheduled after Load1.
|
||||
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
|
||||
int64_t Offset1, int64_t Offset2,
|
||||
unsigned NumLoads) const {
|
||||
// Don't worry about Thumb: just ARM and Thumb2.
|
||||
if (Subtarget.isThumb1Only()) return false;
|
||||
|
||||
assert(Offset2 > Offset1);
|
||||
|
||||
if ((Offset2 - Offset1) / 8 > 64)
|
||||
return false;
|
||||
|
||||
if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
|
||||
return false; // FIXME: overly conservative?
|
||||
|
||||
// Four loads in a row should be sufficient.
|
||||
if (NumLoads >= 3)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
|
||||
const MachineBasicBlock *MBB,
|
||||
const MachineFunction &MF) const {
|
||||
|
@ -320,6 +320,26 @@ public:
|
||||
virtual bool produceSameValue(const MachineInstr *MI0,
|
||||
const MachineInstr *MI1) const;
|
||||
|
||||
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
|
||||
/// determine if two loads are loading from the same base address. It should
|
||||
/// only return true if the base pointers are the same and the only
|
||||
/// difference between the two addresses is the offset. It also returns the
|
||||
/// offsets by reference.
|
||||
virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
||||
int64_t &Offset1, int64_t &Offset2)const;
|
||||
|
||||
/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
|
||||
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
|
||||
/// be scheduled together. On some targets if two loads are loading from
|
||||
/// addresses in the same cache line, it's better if they are scheduled
|
||||
/// together. This function takes two integers that represent the load offsets
|
||||
/// from the common base address. It returns true if it decides it's desirable
|
||||
/// to schedule the two loads together. "NumLoads" is the number of loads that
|
||||
/// have already been scheduled after Load1.
|
||||
virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
|
||||
int64_t Offset1, int64_t Offset2,
|
||||
unsigned NumLoads) const;
|
||||
|
||||
virtual bool isSchedulingBoundary(const MachineInstr *MI,
|
||||
const MachineBasicBlock *MBB,
|
||||
const MachineFunction &MF) const;
|
||||
|
@ -626,9 +626,11 @@ bb24: ; preds = %bb23
|
||||
; LSR should use count-down iteration to avoid requiring the trip count
|
||||
; in a register, and it shouldn't require any reloads here.
|
||||
|
||||
; CHECK: subs r3, #1
|
||||
; CHECK-NEXT: cmp r3, #0
|
||||
; CHECK-NEXT: bne.w
|
||||
; CHECK: @ %bb24
|
||||
; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
|
||||
; CHECK-NEXT: sub{{.*}} [[REGISTER:r[0-9]+]], #1
|
||||
; CHECK-NEXT: cmp{{.*}} [[REGISTER]], #0
|
||||
; CHECK-NEXT: bne.w
|
||||
|
||||
%92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1]
|
||||
%indvar.next79 = add i32 %indvar78, 1 ; <i32> [#uses=1]
|
||||
|
Loading…
x
Reference in New Issue
Block a user