[PowerPC] Materialize i64 constants using rotation

Materializing full 64-bit constants on PPC64 can be expensive, requiring up to
5 instructions depending on the locations of the non-zero bits. Sometimes
materializing a rotated constant, and then applying the inverse rotation, requires
fewer instructions than the direct method. If so, do that instead.

In r225132, I added support for forming constants using bit inversion. In
effect, this reverts that commit and replaces it with rotation support. The bit
inversion is useful for turning constants that are mostly ones into constants
that are mostly zeros (thus enabling a more-efficient shift-based
materialization), but the same effect can be obtained by using negative
constants and a rotate, and that is at least as efficient, if not more so.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225135 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2015-01-04 15:43:55 +00:00
parent 7362319120
commit 2ac0826af3
3 changed files with 76 additions and 32 deletions

View File

@ -602,16 +602,19 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
return Result;
}
// Rotate the 64-bit value Imm left by R bit positions and return the result.
//
// The rotation amount is reduced modulo 64. A rotation by 0 (or by any
// multiple of 64) returns Imm unchanged; computing it with the generic
// formula below would require shifting a 64-bit value by 64, which is
// undefined behavior in C++.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  if (R == 0)
    return Imm;
  // Here 1 <= R <= 63, so both shift amounts are strictly less than 64.
  return (Imm << R) | (Imm >> (64 - R));
}
static unsigned SelectInt64Count(int64_t Imm) {
unsigned DirectCount = SelectInt64CountDirect(Imm);
unsigned Count = SelectInt64CountDirect(Imm);
// It might be cheaper to materialize the bit-inverted constant, and then
// flip the bits (which takes one nor instruction).
unsigned NotDirectCount = SelectInt64CountDirect(~(uint64_t) Imm) + 1;
if (NotDirectCount < DirectCount)
return NotDirectCount;
for (unsigned r = 1; r < 63; ++r) {
unsigned RCount = SelectInt64CountDirect(Rot64(Imm, r)) + 1;
Count = std::min(Count, RCount);
}
return DirectCount;
return Count;
}
// Select a 64-bit constant. For cost-modeling purposes, SelectInt64Count
@ -691,19 +694,27 @@ static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
}
static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
unsigned DirectCount = SelectInt64CountDirect(Imm);
unsigned Count = SelectInt64CountDirect(Imm);
unsigned RMin = 0;
// It might be cheaper to materialize the bit-inverted constant, and then
// flip the bits (which takes one nor instruction).
unsigned NotDirectCount = SelectInt64CountDirect(~(uint64_t) Imm) + 1;
if (NotDirectCount < DirectCount) {
SDValue NotDirectVal =
SDValue(SelectInt64Direct(CurDAG, dl, ~(uint64_t) Imm), 0);
return CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, NotDirectVal,
NotDirectVal);
for (unsigned r = 1; r < 63; ++r) {
unsigned RCount = SelectInt64CountDirect(Rot64(Imm, r)) + 1;
if (RCount < Count) {
Count = RCount;
RMin = r;
}
}
return SelectInt64Direct(CurDAG, dl, Imm);
if (!RMin)
return SelectInt64Direct(CurDAG, dl, Imm);
auto getI32Imm = [CurDAG](unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
};
SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, Rot64(Imm, RMin)), 0);
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Val,
getI32Imm(64 - RMin), getI32Imm(0));
}
// Select a 64-bit constant.

View File

@ -8,7 +8,6 @@ TODO:
On PPC64, this:
long f2 (long x) { return 0xfffffff000000000UL; }
long f3 (long x) { return 0x1ffffffffUL; }
could compile into:
@ -16,10 +15,6 @@ _f2:
li r3,-1
rldicr r3,r3,0,27
blr
_f3:
li r3,-1
rldicl r3,r3,0,31
blr
we produce:
@ -28,12 +23,6 @@ _f2:
ori r2, r2, 65535
sldi r3, r2, 36
blr
_f3:
li r2, 1
sldi r2, r2, 32
oris r2, r2, 65535
ori r3, r2, 65535
blr
===-------------------------------------------------------------------------===

View File

@ -8,13 +8,57 @@ entry:
ret i64 281474976710655
; CHECK-LABEL: @cn1
; CHECK: li [[REG1:[0-9]+]], 0
; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535
; CHECK: sldi [[REG3:[0-9]+]], [[REG2]], 48
; CHECK: nor 3, [[REG3]], [[REG3]]
; CHECK: lis [[REG1:[0-9]+]], -1
; CHECK: rldicl 3, [[REG1]], 48, 0
; CHECK: blr
}
; Function Attrs: nounwind readnone
define i64 @cnb() #0 {
entry:
ret i64 281474976710575
; CHECK-LABEL: @cnb
; CHECK: lis [[REG1:[0-9]+]], -81
; CHECK: rldicl 3, [[REG1]], 48, 0
; CHECK: blr
}
; Function Attrs: nounwind readnone
define i64 @f2n(i64 %x) #0 {
entry:
ret i64 68719476735
; CHECK-LABEL: @f2n
; CHECK: lis [[REG1:[0-9]+]], -4096
; CHECK: rldicl 3, [[REG1]], 36, 0
; CHECK: blr
}
; Function Attrs: nounwind readnone
define i64 @f3(i64 %x) #0 {
entry:
ret i64 8589934591
; CHECK-LABEL: @f3
; CHECK: lis [[REG1:[0-9]+]], -32768
; CHECK: rldicl 3, [[REG1]], 33, 0
; CHECK: blr
}
; Function Attrs: nounwind readnone
define i64 @cn2n() #0 {
entry:
ret i64 -1407374887747585
; CHECK-LABEL: @cn2n
; CHECK: lis [[REG1:[0-9]+]], -5121
; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65534
; CHECK: rldicl 3, [[REG2]], 22, 0
; CHECK: blr
}
attributes #0 = { nounwind readnone }