mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-24 12:55:45 +00:00
Add combiner patterns to more effectively utilize the BFI (bitfield insert)
instruction for non-constant operands. This includes the bitfield-copy case referenced in README.txt.

llvm-svn: 108608
This commit is contained in:
parent
e31ee27cbe
commit
270540da7b
@ -4240,21 +4240,33 @@ static SDValue PerformMULCombine(SDNode *N,
|
||||
static SDValue PerformORCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const ARMSubtarget *Subtarget) {
|
||||
// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
|
||||
// reasonable.
|
||||
|
||||
// BFI is only available on V6T2+
|
||||
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
|
||||
return SDValue();
|
||||
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
|
||||
// or (and A, mask), val => ARMbfi A, val, mask
|
||||
// iff (val & mask) == val
|
||||
if (N0->getOpcode() != ISD::AND)
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
// 1) or (and A, mask), val => ARMbfi A, val, mask
|
||||
// iff (val & mask) == val
|
||||
//
|
||||
// 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
|
||||
// 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
|
||||
// && CountPopulation_32(mask) == CountPopulation_32(~mask2)
|
||||
// 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
|
||||
// && CountPopulation_32(mask) == CountPopulation_32(~mask2)
|
||||
// (i.e., copy a bitfield value into another bitfield of the same width)
|
||||
if (N0.getOpcode() != ISD::AND)
|
||||
return SDValue();
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
if (VT != MVT::i32)
|
||||
return SDValue();
|
||||
|
||||
|
||||
// The value and the mask need to be constants so we can verify this is
|
||||
// actually a bitfield set. If the mask is 0xffff, we can do better
|
||||
// via a movt instruction, so don't use BFI in that case.
|
||||
@ -4264,21 +4276,61 @@ static SDValue PerformORCombine(SDNode *N,
|
||||
unsigned Mask = C->getZExtValue();
|
||||
if (Mask == 0xffff)
|
||||
return SDValue();
|
||||
C = dyn_cast<ConstantSDNode>(N1);
|
||||
if (!C)
|
||||
return SDValue();
|
||||
unsigned Val = C->getZExtValue();
|
||||
if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
|
||||
return SDValue();
|
||||
Val >>= CountTrailingZeros_32(~Mask);
|
||||
SDValue Res;
|
||||
// Case (1): or (and A, mask), val => ARMbfi A, val, mask
|
||||
if ((C = dyn_cast<ConstantSDNode>(N1))) {
|
||||
unsigned Val = C->getZExtValue();
|
||||
if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
|
||||
return SDValue();
|
||||
Val >>= CountTrailingZeros_32(~Mask);
|
||||
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
SDValue Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
|
||||
DAG.getConstant(Val, MVT::i32),
|
||||
DAG.getConstant(Mask, MVT::i32));
|
||||
Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
|
||||
DAG.getConstant(Val, MVT::i32),
|
||||
DAG.getConstant(Mask, MVT::i32));
|
||||
|
||||
// Do not add new nodes to DAG combiner worklist.
|
||||
DCI.CombineTo(N, Res, false);
|
||||
// Do not add new nodes to DAG combiner worklist.
|
||||
DCI.CombineTo(N, Res, false);
|
||||
} else if (N1.getOpcode() == ISD::AND) {
|
||||
// case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
|
||||
C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
|
||||
if (!C)
|
||||
return SDValue();
|
||||
unsigned Mask2 = C->getZExtValue();
|
||||
|
||||
if (ARM::isBitFieldInvertedMask(Mask) &&
|
||||
ARM::isBitFieldInvertedMask(~Mask2) &&
|
||||
(CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
|
||||
// The pack halfword instruction works better for masks that fit it,
|
||||
// so use that when it's available.
|
||||
if (Subtarget->hasT2ExtractPack() &&
|
||||
(Mask == 0xffff || Mask == 0xffff0000))
|
||||
return SDValue();
|
||||
// 2a
|
||||
unsigned lsb = CountTrailingZeros_32(Mask2);
|
||||
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
|
||||
DAG.getConstant(lsb, MVT::i32));
|
||||
Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
|
||||
DAG.getConstant(Mask, MVT::i32));
|
||||
// Do not add new nodes to DAG combiner worklist.
|
||||
DCI.CombineTo(N, Res, false);
|
||||
} else if (ARM::isBitFieldInvertedMask(~Mask) &&
|
||||
ARM::isBitFieldInvertedMask(Mask2) &&
|
||||
(CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
|
||||
// The pack halfword instruction works better for masks that fit it,
|
||||
// so use that when it's available.
|
||||
if (Subtarget->hasT2ExtractPack() &&
|
||||
(Mask2 == 0xffff || Mask2 == 0xffff0000))
|
||||
return SDValue();
|
||||
// 2b
|
||||
unsigned lsb = CountTrailingZeros_32(Mask);
|
||||
Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
|
||||
DAG.getConstant(lsb, MVT::i32));
|
||||
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
|
||||
DAG.getConstant(Mask2, MVT::i32));
|
||||
// Do not add new nodes to DAG combiner worklist.
|
||||
DCI.CombineTo(N, Res, false);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -609,27 +609,6 @@ We currently generate:
|
||||
We should be able to replace the second ldr+and with a bic (i.e. reuse the
|
||||
constant which was already loaded). Not sure what's necessary to do that.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Given the following on ARMv7:
|
||||
int test1(int A, int B) {
|
||||
return (A&-8388481)|(B&8388480);
|
||||
}
|
||||
|
||||
We currently generate:
|
||||
bfc r0, #7, #16
|
||||
movw r2, #:lower16:8388480
|
||||
movt r2, #:upper16:8388480
|
||||
and r1, r1, r2
|
||||
orr r0, r1, r0
|
||||
bx lr
|
||||
|
||||
The following is much shorter:
|
||||
lsr r1, r1, #7
|
||||
bfi r0, r1, #7, #16
|
||||
bx lr
|
||||
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
|
||||
|
@ -15,3 +15,26 @@ entry:
|
||||
store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize {
|
||||
entry:
|
||||
; CHECK: f2
|
||||
; CHECK: mov r1, r1, lsr #7
|
||||
; CHECK: bfi r0, r1, #7, #16
|
||||
%and = and i32 %A, -8388481 ; <i32> [#uses=1]
|
||||
%and2 = and i32 %B, 8388480 ; <i32> [#uses=1]
|
||||
%or = or i32 %and2, %and ; <i32> [#uses=1]
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize {
|
||||
entry:
|
||||
; CHECK: f3
|
||||
; CHECK: mov r2, r0, lsr #7
|
||||
; CHECK: mov r0, r1
|
||||
; CHECK: bfi r0, r2, #7, #16
|
||||
%and = and i32 %A, 8388480 ; <i32> [#uses=1]
|
||||
%and2 = and i32 %B, -8388481 ; <i32> [#uses=1]
|
||||
%or = or i32 %and2, %and ; <i32> [#uses=1]
|
||||
ret i32 %or
|
||||
}
|
||||
|
@ -15,3 +15,26 @@ entry:
|
||||
store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize {
|
||||
entry:
|
||||
; CHECK: f2
|
||||
; CHECK: lsrs r1, r1, #7
|
||||
; CHECK: bfi r0, r1, #7, #16
|
||||
%and = and i32 %A, -8388481 ; <i32> [#uses=1]
|
||||
%and2 = and i32 %B, 8388480 ; <i32> [#uses=1]
|
||||
%or = or i32 %and2, %and ; <i32> [#uses=1]
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize {
|
||||
entry:
|
||||
; CHECK: f3
|
||||
; CHECK: lsrs r2, r0, #7
|
||||
; CHECK: mov r0, r1
|
||||
; CHECK: bfi r0, r2, #7, #16
|
||||
%and = and i32 %A, 8388480 ; <i32> [#uses=1]
|
||||
%and2 = and i32 %B, -8388481 ; <i32> [#uses=1]
|
||||
%or = or i32 %and2, %and ; <i32> [#uses=1]
|
||||
ret i32 %or
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user