mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-11 06:56:12 +00:00
[mips][msa] Combine binsri-like DAG of AND and OR into equivalent VSELECT
(or (and $a, $mask), (and $b, $inverse_mask)) => (vselect $mask, $a, $b), where $mask is a constant splat. This allows bitwise operations to make use of bsel. It's also a stepping stone towards matching bins[lr] and bins[lr]i from normal IR. Two sets of similar tests have been added in this commit. The bsel_* functions test the case where binsri cannot be used. The binsr_*_i functions will start to use the binsri instruction in the next commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193682 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
aed9334acf
commit
a7c3cac871
@ -93,6 +93,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
|
||||
addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
|
||||
|
||||
setTargetDAGCombine(ISD::AND);
|
||||
setTargetDAGCombine(ISD::OR);
|
||||
setTargetDAGCombine(ISD::SRA);
|
||||
setTargetDAGCombine(ISD::VSELECT);
|
||||
setTargetDAGCombine(ISD::XOR);
|
||||
@ -487,6 +488,110 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Determine if the specified node is a constant vector splat.
|
||||
//
|
||||
// Returns true and sets Imm if:
|
||||
// * N is a ISD::BUILD_VECTOR representing a constant splat
|
||||
//
|
||||
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
|
||||
// differences are that it assumes the MSA has already been checked and the
|
||||
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
|
||||
// must not be in order for binsri.d to be selectable).
|
||||
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
|
||||
BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());
|
||||
|
||||
if (Node == NULL)
|
||||
return false;
|
||||
|
||||
APInt SplatValue, SplatUndef;
|
||||
unsigned SplatBitSize;
|
||||
bool HasAnyUndefs;
|
||||
|
||||
if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
|
||||
8, !IsLittleEndian))
|
||||
return false;
|
||||
|
||||
Imm = SplatValue;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Perform combines where ISD::OR is the root node.
|
||||
//
|
||||
// Performs the following transformations:
|
||||
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
|
||||
// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
|
||||
// vector type.
|
||||
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const MipsSubtarget *Subtarget) {
|
||||
if (!Subtarget->hasMSA())
|
||||
return SDValue();
|
||||
|
||||
EVT Ty = N->getValueType(0);
|
||||
|
||||
if (!Ty.is128BitVector())
|
||||
return SDValue();
|
||||
|
||||
SDValue Op0 = N->getOperand(0);
|
||||
SDValue Op1 = N->getOperand(1);
|
||||
|
||||
if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
|
||||
SDValue Op0Op0 = Op0->getOperand(0);
|
||||
SDValue Op0Op1 = Op0->getOperand(1);
|
||||
SDValue Op1Op0 = Op1->getOperand(0);
|
||||
SDValue Op1Op1 = Op1->getOperand(1);
|
||||
bool IsLittleEndian = !Subtarget->isLittle();
|
||||
|
||||
SDValue IfSet, IfClr, Cond;
|
||||
APInt Mask, InvMask;
|
||||
|
||||
// If Op0Op0 is an appropriate mask, try to find it's inverse in either
|
||||
// Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
|
||||
// looking.
|
||||
// IfClr will be set if we find a valid match.
|
||||
if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
|
||||
Cond = Op0Op0;
|
||||
IfSet = Op0Op1;
|
||||
|
||||
if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && Mask == ~InvMask)
|
||||
IfClr = Op1Op1;
|
||||
else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask)
|
||||
IfClr = Op1Op0;
|
||||
}
|
||||
|
||||
// If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
|
||||
// thing again using this mask.
|
||||
// IfClr will be set if we find a valid match.
|
||||
if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
|
||||
Cond = Op0Op1;
|
||||
IfSet = Op0Op0;
|
||||
|
||||
if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && Mask == ~InvMask)
|
||||
IfClr = Op1Op1;
|
||||
else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask)
|
||||
IfClr = Op1Op0;
|
||||
}
|
||||
|
||||
// At this point, IfClr will be set if we have a valid match.
|
||||
if (!IfClr.getNode())
|
||||
return SDValue();
|
||||
|
||||
assert(Cond.getNode() && IfSet.getNode());
|
||||
|
||||
// Fold degenerate cases.
|
||||
if (Mask.isAllOnesValue())
|
||||
return IfSet;
|
||||
else if (Mask == 0)
|
||||
return IfClr;
|
||||
|
||||
// Transform the DAG into an equivalent VSELECT.
|
||||
return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfClr, IfSet);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const MipsSubtarget *Subtarget) {
|
||||
@ -777,6 +882,9 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
|
||||
case ISD::AND:
|
||||
Val = performANDCombine(N, DAG, DCI, Subtarget);
|
||||
break;
|
||||
case ISD::OR:
|
||||
Val = performORCombine(N, DAG, DCI, Subtarget);
|
||||
break;
|
||||
case ISD::SUBE:
|
||||
return performSUBECombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::MUL:
|
||||
|
@ -972,6 +972,170 @@ define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
|
||||
; CHECK: .size ctlz_v2i64
|
||||
}
|
||||
|
||||
; (%a & splat(6)) | (%b & splat(249)) on <16 x i8>, expected to select bsel.v.
; Per the commit message, the bsel_* tests use masks binsri cannot handle.
define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: bsel_v16i8:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %lhs.masked = and <16 x i8> %lhs,
                    <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6,
                     i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
  %rhs.masked = and <16 x i8> %rhs,
                    <i8 249, i8 249, i8 249, i8 249, i8 249, i8 249, i8 249, i8 249,
                     i8 249, i8 249, i8 249, i8 249, i8 249, i8 249, i8 249, i8 249>
  %merged = or <16 x i8> %lhs.masked, %rhs.masked
  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <16 x i8> %merged, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v16i8
}
|
||||
|
||||
; (%a & splat(6)) | (%b & splat(65529)) on <8 x i16>, expected to select
; bsel.v. Per the commit message, the bsel_* tests use masks binsri cannot
; handle.
define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: bsel_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %lhs.masked = and <8 x i16> %lhs,
                    <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
  %rhs.masked = and <8 x i16> %rhs,
                    <i16 65529, i16 65529, i16 65529, i16 65529,
                     i16 65529, i16 65529, i16 65529, i16 65529>
  %merged = or <8 x i16> %lhs.masked, %rhs.masked
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <8 x i16> %merged, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v8i16
}
|
||||
|
||||
; (%a & splat(6)) | (%b & splat(4294967289)) on <4 x i32>, expected to select
; bsel.v. Per the commit message, the bsel_* tests use masks binsri cannot
; handle.
define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: bsel_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %lhs.masked = and <4 x i32> %lhs, <i32 6, i32 6, i32 6, i32 6>
  %rhs.masked = and <4 x i32> %rhs,
                    <i32 4294967289, i32 4294967289,
                     i32 4294967289, i32 4294967289>
  %merged = or <4 x i32> %lhs.masked, %rhs.masked
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <4 x i32> %merged, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v4i32
}
|
||||
|
||||
; (%a & splat(6)) | (%b & splat(18446744073709551609)) on <2 x i64>, expected
; to select bsel.v. Per the commit message, the bsel_* tests use masks binsri
; cannot handle.
define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: bsel_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %lhs.masked = and <2 x i64> %lhs, <i64 6, i64 6>
  %rhs.masked = and <2 x i64> %rhs,
                    <i64 18446744073709551609, i64 18446744073709551609>
  %merged = or <2 x i64> %lhs.masked, %rhs.masked
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <2 x i64> %merged, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v2i64
}
|
||||
|
||||
; (%a & splat(3)) | (%b & splat(252)) on <16 x i8>. The checks currently
; expect bsel.v; per the commit message, the binsr_*_i tests are meant to
; switch to binsri in a follow-up commit.
define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: binsr_v16i8_i:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %lhs.masked = and <16 x i8> %lhs,
                    <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                     i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %rhs.masked = and <16 x i8> %rhs,
                    <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252,
                     i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
  %merged = or <16 x i8> %lhs.masked, %rhs.masked
  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <16 x i8> %merged, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size binsr_v16i8_i
}
|
||||
|
||||
; (%a & splat(3)) | (%b & splat(65532)) on <8 x i16>. The checks currently
; expect bsel.v; per the commit message, the binsr_*_i tests are meant to
; switch to binsri in a follow-up commit.
define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: binsr_v8i16_i:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %lhs.masked = and <8 x i16> %lhs,
                    <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %rhs.masked = and <8 x i16> %rhs,
                    <i16 65532, i16 65532, i16 65532, i16 65532,
                     i16 65532, i16 65532, i16 65532, i16 65532>
  %merged = or <8 x i16> %lhs.masked, %rhs.masked
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <8 x i16> %merged, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size binsr_v8i16_i
}
|
||||
|
||||
; (%a & splat(3)) | (%b & splat(4294967292)) on <4 x i32>. The checks
; currently expect bsel.v; per the commit message, the binsr_*_i tests are
; meant to switch to binsri in a follow-up commit.
define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: binsr_v4i32_i:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %lhs.masked = and <4 x i32> %lhs, <i32 3, i32 3, i32 3, i32 3>
  %rhs.masked = and <4 x i32> %rhs,
                    <i32 4294967292, i32 4294967292,
                     i32 4294967292, i32 4294967292>
  %merged = or <4 x i32> %lhs.masked, %rhs.masked
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <4 x i32> %merged, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size binsr_v4i32_i
}
|
||||
|
||||
; (%a & splat(3)) | (%b & splat(18446744073709551612)) on <2 x i64>. The
; checks currently expect bsel.v; per the commit message, the binsr_*_i tests
; are meant to switch to binsri in a follow-up commit.
define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: binsr_v2i64_i:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %lhs.masked = and <2 x i64> %lhs, <i64 3, i64 3>
  %rhs.masked = and <2 x i64> %rhs,
                    <i64 18446744073709551612, i64 18446744073709551612>
  %merged = or <2 x i64> %lhs.masked, %rhs.masked
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <2 x i64> %merged, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size binsr_v2i64_i
}
|
||||
|
||||
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val)
|
||||
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val)
|
||||
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val)
|
||||
|
Loading…
x
Reference in New Issue
Block a user