mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-14 07:31:53 +00:00
Lower vselects into X86ISD::BLENDI when appropriate.
LowerVSELECT will, if possible, generate an X86ISD::BLENDI DAG node if the condition is constant and we can emit that instruction, given the subtarget. This is not enough for all cases. An additional SELECTCombine optimization will be committed. Fixed tests that were expecting variable blends but where a blend+imm can be generated. Added test where we can't emit blend+immediate. Added avx2 blend+imm tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209043 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
38cbea8057
commit
5ea7215050
@ -7971,7 +7971,87 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// This function assumes its argument is a BUILD_VECTOR of constand or
|
||||
// undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
|
||||
// true.
|
||||
static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
|
||||
unsigned &MaskValue) {
|
||||
MaskValue = 0;
|
||||
unsigned NumElems = BuildVector->getNumOperands();
|
||||
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
|
||||
unsigned NumLanes = (NumElems - 1) / 8 + 1;
|
||||
unsigned NumElemsInLane = NumElems / NumLanes;
|
||||
|
||||
// Blend for v16i16 should be symetric for the both lanes.
|
||||
for (unsigned i = 0; i < NumElemsInLane; ++i) {
|
||||
SDValue EltCond = BuildVector->getOperand(i);
|
||||
SDValue SndLaneEltCond =
|
||||
(NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
|
||||
|
||||
int Lane1Cond = -1, Lane2Cond = -1;
|
||||
if (isa<ConstantSDNode>(EltCond))
|
||||
Lane1Cond = !isZero(EltCond);
|
||||
if (isa<ConstantSDNode>(SndLaneEltCond))
|
||||
Lane2Cond = !isZero(SndLaneEltCond);
|
||||
|
||||
if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
|
||||
MaskValue |= !!Lane1Cond << i;
|
||||
else if (Lane1Cond < 0)
|
||||
MaskValue |= !!Lane2Cond << i;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try to lower a vselect node into a simple blend instruction.
|
||||
static SDValue LowerVSELECTtoBlend(SDValue Op, const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDValue Cond = Op.getOperand(0);
|
||||
SDValue LHS = Op.getOperand(1);
|
||||
SDValue RHS = Op.getOperand(2);
|
||||
SDLoc dl(Op);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
|
||||
// There is no blend with immediate in AVX-512.
|
||||
if (VT.is512BitVector())
|
||||
return SDValue();
|
||||
|
||||
if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
|
||||
return SDValue();
|
||||
if (!Subtarget->hasInt256() && VT == MVT::v16i16)
|
||||
return SDValue();
|
||||
|
||||
if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
|
||||
return SDValue();
|
||||
|
||||
// Check the mask for BLEND and build the value.
|
||||
unsigned MaskValue = 0;
|
||||
if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
|
||||
return SDValue();
|
||||
|
||||
// Convert i32 vectors to floating point if it is not AVX2.
|
||||
// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
|
||||
MVT BlendVT = VT;
|
||||
if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
|
||||
BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
|
||||
NumElems);
|
||||
LHS = DAG.getNode(ISD::BITCAST, dl, VT, LHS);
|
||||
RHS = DAG.getNode(ISD::BITCAST, dl, VT, RHS);
|
||||
}
|
||||
|
||||
SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, LHS, RHS,
|
||||
DAG.getConstant(MaskValue, MVT::i32));
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue BlendOp = LowerVSELECTtoBlend(Op, Subtarget, DAG);
|
||||
if (BlendOp.getNode())
|
||||
return BlendOp;
|
||||
|
||||
// Some types for vselect were previously set to Expand, not Legal or
|
||||
// Custom. Return an empty SDValue so we fall-through to Expand, after
|
||||
// the Custom lowering phase.
|
||||
@ -7984,7 +8064,9 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// This node is Legal.
|
||||
// We couldn't create a "Blend with immediate" node.
|
||||
// This node should still be legal, but we'll have to emit a blendv*
|
||||
// instruction.
|
||||
return Op;
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
; AVX128 tests:
|
||||
|
||||
;CHECK-LABEL: vsel_float:
|
||||
;CHECK: vblendvps
|
||||
;CHECK: vblendps $5
|
||||
;CHECK: ret
|
||||
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
|
||||
@ -12,7 +12,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
||||
|
||||
|
||||
;CHECK-LABEL: vsel_i32:
|
||||
;CHECK: vblendvps
|
||||
;CHECK: vblendps $5
|
||||
;CHECK: ret
|
||||
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
|
||||
@ -52,7 +52,7 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
|
||||
|
||||
;CHECK-LABEL: vsel_float8:
|
||||
;CHECK-NOT: vinsertf128
|
||||
;CHECK: vblendvps
|
||||
;CHECK: vblendps $17
|
||||
;CHECK: ret
|
||||
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
|
||||
@ -61,7 +61,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
|
||||
|
||||
;CHECK-LABEL: vsel_i328:
|
||||
;CHECK-NOT: vinsertf128
|
||||
;CHECK: vblendvps
|
||||
;CHECK: vblendps $17
|
||||
;CHECK-NEXT: ret
|
||||
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
|
||||
@ -86,7 +86,7 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
|
||||
|
||||
;CHECK-LABEL: vsel_double4:
|
||||
;CHECK-NOT: vinsertf128
|
||||
;CHECK: vblendvpd
|
||||
;CHECK: vblendpd $5
|
||||
;CHECK-NEXT: ret
|
||||
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
|
||||
|
25
test/CodeGen/X86/avx2.ll
Normal file
25
test/CodeGen/X86/avx2.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
|
||||
|
||||
; The mask is a runtime argument rather than a constant vector, so this select is expected to lower to the variable blend vblendvps.
define <4 x i32> @blendvb_fallback_v4i32(<4 x i1> %mask, <4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-LABEL: @blendvb_fallback_v4i32
|
||||
; CHECK: vblendvps
|
||||
; CHECK: ret
|
||||
%ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %y
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; 256-bit integer variant: the mask is a runtime argument, so the select is expected to lower to the variable blend vblendvps.
define <8 x i32> @blendvb_fallback_v8i32(<8 x i1> %mask, <8 x i32> %x, <8 x i32> %y) {
|
||||
; CHECK-LABEL: @blendvb_fallback_v8i32
|
||||
; CHECK: vblendvps
|
||||
; CHECK: ret
|
||||
%ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
; 256-bit float variant: the mask is a runtime argument, so the select is expected to lower to the variable blend vblendvps.
define <8 x float> @blendvb_fallback_v8f32(<8 x i1> %mask, <8 x float> %x, <8 x float> %y) {
|
||||
; CHECK-LABEL: @blendvb_fallback_v8f32
|
||||
; CHECK: vblendvps
|
||||
; CHECK: ret
|
||||
%ret = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
|
||||
ret <8 x float> %ret
|
||||
}
|
@ -4,7 +4,7 @@
|
||||
; Verify that we produce movss instead of blendvps when possible.
|
||||
|
||||
;CHECK-LABEL: vsel_float:
|
||||
;CHECK-NOT: blendvps
|
||||
;CHECK-NOT: blend
|
||||
;CHECK: movss
|
||||
;CHECK: ret
|
||||
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
||||
@ -13,7 +13,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
||||
}
|
||||
|
||||
;CHECK-LABEL: vsel_4xi8:
|
||||
;CHECK-NOT: blendvps
|
||||
;CHECK-NOT: blend
|
||||
;CHECK: movss
|
||||
;CHECK: ret
|
||||
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
|
||||
@ -21,14 +21,8 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
|
||||
ret <4 x i8> %vsel
|
||||
}
|
||||
|
||||
|
||||
; We do not have native support for v8i16 blends and we have to use the
|
||||
; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not
|
||||
; reduce the mask in this case.
|
||||
;CHECK-LABEL: vsel_8xi16:
|
||||
;CHECK: andps
|
||||
;CHECK: andps
|
||||
;CHECK: orps
|
||||
;CHECK: pblendw $17
|
||||
;CHECK: ret
|
||||
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
|
||||
|
||||
;CHECK-LABEL: vsel_float:
|
||||
;CHECK: blendvps
|
||||
;CHECK: blendps
|
||||
;CHECK: ret
|
||||
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %v1, <4 x float> %v2
|
||||
@ -10,7 +10,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
||||
|
||||
|
||||
;CHECK-LABEL: vsel_4xi8:
|
||||
;CHECK: blendvps
|
||||
;CHECK: blendps
|
||||
;CHECK: ret
|
||||
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
|
||||
@ -18,7 +18,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
|
||||
}
|
||||
|
||||
;CHECK-LABEL: vsel_4xi16:
|
||||
;CHECK: blendvps
|
||||
;CHECK: blendps
|
||||
;CHECK: ret
|
||||
define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
|
||||
@ -27,7 +27,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
|
||||
|
||||
|
||||
;CHECK-LABEL: vsel_i32:
|
||||
;CHECK: blendvps
|
||||
;CHECK: blendps
|
||||
;CHECK: ret
|
||||
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2
|
||||
|
@ -576,3 +576,11 @@ define < 4 x float> @test_insertps_no_undef(<4 x float> %x) {
|
||||
%res = select <4 x i1> %mask, <4 x float> %x, <4 x float>%vecinit5
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; The v8i16 select uses a runtime mask argument rather than a constant vector, so the output is checked for a variable blend (blendvb).
define <8 x i16> @blendvb_fallback(<8 x i1> %mask, <8 x i16> %x, <8 x i16> %y) {
|
||||
; CHECK-LABEL: blendvb_fallback
|
||||
; CHECK: blendvb
|
||||
; CHECK: ret
|
||||
%ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user