mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-16 02:57:52 +00:00
AVX2: Build splat vectors by broadcasting a scalar from the constant pool.
Previously we used three instructions to broadcast an immediate value into a vector register. On Sandybridge we continue to load the broadcasted value from the constant pool.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154284 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
864737cc51
commit
9d68b06bc5
@ -4852,41 +4852,41 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
|
/// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
|
||||||
/// a vbroadcast node. We support two patterns:
|
/// to generate a splat value for the following cases:
|
||||||
/// 1. A splat BUILD_VECTOR which uses a single scalar load.
|
/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
|
||||||
/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
|
/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
|
||||||
/// a scalar load.
|
/// a scalar load, or a constant.
|
||||||
/// The scalar load node is returned when a pattern is found,
|
/// The VBROADCAST node is returned when a pattern is found,
|
||||||
/// or SDValue() otherwise.
|
/// or SDValue() otherwise.
|
||||||
static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) {
|
static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget,
|
||||||
|
DebugLoc &dl, SelectionDAG &DAG) {
|
||||||
if (!Subtarget->hasAVX())
|
if (!Subtarget->hasAVX())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
EVT VT = Op.getValueType();
|
EVT VT = Op.getValueType();
|
||||||
SDValue V = Op;
|
|
||||||
|
|
||||||
if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
|
|
||||||
V = V.getOperand(0);
|
|
||||||
|
|
||||||
//A suspected load to be broadcasted.
|
|
||||||
SDValue Ld;
|
SDValue Ld;
|
||||||
|
bool ConstSplatVal;
|
||||||
|
|
||||||
switch (V.getOpcode()) {
|
switch (Op.getOpcode()) {
|
||||||
default:
|
default:
|
||||||
// Unknown pattern found.
|
// Unknown pattern found.
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
case ISD::BUILD_VECTOR: {
|
case ISD::BUILD_VECTOR: {
|
||||||
// The BUILD_VECTOR node must be a splat.
|
// The BUILD_VECTOR node must be a splat.
|
||||||
if (!isSplatVector(V.getNode()))
|
if (!isSplatVector(Op.getNode()))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
Ld = V.getOperand(0);
|
Ld = Op.getOperand(0);
|
||||||
|
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
|
||||||
|
Ld.getOpcode() == ISD::ConstantFP);
|
||||||
|
|
||||||
// The suspected load node has several users. Make sure that all
|
// The suspected load node has several users. Make sure that all
|
||||||
// of its users are from the BUILD_VECTOR node.
|
// of its users are from the BUILD_VECTOR node.
|
||||||
if (!Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
|
// Constants may have multiple users.
|
||||||
|
if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -4904,15 +4904,57 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) {
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
Ld = Sc.getOperand(0);
|
Ld = Sc.getOperand(0);
|
||||||
|
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
|
||||||
|
Ld.getOpcode() == ISD::ConstantFP);
|
||||||
|
|
||||||
// The scalar_to_vector node and the suspected
|
// The scalar_to_vector node and the suspected
|
||||||
// load node must have exactly one user.
|
// load node must have exactly one user.
|
||||||
if (!Sc.hasOneUse() || !Ld.hasOneUse())
|
// Constants may have multiple users.
|
||||||
|
if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse()))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Is256 = VT.getSizeInBits() == 256;
|
||||||
|
bool Is128 = VT.getSizeInBits() == 128;
|
||||||
|
|
||||||
|
// Handle broadcasting a single constant scalar from the constant pool
|
||||||
|
// into a vector. On Sandybridge it is still better to load a constant vector
|
||||||
|
// from the constant pool and not to broadcast it from a scalar.
|
||||||
|
if (ConstSplatVal && Subtarget->hasAVX2()) {
|
||||||
|
EVT CVT = Ld.getValueType();
|
||||||
|
assert(!CVT.isVector() && "Must not broadcast a vector type");
|
||||||
|
unsigned ScalarSize = CVT.getSizeInBits();
|
||||||
|
|
||||||
|
if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) ||
|
||||||
|
(Is128 && (ScalarSize == 32))) {
|
||||||
|
|
||||||
|
// This is the type of the load operation for the constant that we save
|
||||||
|
// in the constant pool. We can't load float values from the constant pool
|
||||||
|
// because the DAG has to be legal at this stage.
|
||||||
|
MVT LdTy = (ScalarSize == 32 ? MVT::i32 : MVT::i64);
|
||||||
|
|
||||||
|
const Constant *C = 0;
|
||||||
|
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
|
||||||
|
C = CI->getConstantIntValue();
|
||||||
|
else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
|
||||||
|
C = CF->getConstantFPValue();
|
||||||
|
|
||||||
|
assert(C && "Invalid constant type");
|
||||||
|
|
||||||
|
SDValue CP = DAG.getConstantPool(C, LdTy);
|
||||||
|
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
|
||||||
|
Ld = DAG.getLoad(LdTy, dl, DAG.getEntryNode(), CP,
|
||||||
|
MachinePointerInfo::getConstantPool(),
|
||||||
|
false, false, false, Alignment);
|
||||||
|
|
||||||
|
// Bitcast the loaded constant back to the requested type.
|
||||||
|
Ld = DAG.getNode(ISD::BITCAST, dl, CVT, Ld);
|
||||||
|
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// The scalar source must be a normal load.
|
// The scalar source must be a normal load.
|
||||||
if (!ISD::isNormalLoad(Ld.getNode()))
|
if (!ISD::isNormalLoad(Ld.getNode()))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
@ -4921,28 +4963,26 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) {
|
|||||||
if (Ld->hasAnyUseOfValue(1))
|
if (Ld->hasAnyUseOfValue(1))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
bool Is256 = VT.getSizeInBits() == 256;
|
|
||||||
bool Is128 = VT.getSizeInBits() == 128;
|
|
||||||
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
|
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
|
||||||
|
|
||||||
// VBroadcast to YMM
|
// VBroadcast to YMM
|
||||||
if (Is256 && (ScalarSize == 32 || ScalarSize == 64))
|
if (Is256 && (ScalarSize == 32 || ScalarSize == 64))
|
||||||
return Ld;
|
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
|
||||||
|
|
||||||
// VBroadcast to XMM
|
// VBroadcast to XMM
|
||||||
if (Is128 && (ScalarSize == 32))
|
if (Is128 && (ScalarSize == 32))
|
||||||
return Ld;
|
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
|
||||||
|
|
||||||
// The integer check is needed for the 64-bit into 128-bit so it doesn't match
|
// The integer check is needed for the 64-bit into 128-bit so it doesn't match
|
||||||
// double since there is no vbroadcastsd xmm
|
// double since there is no vbroadcastsd xmm
|
||||||
if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
|
if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
|
||||||
// VBroadcast to YMM
|
// VBroadcast to YMM
|
||||||
if (Is256 && (ScalarSize == 8 || ScalarSize == 16))
|
if (Is256 && (ScalarSize == 8 || ScalarSize == 16))
|
||||||
return Ld;
|
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
|
||||||
|
|
||||||
// VBroadcast to XMM
|
// VBroadcast to XMM
|
||||||
if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64))
|
if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64))
|
||||||
return Ld;
|
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unsupported broadcast.
|
// Unsupported broadcast.
|
||||||
@ -4977,9 +5017,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
|
return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue LD = isVectorBroadcast(Op, Subtarget);
|
SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, dl, DAG);
|
||||||
if (LD.getNode())
|
if (Broadcast.getNode())
|
||||||
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD);
|
return Broadcast;
|
||||||
|
|
||||||
unsigned EVTBits = ExtVT.getSizeInBits();
|
unsigned EVTBits = ExtVT.getSizeInBits();
|
||||||
|
|
||||||
@ -6205,9 +6245,9 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
|
|||||||
int Size = VT.getSizeInBits();
|
int Size = VT.getSizeInBits();
|
||||||
|
|
||||||
// Use vbroadcast whenever the splat comes from a foldable load
|
// Use vbroadcast whenever the splat comes from a foldable load
|
||||||
SDValue LD = isVectorBroadcast(Op, Subtarget);
|
SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, dl, DAG);
|
||||||
if (LD.getNode())
|
if (Broadcast.getNode())
|
||||||
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD);
|
return Broadcast;
|
||||||
|
|
||||||
// Handle splats by matching through known shuffle masks
|
// Handle splats by matching through known shuffle masks
|
||||||
if ((Size == 128 && NumElem <= 4) ||
|
if ((Size == 128 && NumElem <= 4) ||
|
||||||
|
@ -47,7 +47,7 @@ entry:
|
|||||||
;;;; 128-bit versions
|
;;;; 128-bit versions
|
||||||
|
|
||||||
; CHECK: vbroadcastss (%
|
; CHECK: vbroadcastss (%
|
||||||
define <4 x float> @E(float* %ptr) nounwind uwtable readnone ssp {
|
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
%q = load float* %ptr, align 4
|
%q = load float* %ptr, align 4
|
||||||
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
|
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
|
||||||
@ -57,6 +57,19 @@ entry:
|
|||||||
ret <4 x float> %vecinit6.i
|
ret <4 x float> %vecinit6.i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; CHECK: _e2
|
||||||
|
; CHECK-NOT: vbroadcastss
|
||||||
|
; CHECK: ret
|
||||||
|
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
|
||||||
|
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
|
||||||
|
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
|
||||||
|
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
|
||||||
|
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
|
||||||
|
ret <4 x float> %vecinit6.i
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
; CHECK: vbroadcastss (%
|
; CHECK: vbroadcastss (%
|
||||||
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
|
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
@ -71,7 +84,7 @@ entry:
|
|||||||
; Unsupported vbroadcasts
|
; Unsupported vbroadcasts
|
||||||
|
|
||||||
; CHECK: _G
|
; CHECK: _G
|
||||||
; CHECK-NOT: vbroadcastsd (%
|
; CHECK-NOT: broadcast (%
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
|
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
@ -82,7 +95,7 @@ entry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: _H
|
; CHECK: _H
|
||||||
; CHECK-NOT: vbroadcastss
|
; CHECK-NOT: broadcast
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <4 x i32> @H(<4 x i32> %a) {
|
define <4 x i32> @H(<4 x i32> %a) {
|
||||||
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||||
@ -90,7 +103,7 @@ define <4 x i32> @H(<4 x i32> %a) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: _I
|
; CHECK: _I
|
||||||
; CHECK-NOT: vbroadcastsd (%
|
; CHECK-NOT: broadcast (%
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
|
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
|
@ -150,3 +150,24 @@ entry:
|
|||||||
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
|
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
|
||||||
ret <2 x double> %vecinit2.i
|
ret <2 x double> %vecinit2.i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK: V111
|
||||||
|
; CHECK: vpbroadcastd
|
||||||
|
; CHECK: ret
|
||||||
|
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
|
||||||
|
entry:
|
||||||
|
%g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||||
|
ret <8 x i32> %g
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: _e2
|
||||||
|
; CHECK: vbroadcastss
|
||||||
|
; CHECK: ret
|
||||||
|
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
|
||||||
|
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
|
||||||
|
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
|
||||||
|
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
|
||||||
|
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
|
||||||
|
ret <4 x float> %vecinit6.i
|
||||||
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user