mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-01 08:28:19 +00:00
Use 256-bit vpcmpeqd for creating an all ones vector when AVX2 is enabled.
llvm-svn: 145004
This commit is contained in:
parent
3e24dc25b2
commit
6ed413c495
@ -4387,23 +4387,30 @@ static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG,
|
||||
}
|
||||
|
||||
/// getOnesVector - Returns a vector of specified type with all bits set.
|
||||
/// Always build ones vectors as <4 x i32>. For 256-bit types, use two
|
||||
/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their
|
||||
/// original type, ensuring they get CSE'd.
|
||||
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
|
||||
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
|
||||
/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
|
||||
/// Then bitcast to their original type, ensuring they get CSE'd.
|
||||
static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
|
||||
DebugLoc dl) {
|
||||
assert(VT.isVector() && "Expected a vector type");
|
||||
assert((VT.is128BitVector() || VT.is256BitVector())
|
||||
&& "Expected a 128-bit or 256-bit vector type");
|
||||
|
||||
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
|
||||
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
|
||||
Cst, Cst, Cst, Cst);
|
||||
|
||||
if (VT.is256BitVector()) {
|
||||
SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
|
||||
Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
|
||||
Vec = Insert128BitVector(InsV, Vec,
|
||||
DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
|
||||
SDValue Vec;
|
||||
if (VT.getSizeInBits() == 256) {
|
||||
if (HasAVX2) { // AVX2
|
||||
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
|
||||
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
|
||||
} else { // AVX
|
||||
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
|
||||
SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
|
||||
Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
|
||||
Vec = Insert128BitVector(InsV, Vec,
|
||||
DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
|
||||
}
|
||||
} else {
|
||||
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
|
||||
@ -5221,12 +5228,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
|
||||
// vectors or broken into v4i32 operations on 256-bit vectors.
|
||||
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
|
||||
// vpcmpeqd on 256-bit vectors.
|
||||
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
|
||||
if (Op.getValueType() == MVT::v4i32)
|
||||
if (Op.getValueType() == MVT::v4i32 ||
|
||||
(Op.getValueType() == MVT::v8i32 && Subtarget->hasAVX2()))
|
||||
return Op;
|
||||
|
||||
return getOnesVector(Op.getValueType(), DAG, dl);
|
||||
return getOnesVector(Op.getValueType(), Subtarget->hasAVX2(), DAG, dl);
|
||||
}
|
||||
|
||||
SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2());
|
||||
|
@ -2902,6 +2902,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
switch (LoadMI->getOpcode()) {
|
||||
case X86::AVX_SET0PSY:
|
||||
case X86::AVX_SET0PDY:
|
||||
case X86::AVX2_SETALLONES:
|
||||
Alignment = 32;
|
||||
break;
|
||||
case X86::V_SET0:
|
||||
@ -2947,6 +2948,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
case X86::AVX_SET0PSY:
|
||||
case X86::AVX_SET0PDY:
|
||||
case X86::AVX_SETALLONES:
|
||||
case X86::AVX2_SETALLONES:
|
||||
case X86::FsFLD0SD:
|
||||
case X86::FsFLD0SS:
|
||||
case X86::VFsFLD0SD:
|
||||
@ -2985,7 +2987,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
else
|
||||
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
|
||||
|
||||
bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
|
||||
bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES ||
|
||||
Opc == X86::AVX2_SETALLONES);
|
||||
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
|
||||
Constant::getNullValue(Ty);
|
||||
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
|
||||
|
@ -318,6 +318,10 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
|
||||
isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
|
||||
def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
|
||||
[(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
|
||||
isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX2] in
|
||||
def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
|
||||
[(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -376,6 +376,7 @@ ReSimplify:
|
||||
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
|
||||
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
|
||||
case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
|
||||
case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
|
||||
|
||||
case X86::MOV16r0:
|
||||
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
|
||||
|
@ -82,3 +82,15 @@ entry:
|
||||
%cond = or <8 x i32> %1, %2
|
||||
ret <8 x i32> %cond
|
||||
}
|
||||
|
||||
define <8 x i32> @allOnes() nounwind {
|
||||
; CHECK: vpcmpeqd
|
||||
; CHECK-NOT: vinsert
|
||||
ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
}
|
||||
|
||||
define <16 x i16> @allOnes2() nounwind {
|
||||
; CHECK: vpcmpeqd
|
||||
; CHECK-NOT: vinsert
|
||||
ret <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user