[VE] v256.32|64 gather|scatter isel and tests
This adds support for v256.32|64 scatter|gather isel. vp.gather|scatter and
regular gather|scatter intrinsics are both lowered to the internal VVP layer.
Splitting these ops on v512.32 is the subject of future patches.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D121288
parent 873f081e5a
commit a5f1262332
llvm/lib/Target/VE/VECustomDAG.cpp

@@ -277,6 +277,22 @@ SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG) {
  return SDValue();
}

SDValue getGatherScatterIndex(SDValue Op) {
  if (auto *N = dyn_cast<MaskedGatherScatterSDNode>(Op.getNode()))
    return N->getIndex();
  if (auto *N = dyn_cast<VPGatherScatterSDNode>(Op.getNode()))
    return N->getIndex();
  return SDValue();
}

SDValue getGatherScatterScale(SDValue Op) {
  if (auto *N = dyn_cast<MaskedGatherScatterSDNode>(Op.getNode()))
    return N->getScale();
  if (auto *N = dyn_cast<VPGatherScatterSDNode>(Op.getNode()))
    return N->getScale();
  return SDValue();
}

SDValue getStoredValue(SDValue Op) {
  switch (Op->getOpcode()) {
  case VEISD::VVP_STORE:
@@ -288,12 +304,19 @@ SDValue getStoredValue(SDValue Op) {
    return StoreN->getValue();
  if (auto *StoreN = dyn_cast<VPStoreSDNode>(Op.getNode()))
    return StoreN->getValue();
  if (auto *StoreN = dyn_cast<MaskedScatterSDNode>(Op.getNode()))
    return StoreN->getValue();
  if (auto *StoreN = dyn_cast<VPScatterSDNode>(Op.getNode()))
    return StoreN->getValue();
  return SDValue();
}

SDValue getNodePassthru(SDValue Op) {
  if (auto *N = dyn_cast<MaskedLoadSDNode>(Op.getNode()))
    return N->getPassThru();
  if (auto *N = dyn_cast<MaskedGatherSDNode>(Op.getNode()))
    return N->getPassThru();

  return SDValue();
}
@@ -450,4 +473,30 @@ SDValue VECustomDAG::getSplitPtrStride(SDValue PackStride) const {
  return getNode(ISD::SHL, MVT::i64, {PackStride, getConstant(1, MVT::i32)});
}

SDValue VECustomDAG::getGatherScatterAddress(SDValue BasePtr, SDValue Scale,
                                             SDValue Index, SDValue Mask,
                                             SDValue AVL) const {
  EVT IndexVT = Index.getValueType();

  // Apply scale.
  SDValue ScaledIndex;
  if (!Scale || isOneConstant(Scale))
    ScaledIndex = Index;
  else {
    SDValue ScaleBroadcast = getBroadcast(IndexVT, Scale, AVL);
    ScaledIndex =
        getNode(VEISD::VVP_MUL, IndexVT, {Index, ScaleBroadcast, Mask, AVL});
  }

  // Add basePtr.
  if (isNullConstant(BasePtr))
    return ScaledIndex;

  // Re-constitute the pointer vector (basePtr + index * scale).
  SDValue BaseBroadcast = getBroadcast(IndexVT, BasePtr, AVL);
  auto ResPtr =
      getNode(VEISD::VVP_ADD, IndexVT, {BaseBroadcast, ScaledIndex, Mask, AVL});
  return ResPtr;
}

} // namespace llvm
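For intuition, the address computation above amounts to the following per-lane
arithmetic. This is a standalone C++ model, not VE backend code; all names are
illustrative:

#include <cstdint>

// Per-lane model of getGatherScatterAddress: every lane i that is active
// (i < avl and mask[i] set) receives basePtr + index[i] * scale. Inactive
// lanes are never dereferenced by the subsequent gather/scatter.
void computeAddressVector(uint64_t basePtr, const int64_t *index,
                          int64_t scale, const bool *mask, unsigned avl,
                          uint64_t *addrs) {
  for (unsigned i = 0; i < avl; ++i)
    if (mask[i])
      addrs[i] = basePtr + static_cast<uint64_t>(index[i] * scale);
}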
llvm/lib/Target/VE/VECustomDAG.h

@@ -102,6 +102,10 @@ SDValue getStoredValue(SDValue Op);

SDValue getNodePassthru(SDValue Op);

SDValue getGatherScatterIndex(SDValue Op);

SDValue getGatherScatterScale(SDValue Op);

/// } Node Properties

enum class Packing {

@@ -193,6 +197,11 @@ public:
  SDValue getSplitPtrOffset(SDValue Ptr, SDValue ByteStride,
                            PackElem Part) const;
  SDValue getSplitPtrStride(SDValue PackStride) const;
  SDValue getGatherScatterAddress(SDValue BasePtr, SDValue Scale, SDValue Index,
                                  SDValue Mask, SDValue AVL) const;
  EVT getVectorVT(EVT ElemVT, unsigned NumElems) const {
    return EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
  }
};

} // namespace llvm
llvm/lib/Target/VE/VEISelLowering.h

@@ -186,7 +186,8 @@ public:

  /// VVP Lowering {
  SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVVP_LOAD_STORE(SDValue Op, VECustomDAG &) const;
  SDValue lowerVVP_GATHER_SCATTER(SDValue Op, VECustomDAG &) const;

  SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue legalizeInternalLoadStoreOp(SDValue Op, VECustomDAG &CDAG) const;
llvm/lib/Target/VE/VVPISelLowering.cpp

@@ -51,7 +51,10 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
  case VEISD::VVP_LOAD:
  case VEISD::VVP_STORE:
    return lowerVVP_LOAD_STORE(Op, CDAG);
  case VEISD::VVP_GATHER:
  case VEISD::VVP_SCATTER:
    return lowerVVP_GATHER_SCATTER(Op, CDAG);
  }

  EVT OpVecVT = Op.getValueType();
  EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
@@ -235,6 +238,54 @@ SDValue VETargetLowering::splitPackedLoadStore(SDValue Op,
  return CDAG.getMergeValues({PackedVals, FusedChains});
}

SDValue VETargetLowering::lowerVVP_GATHER_SCATTER(SDValue Op,
                                                  VECustomDAG &CDAG) const {
  EVT DataVT = *getIdiomaticVectorType(Op.getNode());
  auto Packing = getTypePacking(DataVT);
  MVT LegalDataVT =
      getLegalVectorType(Packing, DataVT.getVectorElementType().getSimpleVT());

  SDValue AVL = getAnnotatedNodeAVL(Op).first;
  SDValue Index = getGatherScatterIndex(Op);
  SDValue BasePtr = getMemoryPtr(Op);
  SDValue Mask = getNodeMask(Op);
  SDValue Chain = getNodeChain(Op);
  SDValue Scale = getGatherScatterScale(Op);
  SDValue PassThru = getNodePassthru(Op);
  SDValue StoredValue = getStoredValue(Op);
  if (PassThru && PassThru->isUndef())
    PassThru = SDValue();

  bool IsScatter = (bool)StoredValue;

  // TODO: Infer lower AVL from mask.
  if (!AVL)
    AVL = CDAG.getConstant(DataVT.getVectorNumElements(), MVT::i32);

  // Default to the all-true mask.
  if (!Mask)
    Mask = CDAG.getConstantMask(Packing, true);

  SDValue AddressVec =
      CDAG.getGatherScatterAddress(BasePtr, Scale, Index, Mask, AVL);
  if (IsScatter)
    return CDAG.getNode(VEISD::VVP_SCATTER, MVT::Other,
                        {Chain, StoredValue, AddressVec, Mask, AVL});

  // Gather.
  SDValue NewLoadV = CDAG.getNode(VEISD::VVP_GATHER, {LegalDataVT, MVT::Other},
                                  {Chain, AddressVec, Mask, AVL});

  if (!PassThru)
    return NewLoadV;

  // TODO: Use vvp_select
  SDValue DataV = CDAG.getNode(VEISD::VVP_SELECT, LegalDataVT,
                               {NewLoadV, PassThru, Mask, AVL});
  SDValue NewLoadChainV = SDValue(NewLoadV.getNode(), 1);
  return CDAG.getMergeValues({DataV, NewLoadChainV});
}

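To make the passthru handling concrete: a masked gather must return the
passthru element in every inactive lane. A scalar C++ model of the
VVP_GATHER + VVP_SELECT pair emitted above (illustrative sketch, not backend
code):

#include <cstdint>

// Gather the active lanes, then merge inactive lanes back in from the
// passthru operand; this is the job VVP_SELECT performs after VVP_GATHER.
void gatherWithPassthruModel(const int64_t *const *addrs, const bool *mask,
                             unsigned avl, const int64_t *passThru,
                             int64_t *out) {
  for (unsigned i = 0; i < avl; ++i)
    out[i] = mask[i] ? *addrs[i] : passThru[i];
}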
SDValue VETargetLowering::legalizeInternalLoadStoreOp(SDValue Op,
                                                      VECustomDAG &CDAG) const {
  LLVM_DEBUG(dbgs() << "::legalizeInternalLoadStoreOp\n";);
llvm/lib/Target/VE/VVPInstrInfo.td

@@ -36,6 +36,23 @@ def SDTStoreVVP: SDTypeProfile<0, 5, [
  IsVLVT<4>
]>;

// vvp_scatter(chain, data, addr, mask, avl)
def SDTScatterVVP: SDTypeProfile<0, 4, [
  SDTCisVec<0>,
  SDTCisVec<1>,
  SDTCisVec<2>,
  SDTCisSameNumEltsAs<0, 2>,
  IsVLVT<3>
]>;

// vvp_gather(chain, addr, mask, avl)
def SDTGatherVVP: SDTypeProfile<1, 3, [
  SDTCisVec<0>,
  SDTCisVec<1>,
  SDTCisSameNumEltsAs<0, 2>,
  IsVLVT<3>
]>;

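Note that SDTypeProfile operand counts do not include the implicit chain, so
the four operands of SDTScatterVVP are (data, addr, mask, avl). A scalar C++
model of the scatter semantics (illustrative sketch, not backend code):

#include <cstdint>

// vvp_scatter(chain, data, addr, mask, avl): each lane i < avl whose mask
// bit is set stores data[i] through addr[i]; all other lanes do nothing.
void vvpScatterModel(const int64_t *data, int64_t *const *addrs,
                     const bool *mask, unsigned avl) {
  for (unsigned i = 0; i < avl; ++i)
    if (mask[i])
      *addrs[i] = data[i];
}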
// Binary Operators {

// BinaryOp(x,y,mask,vl)
@@ -120,6 +137,11 @@ def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;

// } Binary Operators

def vvp_scatter : SDNode<"VEISD::VVP_SCATTER", SDTScatterVVP,
                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def vvp_gather : SDNode<"VEISD::VVP_GATHER", SDTGatherVVP,
                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

def vvp_load : SDNode<"VEISD::VVP_LOAD", SDTLoadVVP,
                      [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def vvp_store : SDNode<"VEISD::VVP_STORE", SDTStoreVVP,
llvm/lib/Target/VE/VVPInstrPatternsVec.td

@@ -94,6 +94,41 @@ defm : VectorLoad<v256i64, i64, v256i1, "VGT", "VLD">;
defm : VectorLoad<v256f32, i64, v256i1, "VGTU", "VLDU">;
defm : VectorLoad<v256i32, i64, v256i1, "VGTLZX", "VLDLZX">;

// Vector gather and scatter.
multiclass VectorGather<ValueType DataVT,
                        ValueType PtrVT, ValueType MaskVT,
                        string GTPrefix> {
  // Unmasked.
  def : Pat<(DataVT (vvp_gather
                PtrVT:$addr, (MaskVT true_mask), i32:$avl)),
            (!cast<Instruction>(GTPrefix#"vizl") $addr, 0, 0, $avl)>;
  // Masked.
  def : Pat<(DataVT (vvp_gather PtrVT:$addr, MaskVT:$mask, i32:$avl)),
            (!cast<Instruction>(GTPrefix#"vizml") $addr, 0, 0, $mask, $avl)>;
}

defm : VectorGather<v256f64, v256i64, v256i1, "VGT">;
defm : VectorGather<v256i64, v256i64, v256i1, "VGT">;
defm : VectorGather<v256f32, v256i64, v256i1, "VGTU">;
defm : VectorGather<v256i32, v256i64, v256i1, "VGTLZX">;

multiclass VectorScatter<ValueType DataVT,
                         ValueType PtrVT, ValueType MaskVT,
                         string SCPrefix> {
  // Unmasked.
  def : Pat<(vvp_scatter
                DataVT:$data, PtrVT:$addr, (MaskVT true_mask), i32:$avl),
            (!cast<Instruction>(SCPrefix#"vizvl") $addr, 0, 0, $data, $avl)>;
  // Masked.
  def : Pat<(vvp_scatter
                DataVT:$data, PtrVT:$addr, MaskVT:$mask, i32:$avl),
            (!cast<Instruction>(SCPrefix#"vizvml") $addr, 0, 0, $data, $mask, $avl)>;
}

defm : VectorScatter<v256f64, v256i64, v256i1, "VSC">;
defm : VectorScatter<v256i64, v256i64, v256i1, "VSC">;
defm : VectorScatter<v256f32, v256i64, v256i1, "VSCU">;
defm : VectorScatter<v256i32, v256i64, v256i1, "VSCL">;

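The only difference between the two patterns in each multiclass is the
(MaskVT true_mask) operand: a splat-of-true mask is matched structurally at
isel time and selects the unmasked instruction form; any other mask selects
the masked form. The runtime analogue of that decision, as a hypothetical C++
sketch (only the suffix names are taken from the patterns above):

// All-true masks correspond to the unmasked VGT*/VSC* form ("vizl"/"vizvl");
// anything else needs the masked form ("vizml"/"vizvml").
enum class Form { Unmasked, Masked };

Form selectForm(const bool *mask, unsigned avl) {
  for (unsigned i = 0; i < avl; ++i)
    if (!mask[i])
      return Form::Masked;
  return Form::Unmasked;
}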
multiclass Binary_rv<SDPatternOperator OpNode,
llvm/lib/Target/VE/VVPNodes.def

@@ -44,6 +44,9 @@
#define REGISTER_PACKED(OPC)
#endif

ADD_VVP_OP(VVP_GATHER, MGATHER) HANDLE_VP_TO_VVP(VP_GATHER, VVP_GATHER)
ADD_VVP_OP(VVP_SCATTER, MSCATTER) HANDLE_VP_TO_VVP(VP_SCATTER, VVP_SCATTER)

ADD_VVP_OP(VVP_LOAD, LOAD) HANDLE_VP_TO_VVP(VP_LOAD, VVP_LOAD) REGISTER_PACKED(VVP_LOAD)
ADD_VVP_OP(VVP_STORE, STORE) HANDLE_VP_TO_VVP(VP_STORE, VVP_STORE) REGISTER_PACKED(VVP_STORE)
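VVPNodes.def is an X-macro file: the includer defines the macros it cares
about before including it, and the #ifndef block above supplies empty
defaults for the rest. A hypothetical consumer, mirroring the common LLVM
.def idiom (assumed usage, not code from this patch):

// Expand every ADD_VVP_OP entry into an enumerator; HANDLE_VP_TO_VVP and
// REGISTER_PACKED fall back to the empty defaults defined in the .def file.
#define ADD_VVP_OP(VVP_NAME, NATIVE_NAME) VVP_NAME,
enum VVPNodeKind {
#include "VVPNodes.def"
  VVP_KIND_COUNT
};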
llvm/test/CodeGen/VE/Vector/vec_gather.ll (new file, 99 lines)

@@ -0,0 +1,99 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare <256 x double> @llvm.masked.gather.v256f64.v256p0f64(<256 x double*> %0, i32 immarg %1, <256 x i1> %2, <256 x double> %3) #0

; Function Attrs: nounwind
define fastcc <256 x double> @vec_mgather_v256f64(<256 x double*> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_v256f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgt %v0, %v0, 0, 0
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x double> @llvm.masked.gather.v256f64.v256p0f64(<256 x double*> %P, i32 4, <256 x i1> %M, <256 x double> undef)
  ret <256 x double> %r
}

; Function Attrs: nounwind
define fastcc <256 x double> @vec_mgather_pt_v256f64(<256 x double*> %P, <256 x double> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_pt_v256f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgt %v2, %v0, 0, 0
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v1
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v0, %v1, %v2, %vm0
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x double> @llvm.masked.gather.v256f64.v256p0f64(<256 x double*> %P, i32 4, <256 x i1> %M, <256 x double> %PT)
  ret <256 x double> %r
}


declare <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> %0, i32 immarg %1, <256 x i1> %2, <256 x float> %3) #0

; Function Attrs: nounwind
define fastcc <256 x float> @vec_mgather_v256f32(<256 x float*> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_v256f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgtu %v0, %v0, 0, 0
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> %P, i32 4, <256 x i1> %M, <256 x float> undef)
  ret <256 x float> %r
}

; Function Attrs: nounwind
define fastcc <256 x float> @vec_mgather_pt_v256f32(<256 x float*> %P, <256 x float> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_pt_v256f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgtu %v2, %v0, 0, 0
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v1
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v0, %v1, %v2, %vm0
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> %P, i32 4, <256 x i1> %M, <256 x float> %PT)
  ret <256 x float> %r
}


declare <256 x i32> @llvm.masked.gather.v256i32.v256p0i32(<256 x i32*> %0, i32 immarg %1, <256 x i1> %2, <256 x i32> %3) #0

; Function Attrs: nounwind
define fastcc <256 x i32> @vec_mgather_v256i32(<256 x i32*> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_v256i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgtl.zx %v0, %v0, 0, 0
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i32> @llvm.masked.gather.v256i32.v256p0i32(<256 x i32*> %P, i32 4, <256 x i1> %M, <256 x i32> undef)
  ret <256 x i32> %r
}

; Function Attrs: nounwind
define fastcc <256 x i32> @vec_mgather_pt_v256i32(<256 x i32*> %P, <256 x i32> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_pt_v256i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgtl.zx %v2, %v0, 0, 0
; CHECK-NEXT:    lea %s16, 256
; CHECK-NEXT:    lvl %s16
; CHECK-NEXT:    vor %v0, (0)1, %v1
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vmrg %v0, %v1, %v2, %vm0
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i32> @llvm.masked.gather.v256i32.v256p0i32(<256 x i32*> %P, i32 4, <256 x i1> %M, <256 x i32> %PT)
  ret <256 x i32> %r
}

attributes #0 = { argmemonly nounwind readonly willreturn }
llvm/test/CodeGen/VE/Vector/vec_scatter.ll (new file, 60 lines)

@@ -0,0 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare void @llvm.masked.scatter.v256i64.v256p0i64(<256 x i64>, <256 x i64*>, i32 immarg, <256 x i1>) #0

; Function Attrs: nounwind
define fastcc void @vec_mscatter_v256i64(<256 x i64> %V, <256 x i64*> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mscatter_v256i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vsc %v0, %v1, 0, 0
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.masked.scatter.v256i64.v256p0i64(<256 x i64> %V, <256 x i64*> %P, i32 4, <256 x i1> %M)
  ret void
}

declare void @llvm.masked.scatter.v256f64.v256p0f64(<256 x double>, <256 x double*>, i32 immarg, <256 x i1>) #0

; Function Attrs: nounwind
define fastcc void @vec_mscatter_v256f64(<256 x double> %V, <256 x double*> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mscatter_v256f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vsc %v0, %v1, 0, 0
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.masked.scatter.v256f64.v256p0f64(<256 x double> %V, <256 x double*> %P, i32 4, <256 x i1> %M)
  ret void
}

declare void @llvm.masked.scatter.v256f32.v256p0f32(<256 x float>, <256 x float*>, i32 immarg, <256 x i1>) #0

; Function Attrs: nounwind
define fastcc void @vec_mscatter_v256f32(<256 x float> %V, <256 x float*> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mscatter_v256f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vscu %v0, %v1, 0, 0
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.masked.scatter.v256f32.v256p0f32(<256 x float> %V, <256 x float*> %P, i32 4, <256 x i1> %M)
  ret void
}

declare void @llvm.masked.scatter.v256i32.v256p0i32(<256 x i32>, <256 x i32*>, i32 immarg, <256 x i1>) #0

; Function Attrs: nounwind
define fastcc void @vec_mscatter_v256i32(<256 x i32> %V, <256 x i32*> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mscatter_v256i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s0, 256
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vscl %v0, %v1, 0, 0
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.masked.scatter.v256i32.v256p0i32(<256 x i32> %V, <256 x i32*> %P, i32 4, <256 x i1> %M)
  ret void
}

attributes #0 = { argmemonly nounwind readonly willreturn }
llvm/test/CodeGen/VE/Vector/vp_gather.ll (new file, 58 lines)

@@ -0,0 +1,58 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare <256 x i64> @llvm.vp.gather.v256i64.v256p0i64(<256 x i64*>, <256 x i1>, i32)

; Function Attrs: nounwind
define fastcc <256 x i64> @vp_gather_v256i64(<256 x i64*> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_gather_v256i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i64> @llvm.vp.gather.v256i64.v256p0i64(<256 x i64*> %P, <256 x i1> %M, i32 %avl)
  ret <256 x i64> %r
}

declare <256 x double> @llvm.vp.gather.v256f64.v256p0f64(<256 x double*>, <256 x i1>, i32)

; Function Attrs: nounwind
define fastcc <256 x double> @vp_gather_v256f64(<256 x double*> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_gather_v256f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x double> @llvm.vp.gather.v256f64.v256p0f64(<256 x double*> %P, <256 x i1> %M, i32 %avl)
  ret <256 x double> %r
}

declare <256 x float> @llvm.vp.gather.v256f32.v256p0f32(<256 x float*>, <256 x i1>, i32)

; Function Attrs: nounwind
define fastcc <256 x float> @vp_gather_v256f32(<256 x float*> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_gather_v256f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgtu %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x float> @llvm.vp.gather.v256f32.v256p0f32(<256 x float*> %P, <256 x i1> %M, i32 %avl)
  ret <256 x float> %r
}

declare <256 x i32> @llvm.vp.gather.v256i32.v256p0i32(<256 x i32*>, <256 x i1>, i32)

; Function Attrs: nounwind
define fastcc <256 x i32> @vp_gather_v256i32(<256 x i32*> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_gather_v256i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vgtl.zx %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i32> @llvm.vp.gather.v256i32.v256p0i32(<256 x i32*> %P, <256 x i1> %M, i32 %avl)
  ret <256 x i32> %r
}
llvm/test/CodeGen/VE/Vector/vp_scatter.ll (new file, 59 lines)

@@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare void @llvm.vp.scatter.v256i64.v256p0i64(<256 x i64>, <256 x i64*>, <256 x i1>, i32 %avl)

; Function Attrs: nounwind
define fastcc void @vp_mscatter_v256i64(<256 x i64> %V, <256 x i64*> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_mscatter_v256i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vsc %v0, %v1, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.vp.scatter.v256i64.v256p0i64(<256 x i64> %V, <256 x i64*> %P, <256 x i1> %M, i32 %avl)
  ret void
}

declare void @llvm.vp.scatter.v256f64.v256p0f64(<256 x double>, <256 x double*>, <256 x i1>, i32 %avl)

; Function Attrs: nounwind
define fastcc void @vp_mscatter_v256f64(<256 x double> %V, <256 x double*> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_mscatter_v256f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vsc %v0, %v1, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.vp.scatter.v256f64.v256p0f64(<256 x double> %V, <256 x double*> %P, <256 x i1> %M, i32 %avl)
  ret void
}

declare void @llvm.vp.scatter.v256f32.v256p0f32(<256 x float>, <256 x float*>, <256 x i1>, i32 %avl)

; Function Attrs: nounwind
define fastcc void @vp_mscatter_v256f32(<256 x float> %V, <256 x float*> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_mscatter_v256f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vscu %v0, %v1, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.vp.scatter.v256f32.v256p0f32(<256 x float> %V, <256 x float*> %P, <256 x i1> %M, i32 %avl)
  ret void
}

declare void @llvm.vp.scatter.v256i32.v256p0i32(<256 x i32>, <256 x i32*>, <256 x i1>, i32 %avl)

; Function Attrs: nounwind
define fastcc void @vp_mscatter_v256i32(<256 x i32> %V, <256 x i32*> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_mscatter_v256i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s0, %s0, (32)0
; CHECK-NEXT:    lvl %s0
; CHECK-NEXT:    vscl %v0, %v1, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.vp.scatter.v256i32.v256p0i32(<256 x i32> %V, <256 x i32*> %P, <256 x i1> %M, i32 %avl)
  ret void
}