mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-02 09:49:09 +00:00
AVX-512: insert element to mask vector; store i1 data
Implemented the INSERT_VECTOR_ELT operation for v16i1 and v8i1 vectors, and implemented "store" for the i1 type.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205850 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d97cbff528
commit
0d5d656524
@ -1394,6 +1394,8 @@ void X86TargetLowering::resetOperationActions() {
|
||||
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
|
||||
@ -5811,6 +5813,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
|
||||
uint64_t Immediate = 0;
|
||||
int NonConstIdx = -1;
|
||||
bool IsSplat = true;
|
||||
unsigned NumNonConsts = 0;
|
||||
unsigned NumConsts = 0;
|
||||
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
|
||||
SDValue In = Op.getOperand(idx);
|
||||
if (In.getOpcode() == ISD::UNDEF)
|
||||
@ -5818,9 +5822,13 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (!isa<ConstantSDNode>(In)) {
|
||||
AllContants = false;
|
||||
NonConstIdx = idx;
|
||||
NumNonConsts++;
|
||||
}
|
||||
else if (cast<ConstantSDNode>(In)->getZExtValue())
|
||||
else {
|
||||
NumConsts++;
|
||||
if (cast<ConstantSDNode>(In)->getZExtValue())
|
||||
Immediate |= (1ULL << idx);
|
||||
}
|
||||
if (In != Op.getOperand(0))
|
||||
IsSplat = false;
|
||||
}
|
||||
@ -5832,6 +5840,19 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
|
||||
DAG.getIntPtrConstant(0));
|
||||
}
|
||||
|
||||
if (NumNonConsts == 1 && NonConstIdx != 0) {
|
||||
SDValue DstVec;
|
||||
if (NumConsts) {
|
||||
SDValue VecAsImm = DAG.getConstant(Immediate,
|
||||
MVT::getIntegerVT(VT.getSizeInBits()));
|
||||
DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
|
||||
}
|
||||
else
|
||||
DstVec = DAG.getUNDEF(VT);
|
||||
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
|
||||
Op.getOperand(NonConstIdx),
|
||||
DAG.getIntPtrConstant(NonConstIdx));
|
||||
}
|
||||
if (!IsSplat && (NonConstIdx != 0))
|
||||
llvm_unreachable("Unsupported BUILD_VECTOR operation");
|
||||
MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
|
||||
@ -7948,10 +7969,47 @@ static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Insert one bit to mask vector, like v16i1 or v8i1.
|
||||
/// AVX-512 feature.
|
||||
SDValue
|
||||
X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
SDValue Vec = Op.getOperand(0);
|
||||
SDValue Elt = Op.getOperand(1);
|
||||
SDValue Idx = Op.getOperand(2);
|
||||
MVT VecVT = Vec.getSimpleValueType();
|
||||
|
||||
if (!isa<ConstantSDNode>(Idx)) {
|
||||
// Non constant index. Extend source and destination,
|
||||
// insert element and then truncate the result.
|
||||
MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
|
||||
MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32);
|
||||
SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
|
||||
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
|
||||
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
|
||||
}
|
||||
|
||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
|
||||
if (Vec.getOpcode() == ISD::UNDEF)
|
||||
return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
|
||||
DAG.getConstant(IdxVal, MVT::i8));
|
||||
const TargetRegisterClass* rc = getRegClassFor(VecVT);
|
||||
unsigned MaxSift = rc->getSize()*8 - 1;
|
||||
EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
|
||||
DAG.getConstant(MaxSift, MVT::i8));
|
||||
EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec,
|
||||
DAG.getConstant(MaxSift - IdxVal, MVT::i8));
|
||||
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
|
||||
}
|
||||
SDValue
|
||||
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
|
||||
if (EltVT == MVT::i1)
|
||||
return InsertBitToMaskVector(Op, DAG);
|
||||
|
||||
SDLoc dl(Op);
|
||||
SDValue N0 = Op.getOperand(0);
|
||||
|
@ -874,6 +874,8 @@ namespace llvm {
|
||||
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
|
||||
/// Lower INSERT_VECTOR_ELT for a vector with i1 elements (AVX-512 mask).
SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -984,6 +984,10 @@ let Predicates = [HasAVX512] in {
|
||||
(EXTRACT_SUBREG
|
||||
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
|
||||
sub_16bit)>;
|
||||
// scalar_to_vector from a single mask bit (VK1) to v16i1: selected as a
// register-class copy of the source into VK16.
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK16)>;
|
||||
// Same for v8i1: the source is copied into the VK8 register class.
def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK8)>;
|
||||
}
|
||||
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
|
||||
let Predicates = [HasAVX512] in {
|
||||
@ -1356,6 +1360,14 @@ defm VMOVDQU64: avx512_load<0x6F, VR512, VK8WM, i512mem, load,
|
||||
"vmovdqu64", SSEPackedInt, v8i64>,
|
||||
XS, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
// int_x86_avx512_mask_loadu_d_512 with an all-zeros second operand is
// selected as VMOVDQU32rmkz; the GR16 mask is first copied into VK16WM.
def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
|
||||
(v16i32 immAllZerosV), GR16:$mask)),
|
||||
(VMOVDQU32rmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
|
||||
|
||||
// 64-bit element variant: int_x86_avx512_mask_loadu_q_512 with an all-zeros
// second operand is selected as VMOVDQU64rmkz; the GR8 mask is copied into
// VK8WM.
def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
|
||||
(bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
|
||||
(VMOVDQU64rmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
|
||||
(bc_v8i64 (v16i32 immAllZerosV)))),
|
||||
@ -4211,3 +4223,7 @@ def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
|
||||
GR8:$mask),
|
||||
(VPCONFLICTQrrk VR512:$src1,
|
||||
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
|
||||
|
||||
// Stores of constant i1 values are selected as byte stores (MOV8mi).
// Both spellings of "true" (i1 -1 and i1 1) store the byte value 1.
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
// A constant false is stored as the byte value 0.
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
|
||||
|
@ -158,3 +158,41 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
|
||||
%res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
; Insert a loaded i1 into element 10 of an undef <16 x i1> and return the
; resulting mask as an i16.
;CHECK-LABEL: test15
;CHECK: kshiftlw
;CHECK: kmovw
;CHECK: ret
define i16 @test15(i1 *%addr) {
  %bit = load i1 * %addr, align 128
  %mask = insertelement <16 x i1> undef, i1 %bit, i32 10
  %res = bitcast <16 x i1> %mask to i16
  ret i16 %res
}
|
||||
|
||||
; Insert a loaded i1 into element 10 of a <16 x i1> mask obtained from an
; i16 argument, and return the updated mask as an i16.
;CHECK-LABEL: test16
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
  %bit = load i1 * %addr, align 128
  %mask = bitcast i16 %a to <16 x i1>
  %updated = insertelement <16 x i1> %mask, i1 %bit, i32 10
  %res = bitcast <16 x i1> %updated to i16
  ret i16 %res
}
|
||||
|
||||
; Insert a loaded i1 into element 4 of a <8 x i1> mask obtained from an i8
; argument, and return the updated mask as an i8.
; The index must be below 8: an out-of-range insertelement index has no
; defined result, so the CHECK lines would not be testing anything reliable.
;CHECK-LABEL: test17
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: korw
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
  %x = load i1 * %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}
|
||||
|
||||
|
@ -153,3 +153,12 @@ define void @test18(i8 * %addr, <8 x i64> %data) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Store the constants i1 true and i1 false to undef addresses in address
; spaces 3 and 2; a movb is expected for each of the two stores.
; CHECK-LABEL: store_i1
|
||||
; CHECK: movb
|
||||
; CHECK: movb
|
||||
; CHECK: ret
|
||||
define void @store_i1() {
|
||||
store i1 true, i1 addrspace(3)* undef, align 128
|
||||
store i1 false, i1 addrspace(2)* undef, align 128
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user