[RISCV] Add support for selecting vrgather.vx/vi for fixed vector splat shuffles.

The test cases extract a fixed element from a vector and splat it
into a vector. This gets DAG combined into a splat shuffle.
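For example, in the new gather_const_v4i32 test below,

  %b = extractelement <4 x i32> %a, i32 3
  %c = insertelement <4 x i32> undef, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> undef, <4 x i32> zeroinitializer

combines into a single splat shuffle of %a with index 3, which now
selects to "vrgather.vi v26, v25, 3".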

I've used some very wide vectors in the tests to make sure we have
at least a couple of cases where the element index doesn't fit in the
uimm5 immediate of vrgather.vi, so we fall back to vrgather.vx.
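uimm5 only covers indices 0-31, so for example the index 47 in
gather_const_v64f16 is first materialized into a scalar register:

  addi a1, zero, 47
  vrgather.vx v16, v8, a1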

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D96186
Craig Topper 2021-02-10 09:40:28 -08:00
parent 2193e8be3e
commit 0c254b4a69
5 changed files with 460 additions and 0 deletions


@@ -524,6 +524,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
@@ -554,6 +555,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
@@ -853,6 +855,36 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return SDValue();
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");

      // VL is the number of elements in the fixed-length source vector.
      MVT XLenVT = Subtarget.getXLenVT();
      SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
      // Emit the splat as an all-ones-masked VRGATHER_VX_VL with a constant
      // index; isel selects vrgather.vi when the index fits in uimm5.
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  return SDValue();
}
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1102,6 +1134,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFPVECREDUCE(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
case ISD::LOAD:
return lowerFixedLengthVectorLoadToRVV(Op, DAG);
case ISD::STORE:
@@ -4638,6 +4672,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FMA_VL)
NODE_NAME_CASE(VMCLR_VL)
NODE_NAME_CASE(VMSET_VL)
NODE_NAME_CASE(VRGATHER_VX_VL)
NODE_NAME_CASE(VLE_VL)
NODE_NAME_CASE(VSE_VL)
}


@@ -167,6 +167,9 @@ enum NodeType : unsigned {
VMCLR_VL,
VMSET_VL,
// Matches the semantics of vrgather.vx with an extra operand for VL.
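// Operands are (source vector, index, mask, VL).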
VRGATHER_VX_VL,
// Memory opcodes start here.
VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
VSE_VL,


@@ -79,6 +79,14 @@ def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCisVT<5, XLenVT>]>;
def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL>;
def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL",
                                  SDTypeProfile<1, 4, [SDTCisVec<0>,
                                                       SDTCisSameAs<0, 1>,
                                                       SDTCisVT<2, XLenVT>,
                                                       SDTCVecEltisVT<3, i1>,
                                                       SDTCisSameNumEltsAs<0, 3>,
                                                       SDTCisVT<4, XLenVT>]>>;
def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCisVec<0>,
SDTCVecEltisVT<0, i1>,
SDTCisVT<1, XLenVT>]>;
@@ -211,6 +219,41 @@ foreach vti = AllFloatVectors in {
} // Predicates = [HasStdExtV, HasStdExtF]
// 17.4. Vector Register Gather Instruction
let Predicates = [HasStdExtV] in {
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
(vti.Mask true_mask),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX)
vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.SEW)>;
def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm,
(vti.Mask true_mask),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.SEW)>;
}
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
foreach vti = AllFloatVectors in {
def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
(vti.Mask true_mask),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX)
vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.SEW)>;
def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm,
(vti.Mask true_mask),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.SEW)>;
}
} // Predicates = [HasStdExtV, HasStdExtF]
//===----------------------------------------------------------------------===//
// Miscellaneous RISCVISD SDNodes
//===----------------------------------------------------------------------===//


@@ -0,0 +1,177 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
define void @gather_const_v8f16(<8 x half>* %x) {
; CHECK-LABEL: gather_const_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 8
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 5
; CHECK-NEXT: vse16.v v26, (a0)
; CHECK-NEXT: ret
%a = load <8 x half>, <8 x half>* %x
%b = extractelement <8 x half> %a, i32 5
%c = insertelement <8 x half> undef, half %b, i32 0
%d = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
store <8 x half> %d, <8 x half>* %x
ret void
}
define void @gather_const_v4f32(<4 x float>* %x) {
; CHECK-LABEL: gather_const_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 2
; CHECK-NEXT: vse32.v v26, (a0)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = extractelement <4 x float> %a, i32 2
%c = insertelement <4 x float> undef, float %b, i32 0
%d = shufflevector <4 x float> %c, <4 x float> undef, <4 x i32> zeroinitializer
store <4 x float> %d, <4 x float>* %x
ret void
}
define void @gather_const_v2f64(<2 x double>* %x) {
; CHECK-LABEL: gather_const_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 2
; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 0
; CHECK-NEXT: vse64.v v26, (a0)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = extractelement <2 x double> %a, i32 0
%c = insertelement <2 x double> undef, double %b, i32 0
%d = shufflevector <2 x double> %c, <2 x double> undef, <2 x i32> zeroinitializer
store <2 x double> %d, <2 x double>* %x
ret void
}
define void @gather_const_v64f16(<64 x half>* %x) {
; LMULMAX8-LABEL: gather_const_v64f16:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: addi a1, zero, 64
; LMULMAX8-NEXT: vsetvli a1, a1, e16,m8,ta,mu
; LMULMAX8-NEXT: vle16.v v8, (a0)
; LMULMAX8-NEXT: addi a1, zero, 47
; LMULMAX8-NEXT: vrgather.vx v16, v8, a1
; LMULMAX8-NEXT: vse16.v v16, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v64f16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 80
; LMULMAX1-NEXT: addi a2, zero, 8
; LMULMAX1-NEXT: vsetvli a2, a2, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a1)
; LMULMAX1-NEXT: addi a6, a0, 16
; LMULMAX1-NEXT: addi a7, a0, 48
; LMULMAX1-NEXT: addi a4, a0, 32
; LMULMAX1-NEXT: addi a5, a0, 64
; LMULMAX1-NEXT: addi a2, a0, 112
; LMULMAX1-NEXT: addi a3, a0, 96
; LMULMAX1-NEXT: vrgather.vi v26, v25, 7
; LMULMAX1-NEXT: vse16.v v26, (a3)
; LMULMAX1-NEXT: vse16.v v26, (a2)
; LMULMAX1-NEXT: vse16.v v26, (a5)
; LMULMAX1-NEXT: vse16.v v26, (a1)
; LMULMAX1-NEXT: vse16.v v26, (a4)
; LMULMAX1-NEXT: vse16.v v26, (a7)
; LMULMAX1-NEXT: vse16.v v26, (a0)
; LMULMAX1-NEXT: vse16.v v26, (a6)
; LMULMAX1-NEXT: ret
%a = load <64 x half>, <64 x half>* %x
%b = extractelement <64 x half> %a, i32 47
%c = insertelement <64 x half> undef, half %b, i32 0
%d = shufflevector <64 x half> %c, <64 x half> undef, <64 x i32> zeroinitializer
store <64 x half> %d, <64 x half>* %x
ret void
}
define void @gather_const_v32f32(<32 x float>* %x) {
; LMULMAX8-LABEL: gather_const_v32f32:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: addi a1, zero, 32
; LMULMAX8-NEXT: vsetvli a1, a1, e32,m8,ta,mu
; LMULMAX8-NEXT: vle32.v v8, (a0)
; LMULMAX8-NEXT: vrgather.vi v16, v8, 17
; LMULMAX8-NEXT: vse32.v v16, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v32f32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 64
; LMULMAX1-NEXT: addi a2, zero, 4
; LMULMAX1-NEXT: vsetvli a2, a2, e32,m1,ta,mu
; LMULMAX1-NEXT: vle32.v v25, (a1)
; LMULMAX1-NEXT: addi a6, a0, 16
; LMULMAX1-NEXT: addi a7, a0, 48
; LMULMAX1-NEXT: addi a4, a0, 32
; LMULMAX1-NEXT: addi a5, a0, 80
; LMULMAX1-NEXT: addi a2, a0, 112
; LMULMAX1-NEXT: addi a3, a0, 96
; LMULMAX1-NEXT: vrgather.vi v26, v25, 1
; LMULMAX1-NEXT: vse32.v v26, (a3)
; LMULMAX1-NEXT: vse32.v v26, (a2)
; LMULMAX1-NEXT: vse32.v v26, (a1)
; LMULMAX1-NEXT: vse32.v v26, (a5)
; LMULMAX1-NEXT: vse32.v v26, (a4)
; LMULMAX1-NEXT: vse32.v v26, (a7)
; LMULMAX1-NEXT: vse32.v v26, (a0)
; LMULMAX1-NEXT: vse32.v v26, (a6)
; LMULMAX1-NEXT: ret
%a = load <32 x float>, <32 x float>* %x
%b = extractelement <32 x float> %a, i32 17
%c = insertelement <32 x float> undef, float %b, i32 0
%d = shufflevector <32 x float> %c, <32 x float> undef, <32 x i32> zeroinitializer
store <32 x float> %d, <32 x float>* %x
ret void
}
define void @gather_const_v16f64(<16 x double>* %x) {
; LMULMAX8-LABEL: gather_const_v16f64:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: addi a1, zero, 16
; LMULMAX8-NEXT: vsetvli a1, a1, e64,m8,ta,mu
; LMULMAX8-NEXT: vle64.v v8, (a0)
; LMULMAX8-NEXT: vrgather.vi v16, v8, 10
; LMULMAX8-NEXT: vse64.v v16, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v16f64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 80
; LMULMAX1-NEXT: addi a2, zero, 2
; LMULMAX1-NEXT: vsetvli a2, a2, e64,m1,ta,mu
; LMULMAX1-NEXT: vle64.v v25, (a1)
; LMULMAX1-NEXT: addi a6, a0, 16
; LMULMAX1-NEXT: addi a7, a0, 48
; LMULMAX1-NEXT: addi a4, a0, 32
; LMULMAX1-NEXT: addi a5, a0, 64
; LMULMAX1-NEXT: addi a2, a0, 112
; LMULMAX1-NEXT: addi a3, a0, 96
; LMULMAX1-NEXT: vrgather.vi v26, v25, 0
; LMULMAX1-NEXT: vse64.v v26, (a3)
; LMULMAX1-NEXT: vse64.v v26, (a2)
; LMULMAX1-NEXT: vse64.v v26, (a5)
; LMULMAX1-NEXT: vse64.v v26, (a1)
; LMULMAX1-NEXT: vse64.v v26, (a4)
; LMULMAX1-NEXT: vse64.v v26, (a7)
; LMULMAX1-NEXT: vse64.v v26, (a0)
; LMULMAX1-NEXT: vse64.v v26, (a6)
; LMULMAX1-NEXT: ret
%a = load <16 x double>, <16 x double>* %x
%b = extractelement <16 x double> %a, i32 10
%c = insertelement <16 x double> undef, double %b, i32 0
%d = shufflevector <16 x double> %c, <16 x double> undef, <16 x i32> zeroinitializer
store <16 x double> %d, <16 x double>* %x
ret void
}


@@ -0,0 +1,202 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
define void @gather_const_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: gather_const_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 16
; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
; CHECK-NEXT: vle8.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 12
; CHECK-NEXT: vse8.v v26, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, <16 x i8>* %x
%b = extractelement <16 x i8> %a, i32 12
%c = insertelement <16 x i8> undef, i8 %b, i32 0
%d = shufflevector <16 x i8> %c, <16 x i8> undef, <16 x i32> zeroinitializer
store <16 x i8> %d, <16 x i8>* %x
ret void
}
define void @gather_const_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: gather_const_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 8
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 5
; CHECK-NEXT: vse16.v v26, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, <8 x i16>* %x
%b = extractelement <8 x i16> %a, i32 5
%c = insertelement <8 x i16> undef, i16 %b, i32 0
%d = shufflevector <8 x i16> %c, <8 x i16> undef, <8 x i32> zeroinitializer
store <8 x i16> %d, <8 x i16>* %x
ret void
}
define void @gather_const_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: gather_const_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 3
; CHECK-NEXT: vse32.v v26, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, <4 x i32>* %x
%b = extractelement <4 x i32> %a, i32 3
%c = insertelement <4 x i32> undef, i32 %b, i32 0
%d = shufflevector <4 x i32> %c, <4 x i32> undef, <4 x i32> zeroinitializer
store <4 x i32> %d, <4 x i32>* %x
ret void
}
define void @gather_const_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: gather_const_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 2
; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 1
; CHECK-NEXT: vse64.v v26, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = extractelement <2 x i64> %a, i32 1
%c = insertelement <2 x i64> undef, i64 %b, i32 0
%d = shufflevector <2 x i64> %c, <2 x i64> undef, <2 x i32> zeroinitializer
store <2 x i64> %d, <2 x i64>* %x
ret void
}
define void @gather_const_v64i8(<64 x i8>* %x) {
; LMULMAX4-LABEL: gather_const_v64i8:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: addi a1, zero, 64
; LMULMAX4-NEXT: vsetvli a1, a1, e8,m4,ta,mu
; LMULMAX4-NEXT: vle8.v v28, (a0)
; LMULMAX4-NEXT: addi a1, zero, 32
; LMULMAX4-NEXT: vrgather.vx v8, v28, a1
; LMULMAX4-NEXT: vse8.v v8, (a0)
; LMULMAX4-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v64i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 32
; LMULMAX1-NEXT: addi a2, zero, 16
; LMULMAX1-NEXT: vsetvli a2, a2, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a1)
; LMULMAX1-NEXT: addi a2, a0, 16
; LMULMAX1-NEXT: addi a3, a0, 48
; LMULMAX1-NEXT: vrgather.vi v26, v25, 0
; LMULMAX1-NEXT: vse8.v v26, (a1)
; LMULMAX1-NEXT: vse8.v v26, (a3)
; LMULMAX1-NEXT: vse8.v v26, (a0)
; LMULMAX1-NEXT: vse8.v v26, (a2)
; LMULMAX1-NEXT: ret
%a = load <64 x i8>, <64 x i8>* %x
%b = extractelement <64 x i8> %a, i32 32
%c = insertelement <64 x i8> undef, i8 %b, i32 0
%d = shufflevector <64 x i8> %c, <64 x i8> undef, <64 x i32> zeroinitializer
store <64 x i8> %d, <64 x i8>* %x
ret void
}
define void @gather_const_v32i16(<32 x i16>* %x) {
; LMULMAX4-LABEL: gather_const_v32i16:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: addi a1, zero, 32
; LMULMAX4-NEXT: vsetvli a1, a1, e16,m4,ta,mu
; LMULMAX4-NEXT: vle16.v v28, (a0)
; LMULMAX4-NEXT: vrgather.vi v8, v28, 25
; LMULMAX4-NEXT: vse16.v v8, (a0)
; LMULMAX4-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v32i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 48
; LMULMAX1-NEXT: addi a2, zero, 8
; LMULMAX1-NEXT: vsetvli a2, a2, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a1)
; LMULMAX1-NEXT: addi a2, a0, 16
; LMULMAX1-NEXT: addi a3, a0, 32
; LMULMAX1-NEXT: vrgather.vi v26, v25, 1
; LMULMAX1-NEXT: vse16.v v26, (a3)
; LMULMAX1-NEXT: vse16.v v26, (a1)
; LMULMAX1-NEXT: vse16.v v26, (a0)
; LMULMAX1-NEXT: vse16.v v26, (a2)
; LMULMAX1-NEXT: ret
%a = load <32 x i16>, <32 x i16>* %x
%b = extractelement <32 x i16> %a, i32 25
%c = insertelement <32 x i16> undef, i16 %b, i32 0
%d = shufflevector <32 x i16> %c, <32 x i16> undef, <32 x i32> zeroinitializer
store <32 x i16> %d, <32 x i16>* %x
ret void
}
define void @gather_const_v16i32(<16 x i32>* %x) {
; LMULMAX4-LABEL: gather_const_v16i32:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: addi a1, zero, 16
; LMULMAX4-NEXT: vsetvli a1, a1, e32,m4,ta,mu
; LMULMAX4-NEXT: vle32.v v28, (a0)
; LMULMAX4-NEXT: vrgather.vi v8, v28, 9
; LMULMAX4-NEXT: vse32.v v8, (a0)
; LMULMAX4-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v16i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 32
; LMULMAX1-NEXT: addi a2, zero, 4
; LMULMAX1-NEXT: vsetvli a2, a2, e32,m1,ta,mu
; LMULMAX1-NEXT: vle32.v v25, (a1)
; LMULMAX1-NEXT: addi a2, a0, 16
; LMULMAX1-NEXT: addi a3, a0, 48
; LMULMAX1-NEXT: vrgather.vi v26, v25, 1
; LMULMAX1-NEXT: vse32.v v26, (a1)
; LMULMAX1-NEXT: vse32.v v26, (a3)
; LMULMAX1-NEXT: vse32.v v26, (a0)
; LMULMAX1-NEXT: vse32.v v26, (a2)
; LMULMAX1-NEXT: ret
%a = load <16 x i32>, <16 x i32>* %x
%b = extractelement <16 x i32> %a, i32 9
%c = insertelement <16 x i32> undef, i32 %b, i32 0
%d = shufflevector <16 x i32> %c, <16 x i32> undef, <16 x i32> zeroinitializer
store <16 x i32> %d, <16 x i32>* %x
ret void
}
define void @gather_const_v8i64(<8 x i64>* %x) {
; LMULMAX4-LABEL: gather_const_v8i64:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: addi a1, zero, 8
; LMULMAX4-NEXT: vsetvli a1, a1, e64,m4,ta,mu
; LMULMAX4-NEXT: vle64.v v28, (a0)
; LMULMAX4-NEXT: vrgather.vi v8, v28, 3
; LMULMAX4-NEXT: vse64.v v8, (a0)
; LMULMAX4-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v8i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: addi a2, zero, 2
; LMULMAX1-NEXT: vsetvli a2, a2, e64,m1,ta,mu
; LMULMAX1-NEXT: vle64.v v25, (a1)
; LMULMAX1-NEXT: addi a2, a0, 48
; LMULMAX1-NEXT: addi a3, a0, 32
; LMULMAX1-NEXT: vrgather.vi v26, v25, 1
; LMULMAX1-NEXT: vse64.v v26, (a3)
; LMULMAX1-NEXT: vse64.v v26, (a2)
; LMULMAX1-NEXT: vse64.v v26, (a0)
; LMULMAX1-NEXT: vse64.v v26, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x i64>, <8 x i64>* %x
%b = extractelement <8 x i64> %a, i32 3
%c = insertelement <8 x i64> undef, i64 %b, i32 0
%d = shufflevector <8 x i64> %c, <8 x i64> undef, <8 x i32> zeroinitializer
store <8 x i64> %d, <8 x i64>* %x
ret void
}