[LegalizeTypes][VP] Add integer promotions support for VP_TRUNCATE

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D125739
This commit is contained in:
Lian Wang 2022-05-17 01:47:32 +00:00
parent 993070d11f
commit bbc6834e26
3 changed files with 153 additions and 3 deletions

View File

@ -98,6 +98,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break;
case ISD::SRL:
case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break;
case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
@ -1349,11 +1350,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
NumElts.divideCoefficientBy(2));
EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
if (N->getOpcode() == ISD::TRUNCATE) {
EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
} else {
assert(N->getOpcode() == ISD::VP_TRUNCATE &&
"Expected VP_TRUNCATE opcode");
SDValue MaskLo, MaskHi, EVLLo, EVLHi;
std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
std::tie(EVLLo, EVLHi) =
DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
EOp1 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp1, MaskLo, EVLLo);
EOp2 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp2, MaskHi, EVLHi);
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
// TODO: VP_TRUNCATE need to handle when TypeWidenVector access to some
// targets.
case TargetLowering::TypeWidenVector: {
SDValue WideInOp = GetWidenedVector(InOp);
@ -1375,6 +1388,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
}
// Truncate to NVT instead of VT
if (N->getOpcode() == ISD::VP_TRUNCATE)
return DAG.getNode(ISD::VP_TRUNCATE, dl, NVT, Res, N->getOperand(1),
N->getOperand(2));
return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
}
@ -1627,6 +1643,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
OpNo); break;
case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
OpNo); break;
case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
@ -2077,6 +2094,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
if (N->getOpcode() == ISD::VP_TRUNCATE)
return DAG.getNode(ISD::VP_TRUNCATE, SDLoc(N), N->getValueType(0), Op,
N->getOperand(1), N->getOperand(2));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
}

View File

@ -2,6 +2,30 @@
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
declare <2 x i7> @llvm.vp.trunc.nxv2i7.nxv2i16(<2 x i16>, <2 x i1>, i32)
define <2 x i7> @vtrunc_nxv2i7_nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vtrunc_nxv2i7_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x i7> @llvm.vp.trunc.nxv2i7.nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 %vl)
ret <2 x i7> %v
}
declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i15(<2 x i15>, <2 x i1>, i32)
define <2 x i8> @vtrunc_nxv2i8_nxv2i15(<2 x i15> %a, <2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vtrunc_nxv2i8_nxv2i15:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i15(<2 x i15> %a, <2 x i1> %m, i32 %vl)
ret <2 x i8> %v
}
declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<2 x i16>, <2 x i1>, i32)
define <2 x i8> @vtrunc_nxv2i8_nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 zeroext %vl) {
@ -24,6 +48,52 @@ define <2 x i8> @vtrunc_nxv2i8_nxv2i16_unmasked(<2 x i16> %a, i32 zeroext %vl) {
ret <2 x i8> %v
}
declare <128 x i7> @llvm.vp.trunc.nxv128i7.nxv128i16(<128 x i16>, <128 x i1>, i32)
define <128 x i7> @vtrunc_nxv128i7_nxv128i16(<128 x i16> %a, <128 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vtrunc_nxv128i7_nxv128i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, mu
; CHECK-NEXT: addi a2, a0, -64
; CHECK-NEXT: vslidedown.vi v0, v0, 8
; CHECK-NEXT: bltu a0, a2, .LBB4_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB4_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: .LBB4_4:
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu
; CHECK-NEXT: vslideup.vx v16, v8, a1
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <128 x i7> @llvm.vp.trunc.nxv128i7.nxv128i16(<128 x i16> %a, <128 x i1> %m, i32 %vl)
ret <128 x i7> %v
}
declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<2 x i32>, <2 x i1>, i32)
define <2 x i8> @vtrunc_nxv2i8_nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 zeroext %vl) {

View File

@ -2,6 +2,30 @@
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 2 x i7> @llvm.vp.trunc.nxv2i7.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i7> @vtrunc_nxv2i7_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vtrunc_nxv2i7_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i7> @llvm.vp.trunc.nxv2i7.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
ret <vscale x 2 x i7> %v
}
declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i15(<vscale x 2 x i15>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i15(<vscale x 2 x i15> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vtrunc_nxv2i8_nxv2i15:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i15(<vscale x 2 x i15> %a, <vscale x 2 x i1> %m, i32 %vl)
ret <vscale x 2 x i8> %v
}
declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
@ -151,3 +175,39 @@ define <vscale x 2 x i32> @vtrunc_nxv2i32_nxv2i64_unmasked(<vscale x 2 x i64> %a
%v = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
ret <vscale x 2 x i32> %v
}
declare <vscale x 32 x i7> @llvm.vp.trunc.nxv32i7.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i1>, i32)
define <vscale x 32 x i7> @vtrunc_nxv32i7_nxv32i32(<vscale x 32 x i32> %a, <vscale x 32 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vtrunc_nxv32i7_nxv32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a4, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vslidedown.vx v0, v0, a4
; CHECK-NEXT: bltu a0, a3, .LBB14_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB14_2:
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
; CHECK-NEXT: vncvt.x.x.w v28, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu
; CHECK-NEXT: vncvt.x.x.w v18, v28, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB14_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB14_4:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu
; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t
; CHECK-NEXT: vmv4r.v v8, v16
; CHECK-NEXT: ret
%v = call <vscale x 32 x i7> @llvm.vp.trunc.nxv32i7.nxv32i32(<vscale x 32 x i32> %a, <vscale x 32 x i1> %m, i32 %vl)
ret <vscale x 32 x i7> %v
}