mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-11 13:37:07 +00:00
[SDAG] Move TRUNCATE splitting logic into a helper, and use
it more liberally. SplitVecOp_TRUNCATE has logic for recursively splitting oversize vectors that need more than one round of splitting to become legal. There are many other ISD nodes that could benefit from this logic, so factor it out and use it for FP_TO_UINT,FP_TO_SINT,SINT_TO_FP,UINT_TO_FP and FTRUNC. llvm-svn: 233681
This commit is contained in:
parent
f737774cb9
commit
c4bf9e7282
@ -593,6 +593,7 @@ private:
|
||||
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_UnaryOp(SDNode *N);
|
||||
SDValue SplitVecOp_TruncateHelper(SDNode *N, unsigned TruncateOp);
|
||||
|
||||
SDValue SplitVecOp_BITCAST(SDNode *N);
|
||||
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
|
||||
@ -600,7 +601,6 @@ private:
|
||||
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
|
||||
SDValue SplitVecOp_TRUNCATE(SDNode *N);
|
||||
SDValue SplitVecOp_VSETCC(SDNode *N);
|
||||
SDValue SplitVecOp_FP_ROUND(SDNode *N);
|
||||
|
||||
|
@ -1293,7 +1293,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
|
||||
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
|
||||
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
|
||||
case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break;
|
||||
case ISD::TRUNCATE:
|
||||
Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE);
|
||||
break;
|
||||
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
|
||||
case ISD::STORE:
|
||||
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
|
||||
@ -1304,20 +1306,32 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::VSELECT:
|
||||
Res = SplitVecOp_VSELECT(N, OpNo);
|
||||
break;
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT:
|
||||
if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
|
||||
Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE);
|
||||
else
|
||||
Res = SplitVecOp_UnaryOp(N);
|
||||
break;
|
||||
case ISD::SINT_TO_FP:
|
||||
case ISD::UINT_TO_FP:
|
||||
if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
|
||||
Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC);
|
||||
else
|
||||
Res = SplitVecOp_UnaryOp(N);
|
||||
break;
|
||||
case ISD::CTTZ:
|
||||
case ISD::CTLZ:
|
||||
case ISD::CTPOP:
|
||||
case ISD::FP_EXTEND:
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT:
|
||||
case ISD::SINT_TO_FP:
|
||||
case ISD::UINT_TO_FP:
|
||||
case ISD::FTRUNC:
|
||||
case ISD::SIGN_EXTEND:
|
||||
case ISD::ZERO_EXTEND:
|
||||
case ISD::ANY_EXTEND:
|
||||
Res = SplitVecOp_UnaryOp(N);
|
||||
break;
|
||||
case ISD::FTRUNC:
|
||||
Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1581,7 +1595,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N,
|
||||
unsigned TruncateOp) {
|
||||
// The result type is legal, but the input type is illegal. If splitting
|
||||
// ends up with the result type of each half still being legal, just
|
||||
// do that. If, however, that would result in an illegal result type,
|
||||
@ -1624,8 +1639,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
|
||||
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
|
||||
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
|
||||
NumElements/2);
|
||||
SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec);
|
||||
SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec);
|
||||
SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
|
||||
SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
|
||||
// Concatenate them to get the full intermediate truncation result.
|
||||
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
|
||||
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
|
||||
@ -1634,7 +1649,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
|
||||
// type. This should normally be something that ends up being legal directly,
|
||||
// but in theory if a target has very wide vectors and an annoyingly
|
||||
// restricted set of legal types, this split can chain to build things up.
|
||||
return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
|
||||
return DAG.getNode(TruncateOp, DL, OutVT, InterVec);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
|
||||
|
@ -3,11 +3,11 @@
|
||||
|
||||
define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
|
||||
; CHECK: fptosi_v4f64_to_v4i16
|
||||
; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
|
||||
; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
|
||||
; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
|
||||
; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
|
||||
; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
|
||||
; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v0.2d
|
||||
; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v1.2d
|
||||
; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
|
||||
; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d
|
||||
; CHECK: xtn v0.4h, v[[MID]].4s
|
||||
%tmp1 = load <4 x double>, <4 x double>* %ptr
|
||||
%tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
|
||||
ret <4 x i16> %tmp2
|
||||
@ -19,13 +19,13 @@ define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
|
||||
; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
|
||||
; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
|
||||
; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
|
||||
; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
|
||||
; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
|
||||
; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
|
||||
; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
|
||||
; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV1]].4h, v[[CONV0]].4h
|
||||
; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV3]].4h, v[[CONV2]].4h
|
||||
; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
|
||||
; CHECK-DAG: xtn2 v[[NA2]].4s, v[[CONV3]].2d
|
||||
; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
|
||||
; CHECK-DAG: xtn2 v[[NA0]].4s, v[[CONV1]].2d
|
||||
; CHECK-DAG: xtn v[[TMP1:[0-9]+]].4h, v[[NA0]].4s
|
||||
; CHECK-DAG: xtn2 v[[TMP1]].8h, v[[NA2]].4s
|
||||
; CHECK: xtn v0.8b, v[[TMP1]].8h
|
||||
%tmp1 = load <8 x double>, <8 x double>* %ptr
|
||||
%tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
|
||||
ret <8 x i8> %tmp2
|
||||
|
16
test/CodeGen/AArch64/vcvt-oversize.ll
Normal file
16
test/CodeGen/AArch64/vcvt-oversize.ll
Normal file
@ -0,0 +1,16 @@
|
||||
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
|
||||
|
||||
define <8 x i8> @float_to_i8(<8 x float>* %in) {
|
||||
; CHECK-LABEL: float_to_i8:
|
||||
; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v0.4s, v0.4s
|
||||
; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v1.4s, v1.4s
|
||||
; CHECK-DAG: fcvtzu v[[LSB2:[0-9]+]].4s, v[[LSB]].4s
|
||||
; CHECK-DAG: fcvtzu v[[MSB2:[0-9]+]].4s, v[[MSB]].4s
|
||||
; CHECK-DAG: xtn v[[TMP:[0-9]+]].4h, v[[LSB]].4s
|
||||
; CHECK-DAG: xtn2 v[[TMP]].8h, v[[MSB]].4s
|
||||
; CHECK-DAG: xtn v0.8b, v[[TMP]].8h
|
||||
%l = load <8 x float>, <8 x float>* %in
|
||||
%scale = fmul <8 x float> %l, <float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>
|
||||
%conv = fptoui <8 x float> %scale to <8 x i8>
|
||||
ret <8 x i8> %conv
|
||||
}
|
@ -180,8 +180,8 @@ define <2 x i64> @fix_float_to_i64(<2 x float> %in) {
|
||||
|
||||
define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
|
||||
; CHECK-LABEL: fix_double_to_i16:
|
||||
; CHECK: vcvt.s32.f64
|
||||
; CHECK: vcvt.s32.f64
|
||||
; CHECK: vcvt.u32.f64
|
||||
; CHECK: vcvt.u32.f64
|
||||
|
||||
%scale = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0>
|
||||
%conv = fptoui <4 x double> %scale to <4 x i16>
|
||||
|
Loading…
Reference in New Issue
Block a user