mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-05 18:27:59 +00:00
Make LowerSIGN_EXTEND_INREG split 256-bit vectors into two 128-bit halves when only AVX1 is available, and use the 256-bit AVX2 shift intrinsics when AVX2 is available.
llvm-svn: 145022
This commit is contained in:
parent
5cadaed864
commit
62ae335144
@ -10571,9 +10571,9 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
SDNode* Node = Op.getNode();
|
||||
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
|
||||
EVT VT = Node->getValueType(0);
|
||||
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
if (Subtarget->hasXMMInt() && VT.isVector()) {
|
||||
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
|
||||
ExtraVT.getScalarType().getSizeInBits();
|
||||
@ -10584,21 +10584,55 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
return SDValue();
|
||||
case MVT::v4i32: {
|
||||
case MVT::v4i32:
|
||||
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
|
||||
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
|
||||
break;
|
||||
}
|
||||
case MVT::v8i16: {
|
||||
case MVT::v8i16:
|
||||
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
|
||||
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
|
||||
break;
|
||||
}
|
||||
case MVT::v8i32:
|
||||
case MVT::v16i16:
|
||||
if (!Subtarget->hasAVX())
|
||||
return SDValue();
|
||||
if (!Subtarget->hasAVX2()) {
|
||||
// needs to be split
|
||||
int NumElems = VT.getVectorNumElements();
|
||||
SDValue Idx0 = DAG.getConstant(0, MVT::i32);
|
||||
SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
|
||||
|
||||
// Extract the LHS vectors
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
|
||||
SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
|
||||
|
||||
MVT EltVT = VT.getVectorElementType().getSimpleVT();
|
||||
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
|
||||
|
||||
EVT ExtraEltVT = ExtraVT.getVectorElementType();
|
||||
int ExtraNumElems = ExtraVT.getVectorNumElements();
|
||||
ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
|
||||
ExtraNumElems/2);
|
||||
SDValue Extra = DAG.getValueType(ExtraVT);
|
||||
|
||||
LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
|
||||
LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
|
||||
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
|
||||
}
|
||||
if (VT == MVT::v8i32) {
|
||||
SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_d;
|
||||
SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_d;
|
||||
} else {
|
||||
SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_w;
|
||||
SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_w;
|
||||
}
|
||||
}
|
||||
|
||||
SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(SHLIntrinsicsID, MVT::i32),
|
||||
Node->getOperand(0), ShAmt);
|
||||
Op.getOperand(0), ShAmt);
|
||||
|
||||
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(SRAIntrinsicsID, MVT::i32),
|
||||
|
@ -112,3 +112,27 @@ define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
|
||||
ret <8 x i32> %bitop
|
||||
}
|
||||
|
||||
;;; Uses shifts for sign extension
|
||||
; CHECK: _sext_v16i16
|
||||
; CHECK: vpsllw
|
||||
; CHECK: vpsraw
|
||||
; CHECK: vpsllw
|
||||
; CHECK: vpsraw
|
||||
; CHECK: vinsertf128
|
||||
; Truncates every i16 lane of %a to i8 and sign-extends it back to i16.
; The CHECK lines preceding this function expect two vpsllw/vpsraw pairs
; followed by a vinsertf128, i.e. the 256-bit operation handled as two halves.
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
|
||||
%b = trunc <16 x i16> %a to <16 x i8>
|
||||
%c = sext <16 x i8> %b to <16 x i16>
|
||||
ret <16 x i16> %c
|
||||
}
|
||||
|
||||
; CHECK: _sext_v8i32
|
||||
; CHECK: vpslld
|
||||
; CHECK: vpsrad
|
||||
; CHECK: vpslld
|
||||
; CHECK: vpsrad
|
||||
; CHECK: vinsertf128
|
||||
; Truncates every i32 lane of %a to i16 and sign-extends it back to i32.
; The CHECK lines preceding this function expect two vpslld/vpsrad pairs
; followed by a vinsertf128, i.e. the 256-bit operation handled as two halves.
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
|
||||
%b = trunc <8 x i32> %a to <8 x i16>
|
||||
%c = sext <8 x i16> %b to <8 x i32>
|
||||
ret <8 x i32> %c
|
||||
}
|
||||
|
@ -246,3 +246,23 @@ define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
|
||||
; CHECK: vpsubb
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
; CHECK: _sext_v16i16
|
||||
; CHECK: vpsllw
|
||||
; CHECK: vpsraw
|
||||
; CHECK-NOT: vinsertf128
|
||||
; Truncates every i16 lane of %a to i8 and sign-extends it back to i16.
; The CHECK lines preceding this function expect a vpsllw followed by a vpsraw
; and no vinsertf128, i.e. the vector is not reassembled from halves.
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
|
||||
%b = trunc <16 x i16> %a to <16 x i8>
|
||||
%c = sext <16 x i8> %b to <16 x i16>
|
||||
ret <16 x i16> %c
|
||||
}
|
||||
|
||||
; CHECK: _sext_v8i32
|
||||
; CHECK: vpslld
|
||||
; CHECK: vpsrad
|
||||
; CHECK-NOT: vinsertf128
|
||||
; Truncates every i32 lane of %a to i16 and sign-extends it back to i32.
; The CHECK lines preceding this function expect a vpslld followed by a vpsrad
; and no vinsertf128, i.e. the vector is not reassembled from halves.
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
|
||||
%b = trunc <8 x i32> %a to <8 x i16>
|
||||
%c = sext <8 x i16> %b to <8 x i32>
|
||||
ret <8 x i32> %c
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user