mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-24 20:30:06 +00:00
Optimization for SIGN_EXTEND operation on AVX.
Special handling was added for v4i32 -> v4i64 and v8i16 -> v8i32 extensions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@149600 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1ae52f686c
commit
dcabc7bca9
@ -1221,6 +1221,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setTargetDAGCombine(ISD::LOAD);
|
||||
setTargetDAGCombine(ISD::STORE);
|
||||
setTargetDAGCombine(ISD::ZERO_EXTEND);
|
||||
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
||||
setTargetDAGCombine(ISD::TRUNCATE);
|
||||
setTargetDAGCombine(ISD::SINT_TO_FP);
|
||||
if (Subtarget->is64Bit())
|
||||
@ -14641,6 +14642,55 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// PerformSExtCombine - Lower a vector SIGN_EXTEND on AVX targets.
///
/// AVX1 has no single instruction for a 256-bit integer sign extension,
/// so the v4i32 -> v4i64 and v8i16 -> v8i32 cases are split in two:
/// shuffle the source into its low and high halves (for v4i32 the masks
/// are {0, 1, -1, -1} and {2, 3, -1, -1}), sign-extend each half with a
/// vpmovsx-style node (v4i32 -> v2i64, v8i16 -> v4i32), and concatenate
/// the two halves back into the original wide type.
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget *Subtarget) {
  // Run only before op legalization, and only when AVX is available.
  if (!DCI.isBeforeLegalizeOps() || !Subtarget->hasAVX())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();
  DebugLoc dl = N->getDebugLoc();

  // Only the two 128 -> 256-bit widening cases are handled here.
  bool IsDWordToQWord = (VT == MVT::v4i64) && (SrcVT == MVT::v4i32);
  bool IsWordToDWord  = (VT == MVT::v8i32) && (SrcVT == MVT::v8i16);
  if (!IsDWordToQWord && !IsWordToDWord)
    return SDValue();

  unsigned NumElems = SrcVT.getVectorNumElements();

  // Select the low half of the source; trailing lanes are undef.
  SmallVector<int,8> LoMask(NumElems, -1);
  for (unsigned i = 0; i != NumElems/2; ++i)
    LoMask[i] = i;
  SDValue Lo = DAG.getVectorShuffle(SrcVT, dl, Src, DAG.getUNDEF(SrcVT),
                                    LoMask.data());

  // Select the high half of the source; trailing lanes are undef.
  SmallVector<int,8> HiMask(NumElems, -1);
  for (unsigned i = 0; i != NumElems/2; ++i)
    HiMask[i] = i + NumElems/2;
  SDValue Hi = DAG.getVectorShuffle(SrcVT, dl, Src, DAG.getUNDEF(SrcVT),
                                    HiMask.data());

  // Each half extends to the full-width element type at half the lane
  // count (v2i64 or v4i32), matching what vpmovsx produces.
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
                                VT.getVectorNumElements()/2);
  Lo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, Lo);
  Hi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, Hi);

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
}
|
||||
|
||||
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
|
||||
@ -14886,6 +14936,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
|
||||
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
|
||||
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget);
|
||||
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
|
||||
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
|
||||
case X86ISD::SHUFP: // Handle all target specific shuffles
|
||||
|
@ -219,6 +219,9 @@ namespace llvm {
|
||||
// VZEXT_MOVL - Vector move low and zero extend.
|
||||
VZEXT_MOVL,
|
||||
|
||||
// VSEXT_MOVL - Vector move low and sign extend.
|
||||
VSEXT_MOVL,
|
||||
|
||||
// VSHL, VSRL - 128-bit vector logical left / right shift
|
||||
VSHLDQ, VSRLDQ,
|
||||
|
||||
|
@ -71,6 +71,9 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
|
||||
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
|
||||
// Vector move-low nodes: X86vzmovl zero-extends, X86vsmovl sign-extends
// the low elements of an integer vector into wider lanes (vpmovsx).
def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
                        SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vsmovl  : SDNode<"X86ISD::VSEXT_MOVL",
                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
|
||||
|
||||
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
|
||||
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
|
||||
|
@ -5478,6 +5478,16 @@ let Predicates = [HasSSE41] in {
|
||||
(PMOVZXDQrm addr:$src)>;
|
||||
}
|
||||
|
||||
// Match the X86vsmovl nodes produced when splitting a 256-bit sext to
// the VEX-encoded sign-extend-low instructions.
let Predicates = [HasAVX] in {
  def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
  def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
}
|
||||
|
||||
// Same X86vsmovl selection for the legacy (non-VEX) SSE4.1 encodings.
let Predicates = [HasSSE41] in {
  def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
  def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
}
|
||||
|
||||
|
||||
multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
|
17
test/CodeGen/X86/avx-sext.ll
Executable file
17
test/CodeGen/X86/avx-sext.ll
Executable file
@ -0,0 +1,17 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||
|
||||
; A v8i16 -> v8i32 sign extension should select vpmovsxwd instead of
; being scalarized.
define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
;CHECK: sext_8i16_to_8i32
;CHECK: vpmovsxwd

  %ext = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %ext
}
|
||||
|
||||
; A v4i32 -> v4i64 sign extension should select vpmovsxdq instead of
; being scalarized.
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
;CHECK: sext_4i32_to_4i64
;CHECK: vpmovsxdq

  %ext = sext <4 x i32> %A to <4 x i64>
  ret <4 x i64> %ext
}
|
Loading…
Reference in New Issue
Block a user