mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-22 20:26:31 +00:00
[X86][SSE] Improve support for 128-bit vector sign extension
This patch improves support for sign extension of the lower lanes of vectors of integers by making use of the SSE41 pmovsx* sign extension instructions where possible, and optimizing the sign extension by shifts on pre-SSE41 targets (avoiding the use of i64 arithmetic shifts which require scalarization). It converts SIGN_EXTEND nodes to SIGN_EXTEND_VECTOR_INREG where necessary, which more closely matches the pmovsx* instruction than the default approach of using SIGN_EXTEND_INREG; the default approach splits the operation (into an ANY_EXTEND lowered to a shuffle followed by shifts), making instruction matching difficult during lowering. Necessary support for SIGN_EXTEND_VECTOR_INREG has been added to the DAGCombiner. Differential Revision: http://reviews.llvm.org/D9848. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237885 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cebe8f8b05
commit
87d1836793
@ -268,6 +268,7 @@ namespace {
|
||||
SDValue visitZERO_EXTEND(SDNode *N);
|
||||
SDValue visitANY_EXTEND(SDNode *N);
|
||||
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
|
||||
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
|
||||
SDValue visitTRUNCATE(SDNode *N);
|
||||
SDValue visitBITCAST(SDNode *N);
|
||||
SDValue visitBUILD_PAIR(SDNode *N);
|
||||
@ -1347,6 +1348,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
|
||||
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
|
||||
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
|
||||
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
|
||||
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
|
||||
case ISD::TRUNCATE: return visitTRUNCATE(N);
|
||||
case ISD::BITCAST: return visitBITCAST(N);
|
||||
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
|
||||
@ -5541,7 +5543,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
|
||||
Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
|
||||
Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
|
||||
&& "Expected EXTEND dag node in input!");
|
||||
|
||||
// fold (sext c1) -> c1
|
||||
// fold (zext c1) -> c1
|
||||
@ -5563,7 +5566,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
|
||||
unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
|
||||
unsigned ShAmt = VTBits - EVTBits;
|
||||
SmallVector<SDValue, 8> Elts;
|
||||
unsigned NumElts = N0->getNumOperands();
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
SDLoc DL(N);
|
||||
|
||||
for (unsigned i=0; i != NumElts; ++i) {
|
||||
@ -5576,7 +5579,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
|
||||
SDLoc DL(Op);
|
||||
ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
|
||||
const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
|
||||
if (Opcode == ISD::SIGN_EXTEND)
|
||||
if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
|
||||
Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
|
||||
DL, SVT));
|
||||
else
|
||||
@ -6805,6 +6808,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// DAG-combine handler for SIGN_EXTEND_VECTOR_INREG nodes.
// Folds the node when its operand is UNDEF or when tryToFoldExtendOfConstant
// succeeds; otherwise returns an empty SDValue to signal "no change".
SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
// An UNDEF input yields an UNDEF of the result type.
if (N0.getOpcode() == ISD::UNDEF)
|
||||
return DAG.getUNDEF(VT);
|
||||
|
||||
// Reuse the shared extend-of-constant folder; a non-null result replaces N.
if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
|
||||
LegalOperations))
|
||||
return SDValue(Res, 0);
|
||||
|
||||
// Nothing to combine.
return SDValue();
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
@ -3429,12 +3429,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
|
||||
assert(EVT.bitsLE(VT) && "Not extending!");
|
||||
if (EVT == VT) return N1; // Not actually extending
|
||||
|
||||
auto SignExtendInReg = [&](APInt Val) {
|
||||
unsigned FromBits = EVT.getScalarType().getSizeInBits();
|
||||
Val <<= Val.getBitWidth() - FromBits;
|
||||
Val = Val.ashr(Val.getBitWidth() - FromBits);
|
||||
return getConstant(Val, DL, VT.getScalarType());
|
||||
};
|
||||
|
||||
if (N1C) {
|
||||
APInt Val = N1C->getAPIntValue();
|
||||
unsigned FromBits = EVT.getScalarType().getSizeInBits();
|
||||
Val <<= Val.getBitWidth()-FromBits;
|
||||
Val = Val.ashr(Val.getBitWidth()-FromBits);
|
||||
return getConstant(Val, DL, VT);
|
||||
return SignExtendInReg(Val);
|
||||
}
|
||||
if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
|
||||
SDValue Op = N1.getOperand(i);
|
||||
if (Op.getValueType() != VT.getScalarType()) break;
|
||||
if (Op.getOpcode() == ISD::UNDEF) {
|
||||
Ops.push_back(Op);
|
||||
continue;
|
||||
}
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode())) {
|
||||
APInt Val = C->getAPIntValue();
|
||||
Ops.push_back(SignExtendInReg(Val));
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (Ops.size() == VT.getVectorNumElements())
|
||||
return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1004,6 +1004,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
}
|
||||
|
||||
if (Subtarget->hasSSE2()) {
|
||||
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
|
||||
|
||||
setOperationAction(ISD::SRL, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::SRL, MVT::v16i8, Custom);
|
||||
|
||||
@ -13914,6 +13918,63 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget
|
||||
return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
|
||||
}
|
||||
|
||||
// Custom lowering for ISD::SIGN_EXTEND_VECTOR_INREG on X86.
// Op is the node to lower; its result type and its operand type must have the
// same total bit width, and the result elements must be wider than the input
// elements (both are asserted below).
// Returns the lowered value, or an empty SDValue when the result type is not
// one of v2i64/v4i32/v8i16, the input element type is not i32/i16/i8, or no
// lowering path below applies.
static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
|
||||
const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDValue In = Op->getOperand(0);
|
||||
MVT VT = Op->getSimpleValueType(0);
|
||||
MVT InVT = In.getSimpleValueType();
|
||||
assert(VT.getSizeInBits() == InVT.getSizeInBits());
|
||||
|
||||
MVT SVT = VT.getScalarType();
|
||||
MVT InSVT = InVT.getScalarType();
|
||||
assert(SVT.getScalarSizeInBits() > InSVT.getScalarSizeInBits());
|
||||
|
||||
// Only these result vector types and input element types are handled here.
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
|
||||
return SDValue();
|
||||
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
|
||||
return SDValue();
|
||||
|
||||
SDLoc dl(Op);
|
||||
|
||||
// SSE41 targets can use the pmovsx* instructions directly.
|
||||
if (Subtarget->hasSSE41())
|
||||
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
|
||||
|
||||
// pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
|
||||
SDValue Curr = In;
|
||||
MVT CurrVT = InVT;
|
||||
|
||||
// As SRAI is only available on i16/i32 types, we expand only up to i32
|
||||
// and handle i64 separately.
|
||||
// Each iteration doubles the element width and halves the element count,
// stopping once the result type is reached or the elements are i32.
// NOTE(review): presumably UNPCKL(undef, Curr) leaves each source element in
// the high half of its widened lane, ready for the arithmetic shift below -
// confirm against the X86ISD::UNPCKL definition.
while (CurrVT != VT && CurrVT.getScalarType() != MVT::i32) {
|
||||
Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
|
||||
MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
|
||||
CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
|
||||
Curr = DAG.getNode(ISD::BITCAST, dl, CurrVT, Curr);
|
||||
}
|
||||
|
||||
// If any widening happened, shift right (VSRAI) by the number of bits added
// relative to the original input element width.
SDValue SignExt = Curr;
|
||||
if (CurrVT != InVT) {
|
||||
unsigned SignExtShift =
|
||||
CurrVT.getScalarSizeInBits() - InSVT.getScalarSizeInBits();
|
||||
SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
|
||||
DAG.getConstant(SignExtShift, dl, MVT::i8));
|
||||
}
|
||||
|
||||
// Reached the requested result type: done.
if (CurrVT == VT)
|
||||
return SignExt;
|
||||
|
||||
// v2i64 result built from v4i32 pieces: shift by 31 to get the per-lane sign
// value, then interleave lanes 0 and 1 of SignExt with lanes 0 and 1 of Sign
// (shuffle indices 4 and 5 select from the second operand) and bitcast.
if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) {
|
||||
SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
|
||||
DAG.getConstant(31, dl, MVT::i8));
|
||||
SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, Ext);
|
||||
}
|
||||
|
||||
// No applicable lowering path.
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
MVT VT = Op->getSimpleValueType(0);
|
||||
@ -17580,6 +17641,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
|
||||
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
|
||||
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
|
||||
case ISD::SIGN_EXTEND_VECTOR_INREG:
|
||||
return LowerSIGN_EXTEND_VECTOR_INREG(Op, Subtarget, DAG);
|
||||
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
|
||||
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
|
||||
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
|
||||
@ -23683,16 +23746,19 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc dl(N);
|
||||
EVT SVT = VT.getScalarType();
|
||||
EVT InVT = N0->getValueType(0);
|
||||
EVT InSVT = InVT.getScalarType();
|
||||
SDLoc DL(N);
|
||||
|
||||
// (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
|
||||
// (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
|
||||
// This exposes the sext to the sdivrem lowering, so that it directly extends
|
||||
// from AH (which we otherwise need to do contortions to access).
|
||||
if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
|
||||
N0.getValueType() == MVT::i8 && VT == MVT::i32) {
|
||||
InVT == MVT::i8 && VT == MVT::i32) {
|
||||
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
|
||||
SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
|
||||
SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, DL, NodeTys,
|
||||
N0.getOperand(0), N0.getOperand(1));
|
||||
DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
|
||||
return R.getValue(1);
|
||||
@ -23700,14 +23766,57 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
if (!DCI.isBeforeLegalizeOps()) {
|
||||
if (N0.getValueType() == MVT::i1) {
|
||||
SDValue Zero = DAG.getConstant(0, dl, VT);
|
||||
SDValue Zero = DAG.getConstant(0, DL, VT);
|
||||
SDValue AllOnes =
|
||||
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT);
|
||||
return DAG.getNode(ISD::SELECT, dl, VT, N0, AllOnes, Zero);
|
||||
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
|
||||
return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero);
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
if (VT.isVector()) {
|
||||
auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
|
||||
EVT InVT = N->getValueType(0);
|
||||
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
|
||||
128 / InVT.getScalarSizeInBits());
|
||||
SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
|
||||
DAG.getUNDEF(InVT));
|
||||
Opnds[0] = N;
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
|
||||
};
|
||||
|
||||
// If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
|
||||
// which ensures lowering to X86ISD::VSEXT (pmovsx*).
|
||||
if (VT.getSizeInBits() == 128 &&
|
||||
(SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
|
||||
(InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
|
||||
SDValue ExOp = ExtendToVec128(DL, N0);
|
||||
return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
|
||||
}
|
||||
|
||||
// On pre-AVX2 targets, split into 128-bit nodes of
|
||||
// ISD::SIGN_EXTEND_VECTOR_INREG.
|
||||
if (!Subtarget->hasInt256() && !(VT.getSizeInBits() % 128) &&
|
||||
(SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
|
||||
(InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
|
||||
unsigned NumVecs = VT.getSizeInBits() / 128;
|
||||
unsigned NumSubElts = 128 / SVT.getSizeInBits();
|
||||
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
|
||||
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
|
||||
|
||||
SmallVector<SDValue, 8> Opnds;
|
||||
for (unsigned i = 0, Offset = 0; i != NumVecs;
|
||||
++i, Offset += NumSubElts) {
|
||||
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
|
||||
DAG.getIntPtrConstant(Offset, DL));
|
||||
SrcVec = ExtendToVec128(DL, SrcVec);
|
||||
SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
|
||||
Opnds.push_back(SrcVec);
|
||||
}
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
|
||||
}
|
||||
}
|
||||
|
||||
if (!Subtarget->hasFp256())
|
||||
return SDValue();
|
||||
|
||||
|
@ -50,20 +50,22 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
|
||||
; CHECK: movq
|
||||
; CHECK: shlq
|
||||
; CHECK: sarq
|
||||
; CHECK: vmovq
|
||||
; CHECK: movq
|
||||
; CHECK: shlq
|
||||
; CHECK: sarq
|
||||
; CHECK: vmovq
|
||||
; CHECK: vpunpcklqdq
|
||||
; CHECK: vmovd
|
||||
; CHECK: vpinsrd
|
||||
; CHECK: movq
|
||||
; CHECK: shlq
|
||||
; CHECK: sarq
|
||||
; CHECK: vmovq
|
||||
; CHECK: vpinsrd
|
||||
; CHECK: shlq
|
||||
; CHECK: sarq
|
||||
; CHECK: vmovq
|
||||
; CHECK: vpunpcklqdq
|
||||
; CHECK: vpinsrd
|
||||
; CHECK: vpmovsxdq
|
||||
; CHECK: vmovd
|
||||
; CHECK: vpinsrd
|
||||
; CHECK: vpmovsxdq
|
||||
; CHECK: vinsertf128
|
||||
; CHECK: ret
|
||||
|
||||
|
@ -16,13 +16,9 @@ define <8 x float> @foo1_8(<8 x i8> %src) {
|
||||
;
|
||||
; CHECK-WIDE-LABEL: foo1_8:
|
||||
; CHECK-WIDE: ## BB#0:
|
||||
; CHECK-WIDE-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; CHECK-WIDE-NEXT: vpslld $24, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpsrad $24, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; CHECK-WIDE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
|
||||
; CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpmovsxbd %xmm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vpmovsxbd %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
@ -40,9 +36,7 @@ define <4 x float> @foo1_4(<4 x i8> %src) {
|
||||
;
|
||||
; CHECK-WIDE-LABEL: foo1_4:
|
||||
; CHECK-WIDE: ## BB#0:
|
||||
; CHECK-WIDE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpmovsxbd %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: retl
|
||||
%res = sitofp <4 x i8> %src to <4 x float>
|
||||
|
@ -10,37 +10,30 @@
|
||||
define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
|
||||
; SSE2-LABEL: sext_8i16_to_8i32:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: # kill: XMM0<def> XMM1<kill>
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: pslld $16, %xmm0
|
||||
; SSE2-NEXT: psrad $16, %xmm0
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pslld $16, %xmm1
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $16, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_8i16_to_8i32:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: # kill: XMM0<def> XMM1<kill>
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSSE3-NEXT: pslld $16, %xmm0
|
||||
; SSSE3-NEXT: psrad $16, %xmm0
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: pslld $16, %xmm1
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSSE3-NEXT: psrad $16, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSSE3-NEXT: psrad $16, %xmm1
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: sext_8i16_to_8i32:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmovzxwd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pslld $16, %xmm0
|
||||
; SSE41-NEXT: psrad $16, %xmm0
|
||||
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSE41-NEXT: pslld $16, %xmm1
|
||||
; SSE41-NEXT: psrad $16, %xmm1
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: sext_8i16_to_8i32:
|
||||
@ -58,13 +51,10 @@ define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
|
||||
;
|
||||
; X32-SSE41-LABEL: sext_8i16_to_8i32:
|
||||
; X32-SSE41: # BB#0: # %entry
|
||||
; X32-SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; X32-SSE41-NEXT: pmovzxwd %xmm1, %xmm0
|
||||
; X32-SSE41-NEXT: pslld $16, %xmm0
|
||||
; X32-SSE41-NEXT: psrad $16, %xmm0
|
||||
; X32-SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; X32-SSE41-NEXT: pslld $16, %xmm1
|
||||
; X32-SSE41-NEXT: psrad $16, %xmm1
|
||||
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm2
|
||||
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm1
|
||||
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE41-NEXT: retl
|
||||
entry:
|
||||
%B = sext <8 x i16> %A to <8 x i32>
|
||||
@ -74,68 +64,31 @@ entry:
|
||||
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
|
||||
; SSE2-LABEL: sext_4i32_to_4i64:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
|
||||
; SSE2-NEXT: movd %xmm1, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: movd %xmm1, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm1
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
|
||||
; SSE2-NEXT: movd %xmm0, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: movd %xmm0, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm0
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_4i32_to_4i64:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSSE3-NEXT: movd %xmm1, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm1
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
|
||||
; SSSE3-NEXT: movd %xmm0, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: movd %xmm0, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm0
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: sext_4i32_to_4i64:
|
||||
; SSE41: # BB#0: # %entry
|
||||
; SSE41-NEXT: pmovzxdq %xmm0, %xmm1
|
||||
; SSE41-NEXT: pextrq $1, %xmm1, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm3
|
||||
; SSE41-NEXT: movd %xmm1, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm2
|
||||
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
|
||||
; SSE41-NEXT: pextrq $1, %xmm0, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm3
|
||||
; SSE41-NEXT: movd %xmm0, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm1
|
||||
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
||||
; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -154,20 +107,9 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
|
||||
;
|
||||
; X32-SSE41-LABEL: sext_4i32_to_4i64:
|
||||
; X32-SSE41: # BB#0: # %entry
|
||||
; X32-SSE41-NEXT: pmovzxdq %xmm0, %xmm2
|
||||
; X32-SSE41-NEXT: movd %xmm2, %eax
|
||||
; X32-SSE41-NEXT: sarl $31, %eax
|
||||
; X32-SSE41-NEXT: pextrd $2, %xmm2, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm2
|
||||
; X32-SSE41-NEXT: sarl $31, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm2
|
||||
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
|
||||
; X32-SSE41-NEXT: movd %xmm1, %eax
|
||||
; X32-SSE41-NEXT: sarl $31, %eax
|
||||
; X32-SSE41-NEXT: pextrd $2, %xmm1, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm1
|
||||
; X32-SSE41-NEXT: sarl $31, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm1
|
||||
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
|
||||
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
|
||||
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE41-NEXT: retl
|
||||
entry:
|
||||
@ -252,20 +194,26 @@ entry:
|
||||
define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
|
||||
; SSE2-LABEL: load_sext_test3:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movsbq 1(%rdi), %rax
|
||||
; SSE2-NEXT: movd %rax, %xmm1
|
||||
; SSE2-NEXT: movsbq (%rdi), %rax
|
||||
; SSE2-NEXT: movd %rax, %xmm0
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE2-NEXT: movzwl (%rdi), %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: psrad $24, %xmm0
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: load_sext_test3:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movsbq 1(%rdi), %rax
|
||||
; SSSE3-NEXT: movd %rax, %xmm1
|
||||
; SSSE3-NEXT: movsbq (%rdi), %rax
|
||||
; SSSE3-NEXT: movd %rax, %xmm0
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSSE3-NEXT: movzwl (%rdi), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: psrad $24, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: load_sext_test3:
|
||||
@ -292,20 +240,22 @@ entry:
|
||||
define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
|
||||
; SSE2-LABEL: load_sext_test4:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movswq 2(%rdi), %rax
|
||||
; SSE2-NEXT: movd %rax, %xmm1
|
||||
; SSE2-NEXT: movswq (%rdi), %rax
|
||||
; SSE2-NEXT: movd %rax, %xmm0
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE2-NEXT: movd (%rdi), %xmm0
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: psrad $16, %xmm0
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: load_sext_test4:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movswq 2(%rdi), %rax
|
||||
; SSSE3-NEXT: movd %rax, %xmm1
|
||||
; SSSE3-NEXT: movswq (%rdi), %rax
|
||||
; SSSE3-NEXT: movd %rax, %xmm0
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSSE3-NEXT: movd (%rdi), %xmm0
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: psrad $16, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: load_sext_test4:
|
||||
@ -332,20 +282,18 @@ entry:
|
||||
define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
|
||||
; SSE2-LABEL: load_sext_test5:
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movslq 4(%rdi), %rax
|
||||
; SSE2-NEXT: movd %rax, %xmm1
|
||||
; SSE2-NEXT: movslq (%rdi), %rax
|
||||
; SSE2-NEXT: movd %rax, %xmm0
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE2-NEXT: movq (%rdi), %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: load_sext_test5:
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movslq 4(%rdi), %rax
|
||||
; SSSE3-NEXT: movd %rax, %xmm1
|
||||
; SSSE3-NEXT: movslq (%rdi), %rax
|
||||
; SSSE3-NEXT: movd %rax, %xmm0
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSSE3-NEXT: movq (%rdi), %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: load_sext_test5:
|
||||
@ -410,72 +358,35 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: pslld $31, %xmm0
|
||||
; SSE2-NEXT: psrad $31, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
|
||||
; SSE2-NEXT: movd %xmm1, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: movd %xmm1, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm1
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
|
||||
; SSE2-NEXT: movd %xmm0, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: movd %xmm0, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm0
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_4i1_to_4i64:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: pslld $31, %xmm0
|
||||
; SSSE3-NEXT: psrad $31, %xmm0
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSSE3-NEXT: movd %xmm1, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm1
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
|
||||
; SSSE3-NEXT: movd %xmm0, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: movd %xmm0, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm0
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: sext_4i1_to_4i64:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pslld $31, %xmm0
|
||||
; SSE41-NEXT: psrad $31, %xmm0
|
||||
; SSE41-NEXT: pmovzxdq %xmm0, %xmm1
|
||||
; SSE41-NEXT: pextrq $1, %xmm1, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm3
|
||||
; SSE41-NEXT: movd %xmm1, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm2
|
||||
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
|
||||
; SSE41-NEXT: pextrq $1, %xmm0, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm3
|
||||
; SSE41-NEXT: movd %xmm0, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm1
|
||||
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
||||
; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -500,20 +411,9 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
|
||||
; X32-SSE41: # BB#0:
|
||||
; X32-SSE41-NEXT: pslld $31, %xmm0
|
||||
; X32-SSE41-NEXT: psrad $31, %xmm0
|
||||
; X32-SSE41-NEXT: pmovzxdq %xmm0, %xmm2
|
||||
; X32-SSE41-NEXT: movd %xmm2, %eax
|
||||
; X32-SSE41-NEXT: sarl $31, %eax
|
||||
; X32-SSE41-NEXT: pextrd $2, %xmm2, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm2
|
||||
; X32-SSE41-NEXT: sarl $31, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm2
|
||||
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
|
||||
; X32-SSE41-NEXT: movd %xmm1, %eax
|
||||
; X32-SSE41-NEXT: sarl $31, %eax
|
||||
; X32-SSE41-NEXT: pextrd $2, %xmm1, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm1
|
||||
; X32-SSE41-NEXT: sarl $31, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm1
|
||||
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
|
||||
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
|
||||
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE41-NEXT: retl
|
||||
%extmask = sext <4 x i1> %mask to <4 x i64>
|
||||
@ -576,72 +476,35 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: pslld $24, %xmm0
|
||||
; SSE2-NEXT: psrad $24, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
|
||||
; SSE2-NEXT: movd %xmm1, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSE2-NEXT: movd %xmm1, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm1
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
|
||||
; SSE2-NEXT: movd %xmm0, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: movd %xmm0, %rax
|
||||
; SSE2-NEXT: cltq
|
||||
; SSE2-NEXT: movd %rax, %xmm0
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_4i8_to_4i64:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: pslld $24, %xmm0
|
||||
; SSSE3-NEXT: psrad $24, %xmm0
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; SSSE3-NEXT: movd %xmm1, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm1
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
|
||||
; SSSE3-NEXT: movd %xmm0, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: movd %xmm0, %rax
|
||||
; SSSE3-NEXT: cltq
|
||||
; SSSE3-NEXT: movd %rax, %xmm0
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: sext_4i8_to_4i64:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pslld $24, %xmm0
|
||||
; SSE41-NEXT: psrad $24, %xmm0
|
||||
; SSE41-NEXT: pmovzxdq %xmm0, %xmm1
|
||||
; SSE41-NEXT: pextrq $1, %xmm1, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm3
|
||||
; SSE41-NEXT: movd %xmm1, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm2
|
||||
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
|
||||
; SSE41-NEXT: pextrq $1, %xmm0, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm3
|
||||
; SSE41-NEXT: movd %xmm0, %rax
|
||||
; SSE41-NEXT: cltq
|
||||
; SSE41-NEXT: movd %rax, %xmm1
|
||||
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
||||
; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -666,20 +529,9 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
|
||||
; X32-SSE41: # BB#0:
|
||||
; X32-SSE41-NEXT: pslld $24, %xmm0
|
||||
; X32-SSE41-NEXT: psrad $24, %xmm0
|
||||
; X32-SSE41-NEXT: pmovzxdq %xmm0, %xmm2
|
||||
; X32-SSE41-NEXT: movd %xmm2, %eax
|
||||
; X32-SSE41-NEXT: sarl $31, %eax
|
||||
; X32-SSE41-NEXT: pextrd $2, %xmm2, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm2
|
||||
; X32-SSE41-NEXT: sarl $31, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm2
|
||||
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
|
||||
; X32-SSE41-NEXT: movd %xmm1, %eax
|
||||
; X32-SSE41-NEXT: sarl $31, %eax
|
||||
; X32-SSE41-NEXT: pextrd $2, %xmm1, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm1
|
||||
; X32-SSE41-NEXT: sarl $31, %ecx
|
||||
; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm1
|
||||
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
|
||||
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
|
||||
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE41-NEXT: retl
|
||||
%extmask = sext <4 x i8> %mask to <4 x i64>
|
||||
|
@ -14,8 +14,8 @@ target triple = "x86_64-apple-macosx"
|
||||
; <rdar://problem/18675020>
|
||||
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: vmovdqa {{.*#+}} xmm0 = [65535,0,0,65535]
|
||||
; CHECK: vmovdqa {{.*#+}} xmm2 = [65533,124,125,14807]
|
||||
; CHECK: vmovdqa {{.*#+}} xmm1 = [65533,124,125,14807]
|
||||
; CHECK: vmovdqa {{.*#+}} xmm1 = [65535,0,0,65535]
|
||||
; CHECK: ret
|
||||
define void @test(<4 x i16>* %a, <4 x i16>* %b) {
|
||||
body:
|
||||
@ -33,13 +33,14 @@ body:
|
||||
; of the condition.
|
||||
;
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: vpslld $31, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1
|
||||
; CHECK-NEXT: vpshufd $78, %xmm0, %xmm0 ## xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, [[MASK:%ymm[0-9]+]]
|
||||
; CHECK: vblendvpd [[MASK]]
|
||||
; CHECK: retq
|
||||
; CHECK: vpslld $31, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpsrad $31, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, [[MASK:%ymm[0-9]+]]
|
||||
; CHECK: vblendvpd [[MASK]]
|
||||
; CHECK: retq
|
||||
define void @test2(double** %call1559, i64 %indvars.iv4198, <4 x i1> %tmp1895) {
|
||||
bb:
|
||||
%arrayidx1928 = getelementptr inbounds double*, double** %call1559, i64 %indvars.iv4198
|
||||
|
Loading…
x
Reference in New Issue
Block a user