Fix PR10688. Add support for spliting 256-bit vector shifts when the

shift amount is variable

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137885 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2011-08-17 22:12:20 +00:00
parent c40578250d
commit 0dd80b0d69
2 changed files with 30 additions and 10 deletions

View File

@ -9449,17 +9449,26 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
DAG, dl);
// Recreate the shift amount vectors
SmallVector<SDValue, 4> Amt1Csts;
SmallVector<SDValue, 4> Amt2Csts;
for (int i = 0; i < NumElems/2; ++i)
Amt1Csts.push_back(Amt->getOperand(i));
for (int i = NumElems/2; i < NumElems; ++i)
Amt2Csts.push_back(Amt->getOperand(i));
SDValue Amt1, Amt2;
if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
// Constant shift amount
SmallVector<SDValue, 4> Amt1Csts;
SmallVector<SDValue, 4> Amt2Csts;
for (int i = 0; i < NumElems/2; ++i)
Amt1Csts.push_back(Amt->getOperand(i));
for (int i = NumElems/2; i < NumElems; ++i)
Amt2Csts.push_back(Amt->getOperand(i));
SDValue Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
&Amt1Csts[0], NumElems/2);
SDValue Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
&Amt2Csts[0], NumElems/2);
Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
&Amt1Csts[0], NumElems/2);
Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
&Amt2Csts[0], NumElems/2);
} else {
// Variable shift amount
Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl);
Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32),
DAG, dl);
}
// Issue new vector shifts for the smaller types
V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);

View File

@ -62,3 +62,14 @@ define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
ret <16 x i16> %s
}
;;; Support variable shifts
; CHECK: _vshift08
; CHECK: vextractf128 $1
; CHECK: vpslld $23
; CHECK: vextractf128 $1
; CHECK: vpslld $23
define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
%bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
ret <8 x i32> %bitop
}