Fix PR14161

- Check that the index being extracted is the constant 0 before simplifying.
  Otherwise, retain the original sequence.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166504 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Liao 2012-10-23 21:40:15 +00:00
parent e8e6b6b710
commit 0787274b70
2 changed files with 42 additions and 1 deletions

View File

@ -6630,9 +6630,12 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
.getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
ConstantSDNode *CIdx =
dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
// If it's foldable, i.e. normal load with single use, we will let code
// selection to fold it. Otherwise, we will short the conversion sequence.
if (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())
if (CIdx && CIdx->getZExtValue() == 0 &&
(!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
}

View File

@ -0,0 +1,38 @@
; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=corei7 | FileCheck %s
; Regression test for PR14161. Both functions below call the SSE4.1 pminud
; intrinsic and differ only in which extracted element they return.
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>)
; @good: loads a <4 x i32> from the first pointer argument, applies pminud
; against a splat of 127, and returns element 0 of the result bitcast to
; <2 x i16>. The second argument and the values extracted from lanes 1-3 do
; not contribute to the return value.
; The FileCheck expectations at the end require pmovzxwq to immediately follow
; pminud, i.e. no separate extract instruction is emitted for lane 0.
define <2 x i16> @good(<4 x i32>*, <4 x i8>*) {
entry:
%2 = load <4 x i32>* %0, align 16
%3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
%4 = extractelement <4 x i32> %3, i32 0
%5 = extractelement <4 x i32> %3, i32 1
%6 = extractelement <4 x i32> %3, i32 2
%7 = extractelement <4 x i32> %3, i32 3
%8 = bitcast i32 %4 to <2 x i16>
%9 = bitcast i32 %5 to <2 x i16>
; Return the value derived from lane 0 (the case that may be simplified).
ret <2 x i16> %8
; CHECK: good
; CHECK: pminud
; CHECK-NEXT: pmovzxwq
; CHECK: ret
}
; @bad: same body as @good, except that it returns element 1 of the pminud
; result bitcast to <2 x i16>. The second argument and the values extracted
; from lanes 0, 2 and 3 do not contribute to the return value.
; The FileCheck expectations at the end require a pextrd between pminud and
; pmovzxwq, i.e. the extract of the non-zero lane must be kept rather than
; folded away.
define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) {
entry:
%2 = load <4 x i32>* %0, align 16
%3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
%4 = extractelement <4 x i32> %3, i32 0
%5 = extractelement <4 x i32> %3, i32 1
%6 = extractelement <4 x i32> %3, i32 2
%7 = extractelement <4 x i32> %3, i32 3
%8 = bitcast i32 %4 to <2 x i16>
%9 = bitcast i32 %5 to <2 x i16>
; Return the value derived from lane 1 (index != 0, so no simplification).
ret <2 x i16> %9
; CHECK: bad
; CHECK: pminud
; CHECK: pextrd
; CHECK: pmovzxwq
; CHECK: ret
}