Fix an optimization involving EXTRACT_SUBVECTOR in DAGCombine so it behaves correctly. PR11494.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145996 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eli Friedman 2011-12-07 00:11:56 +00:00
parent c9040b3b13
commit 26323442d5
2 changed files with 34 additions and 12 deletions

View File

@ -7181,19 +7181,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue(); return SDValue();
// Combine: // Only handle cases where both indexes are constants with the same type.
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
// Into: ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
// indicies are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
//
SDValue InsIdx = N->getOperand(1);
SDValue ExtIdx = V->getOperand(2);
if (InsIdx == ExtIdx) if (InsIdx && ExtIdx &&
return V->getOperand(1); InsIdx->getValueType(0).getSizeInBits() <= 64 &&
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, ExtIdx->getValueType(0).getSizeInBits() <= 64) {
V->getOperand(0), N->getOperand(1)); // Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
// indices are equal => V2
// otherwise => (extract_subvec V1, ExtIdx)
if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
return V->getOperand(1);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
V->getOperand(0), N->getOperand(1));
}
} }
return SDValue(); return SDValue();

View File

@ -0,0 +1,18 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; PR11494
; Regression test for PR11494. The IR places a 128-bit vector in the upper
; half of a 256-bit vector and then reads that same upper half back out, so
; the value stored to %p should simply be the pmaxsd result.
; NOTE(review): presumably the two shuffles become insert_subvector /
; extract_subvector nodes during x86 AVX lowering, which is the pattern the
; DAG combine handles - confirm against the lowering code.
define void @test(<4 x i32>* nocapture %p) nounwind {
; Expected assembly: after the function label, the vpxor, vpmaxsd, vmovdqu and
; ret instructions must appear on consecutive output lines.
; CHECK: test:
; CHECK: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
; CHECK-NEXT: ret
; %a: signed max of the constant vector <-8, -9, -10, -11> and the zero vector.
%a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> <i32 -8, i32 -9, i32 -10, i32 -11>, <4 x i32> zeroinitializer) nounwind
; %b: widen to 8 lanes; lanes 0-3 are undef, lanes 4-7 take elements 0-3 of %a.
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
; %c: take lanes 4-7 of %b, i.e. exactly the lanes that were filled from %a.
%c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; The store is only 1-byte aligned, matching the unaligned vmovdqu expected above.
store <4 x i32> %c, <4 x i32>* %p, align 1
ret void
}
; Declarations of the SSE4.1 packed signed 32-bit min/max intrinsics.
; Only pmaxsd is called by @test; pminsd has no caller in the visible part of
; this file.
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone