From d2f85ae89566b3e1df2654bda1a13a2805aad20d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 7 Mar 2013 18:48:40 +0000 Subject: [PATCH] X86: Fold EXTRACT_SUBVECTORs of a BUILD_VECTOR into a smaller BUILD_VECTOR. That can usually be lowered efficiently and is common in sandybridge code. It would be nice to do this in DAGCombiner but we can't insert arbitrary BUILD_VECTORs this late. Fixes PR15462. llvm-svn: 176634 --- lib/Target/X86/X86ISelLowering.cpp | 5 +++++ test/CodeGen/X86/avx-shift.ll | 1 - test/CodeGen/X86/avx-vinsertf128.ll | 16 ++++++++++++++++ test/CodeGen/X86/avx-vpermil.ll | 4 ++-- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9727de82036..b19f2f66be6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -85,6 +85,11 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) * ElemsPerChunk); + // If the input is a buildvector just emit a smaller one. + if (Vec.getOpcode() == ISD::BUILD_VECTOR) + return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, + Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll index b0bff454c18..01eb7361e29 100644 --- a/test/CodeGen/X86/avx-shift.ll +++ b/test/CodeGen/X86/avx-shift.ll @@ -105,7 +105,6 @@ define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone { ; CHECK: _vshift08 ; CHECK: vextractf128 $1 ; CHECK: vpslld $23 -; CHECK: vextractf128 $1 ; CHECK: vpslld $23 define <8 x i32> @vshift08(<8 x i32> %a) nounwind { %bitop = shl <8 x i32> , %a diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll index 9a954fe8047..ee37b27996a 100644 --- a/test/CodeGen/X86/avx-vinsertf128.ll +++ b/test/CodeGen/X86/avx-vinsertf128.ll @@ -129,3 +129,19 @@ entry: %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1) ret <8 x float> %2 } + +define void @PR15462(i64* %p) { + store i64 0, i64* %p + %q = getelementptr i64* %p, i64 1 + store i64 0, i64* %q + %r = getelementptr i64* %p, i64 2 + store i64 0, i64* %r + %s = getelementptr i64* %p, i64 3 + store i64 0, i64* %s + ret void + +; CHECK: PR15462: +; CHECK: vxorps %xmm +; CHECK: vmovups +; CHECK: vmovups +} diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll index cb904b93313..7f2f9d821dd 100644 --- a/test/CodeGen/X86/avx-vpermil.ll +++ b/test/CodeGen/X86/avx-vpermil.ll @@ -45,8 +45,8 @@ entry: ret <8 x float> %shuffle } -; CHECK: palignr -; CHECK: palignr +; CHECK: palignr $8 +; CHECK: psrldq $8 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp { entry: %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32>