From d2f85ae89566b3e1df2654bda1a13a2805aad20d Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 7 Mar 2013 18:48:40 +0000
Subject: [PATCH] X86: Fold EXTRACT_SUBVECTORs of a BUILD_VECTOR into a smaller
 BUILD_VECTOR.

That can usually be lowered efficiently and is common in sandybridge code.
It would be nice to do this in DAGCombiner but we can't insert arbitrary
BUILD_VECTORs this late.

Fixes PR15462.

llvm-svn: 176634
---
 lib/Target/X86/X86ISelLowering.cpp  |  5 +++++
 test/CodeGen/X86/avx-shift.ll       |  1 -
 test/CodeGen/X86/avx-vinsertf128.ll | 16 ++++++++++++++++
 test/CodeGen/X86/avx-vpermil.ll     |  4 ++--
 4 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9727de82036..b19f2f66be6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -85,6 +85,11 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
   unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
                                * ElemsPerChunk);
 
+  // If the input is a buildvector just emit a smaller one.
+  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+                       Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk);
+
   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
   SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
                                VecIdx);
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index b0bff454c18..01eb7361e29 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -105,7 +105,6 @@ define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
 ; CHECK: _vshift08
 ; CHECK: vextractf128 $1
 ; CHECK: vpslld $23
-; CHECK: vextractf128 $1
 ; CHECK: vpslld $23
 define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
   %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index 9a954fe8047..ee37b27996a 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -129,3 +129,19 @@ entry:
   %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
   ret <8 x float> %2
 }
+
+define void @PR15462(i64* %p) {
+ store i64 0, i64* %p
+ %q = getelementptr i64* %p, i64 1
+ store i64 0, i64* %q
+ %r = getelementptr i64* %p, i64 2
+ store i64 0, i64* %r
+ %s = getelementptr i64* %p, i64 3
+ store i64 0, i64* %s
+ ret void
+
+; CHECK: PR15462:
+; CHECK: vxorps	%xmm
+; CHECK: vmovups
+; CHECK: vmovups
+}
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index cb904b93313..7f2f9d821dd 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -45,8 +45,8 @@ entry:
   ret <8 x float> %shuffle
 }
 
-; CHECK: palignr
-; CHECK: palignr
+; CHECK: palignr $8
+; CHECK: psrldq $8
 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>