From 24c96fa80cda54ec63dfc739df4d92796942ac1d Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 15 Jan 2013 23:43:14 +0000 Subject: [PATCH] Teach InstCombine to optimize extract of a value from a vector add operation with a constant zero. llvm-svn: 172576 --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 9 +++++++++ test/Transforms/InstCombine/vec_extract_elt.ll | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index dd7ea14e8a8..8bfcc805bc8 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace PatternMatch; /// CheapToScalarize - Return true if the value is cheaper to scalarize than it /// is to leave as a vector operation. isConstant indicates whether we're @@ -92,6 +94,13 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); } + // Extract a value from a vector add operation with a constant zero. + Value *Val = 0; Constant *Con = 0; + if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { + if (Con->getAggregateElement(EltNo)->isNullValue()) + return FindScalarElement(Val, EltNo); + } + // Otherwise, we don't know. return 0; } diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll index 63e4ee2112d..166066a201b 100644 --- a/test/Transforms/InstCombine/vec_extract_elt.ll +++ b/test/Transforms/InstCombine/vec_extract_elt.ll @@ -7,3 +7,13 @@ define i32 @test(float %f) { ret i32 %tmp19 } +define i64 @test2(i64 %in) { + %vec = insertelement <8 x i64> undef, i64 %in, i32 0 + %splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer + %add = add <8 x i64> %splat, + %scl1 = extractelement <8 x i64> %add, i32 0 + %scl2 = extractelement <8 x i64> %add, i32 0 + %r = add i64 %scl1, %scl2 + ret i64 %r +} +