From 13774eea42c993ca1ab8a6d19373d3967002c2ad Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Wed, 12 Oct 2016 00:48:25 +0000 Subject: [PATCH] [PPCMIPeephole] Fix splat elimination Summary: In PPCMIPeephole, when we see two splat instructions, we can't simply do the following transformation: B = Splat A C = Splat B => C = Splat A because B may still be used between these two instructions. Instead, we should make the second Splat a PPC::COPY and let later passes decide whether to remove it or not: B = Splat A C = Splat B => B = Splat A C = COPY B Fixes PR30663. Reviewers: echristo, iteratee, kbarton, nemanjai Subscribers: mehdi_amini, llvm-commits Differential Revision: https://reviews.llvm.org/D25493 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283961 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCMIPeephole.cpp | 8 +++++--- test/CodeGen/PowerPC/pr30663.ll | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/PowerPC/pr30663.ll diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index 3360e74db99..ee62bb3b500 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -201,11 +201,13 @@ bool PPCMIPeephole::simplifyCode(void) { // Splat fed by another splat - switch the output of the first // and remove the second. if (SameOpcode) { - DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + DEBUG(dbgs() << "Changing redundant splat to a copy: "); + DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .addOperand(MI.getOperand(OpNo)); ToErase = &MI; Simplified = true; - DEBUG(dbgs() << "Removing redundant splat: "); - DEBUG(MI.dump()); } // Splat fed by a shift. Usually when we align value to splat into // vector element zero. 
diff --git a/test/CodeGen/PowerPC/pr30663.ll b/test/CodeGen/PowerPC/pr30663.ll new file mode 100644 index 00000000000..0772fcaadfe --- /dev/null +++ b/test/CodeGen/PowerPC/pr30663.ll @@ -0,0 +1,24 @@ +; RUN: llc -O1 < %s | FileCheck %s +target triple = "powerpc64le-linux-gnu" + +; The second xxspltw should be eliminated. +; CHECK: xxspltw +; CHECK-NOT: xxspltw +define void @Test() { +bb4: + %tmp = load <4 x i8>, <4 x i8>* undef + %tmp8 = bitcast <4 x i8> %tmp to float + %tmp18 = fmul float %tmp8, undef + %tmp19 = fsub float 0.000000e+00, %tmp18 + store float %tmp19, float* undef + %tmp22 = shufflevector <4 x i8> %tmp, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + %tmp23 = bitcast <16 x i8> %tmp22 to <4 x float> + %tmp25 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> %tmp23, <4 x float> undef) + %tmp26 = fsub <4 x float> zeroinitializer, %tmp25 + %tmp27 = bitcast <4 x float> %tmp26 to <4 x i32> + tail call void @llvm.ppc.altivec.stvx(<4 x i32> %tmp27, i8* undef) + ret void +} + +declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)