[PPC] Better codegen for AND, ANY_EXT, SRL sequence

https://reviews.llvm.org/D24924. This improves the code generated for a sequence of AND, ANY_EXT, SRL instructions. This is a targeted fix for this special pattern. The pattern is generated by the target-independent DAG combiner, so a more general fix may not be necessary. If we come across other similar cases, some ideas for handling them are discussed on the code review; git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284983 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-03 17:31:50 +00:00 · 2016-10-24 15:46:58 +00:00 · 2016-10-24 15:46:58 +00:00 · 5ba8f14a10
commit 5ba8f14a10
parent 83768272ee
3 changed files with 52 additions and 0 deletions
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -2657,6 +2657,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
      MB = 64 - countTrailingOnes(Imm64);
      SH = 0;

+      if (Val.getOpcode() == ISD::ANY_EXTEND) {
+        auto Op0 = Val.getOperand(0);
+        if ( Op0.getOpcode() == ISD::SRL &&
+           isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
+
+           auto ResultType = Val.getNode()->getValueType(0);
+           auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
+                                               ResultType);
+           SDValue IDVal (ImDef, 0);
+
+           Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
+                         ResultType, IDVal, Op0.getOperand(0),
+                         getI32Imm(1, dl)), 0);
+           SH = 64 - Imm;
+        }
+      }
+
      // If the operand is a logical right shift, we can fold it into this
      // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
      // for n <= mb. The right shift is really a left rotate followed by a
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@ -658,3 +658,9 @@ Instruction fusion was introduced in ISA 2.06 and more opportunities added in
 ISA 2.07.  LLVM needs to add infrastructure to recognize fusion opportunities
 and force instruction pairs to be scheduled together.

+-----------------------------------------------------------------------------
+
+More general handling of any_extend and zero_extend:
+
+See https://reviews.llvm.org/D24924#555306
+
--- a/test/CodeGen/PowerPC/anyext_srl.ll
+++ b/test/CodeGen/PowerPC/anyext_srl.ll
@ -0,0 +1,29 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 < %s | FileCheck %s
+
+%class.PB2 = type { [1 x i32], %class.PB1* }
+%class.PB1 = type { [1 x i32], i64, i64, i32 }
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @foo(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %cmp.i = icmp ne i32 %and.i, 0
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ne i32 %and.i4, 0
+  %cmp = xor i1 %cmp.i, %cmp.i5
+  ret i1 %cmp
+; CHECK-LABEL: @foo
+; CHECK: rldicl  {{[0-9]+}}, {{[0-9]+}}, 61, 63
+
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}
+