[lanai] Custom lowering of SHL_PARTS

Summary: Implement custom lowering of SHL_PARTS so that left shifts of values wider than 32 bits (e.g. i64) are lowered inline rather than through a library call. Reviewers: eliben, majnemer Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D27232 llvm-svn: 288541
2024-11-28 16:11:29 +00:00 · 2016-12-02 22:01:28 +00:00 · 2016-12-02 22:01:28 +00:00 · 3bec3ef6cd
commit 3bec3ef6cd
parent e6c59c9663
3 changed files with 77 additions and 1 deletions
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@ -104,7 +104,7 @@ LanaiTargetLowering::LanaiTargetLowering(const TargetMachine &TM,

  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
-  setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);

@ -179,6 +179,8 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op,
    return LowerSETCC(Op, DAG);
  case ISD::SETCCE:
    return LowerSETCCE(Op, DAG);
+  case ISD::SHL_PARTS:
+    return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:
    return LowerSRL_PARTS(Op, DAG);
  case ISD::VASTART:
@ -1233,6 +1235,55 @@ SDValue LanaiTargetLowering::LowerJumpTable(SDValue Op,
  }
 }

+SDValue LanaiTargetLowering::LowerSHL_PARTS(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  unsigned VTBits = VT.getSizeInBits();
+  SDLoc dl(Op);
+  assert(Op.getNumOperands() == 3 && "Unexpected SHL!");
+  SDValue ShOpLo = Op.getOperand(0);
+  SDValue ShOpHi = Op.getOperand(1);
+  SDValue ShAmt = Op.getOperand(2);
+  // Custom lowering of SHL_PARTS (operands: low half, high half, shift amount).
+  // Builds the following for (ShOpLo + (ShOpHi << 32)) << ShAmt:
+  //   LoBitsForHi = (ShAmt == 0) ? 0 : (ShOpLo >> (32-ShAmt))
+  //   HiBitsForHi = ShOpHi << ShAmt
+  //   Hi = (ShAmt >= 32) ? (ShOpLo << (ShAmt-32)) : (LoBitsForHi | HiBitsForHi)
+  //   Lo = (ShAmt >= 32) ? 0 : (ShOpLo << ShAmt)
+  // and returns the resulting Lo and Hi halves as merged values.
+
+  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+                                 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
+  SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+
+  // If ShAmt == 0, we just calculated "(SRL ShOpLo, 32)" which is "undef". We
+  // wanted 0, so select it explicitly.
+  SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
+  SDValue SetCC = DAG.getSetCC(dl, MVT::i32, ShAmt, Zero, ISD::SETEQ);
+  LoBitsForHi = DAG.getSelect(dl, MVT::i32, SetCC, Zero, LoBitsForHi);
+  // ExtraShAmt (ShAmt - VTBits) is the amount used for the ShAmt >= 32 case.
+  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+                                   DAG.getConstant(VTBits, dl, MVT::i32));
+  SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+  SDValue HiForNormalShift =
+      DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
+
+  SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+  // Signed "ExtraShAmt >= 0" selects the big-shift case; reused for Lo below.
+  SetCC = DAG.getSetCC(dl, MVT::i32, ExtraShAmt, Zero, ISD::SETGE);
+  SDValue Hi =
+      DAG.getSelect(dl, MVT::i32, SetCC, HiForBigShift, HiForNormalShift);
+
+  // Lanai shifts by more than the register size wrap rather than clamp, so
+  // we can't just emit "lo << b" if b is too big; select 0 in that case.
+  SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+  SDValue Lo = DAG.getSelect(
+      dl, MVT::i32, SetCC, DAG.getConstant(0, dl, MVT::i32), LoForNormalShift);
+
+  SDValue Ops[2] = {Lo, Hi};
+  return DAG.getMergeValues(Ops, dl);
+}
+
 SDValue LanaiTargetLowering::LowerSRL_PARTS(SDValue Op,
                                            SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.h
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.h
@ -88,6 +88,7 @@ public:
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;

--- a/llvm/test/CodeGen/Lanai/lshift64.ll
+++ b/llvm/test/CodeGen/Lanai/lshift64.ll
@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=lanai-unknown-unknown | FileCheck %s
+
+; Test that left-shift i64 lowering does not result in a call being inserted.
+
+; CHECK-LABEL: shift
+; CHECK-NOT: bt __ashldi3
+; CHECK: or	%r0, 0x0, %r[[T0:[0-9]+]]
+; CHECK: mov	0x20, %r[[T1:[0-9]+]]
+; CHECK: sub	%r[[T1]], %r[[ShAmt:[0-9]+]], %r[[T1]]
+; CHECK: sub	%r0, %r[[T1]], %r[[T1]]
+; CHECK: sh	%r[[ShOpB:[0-9]+]], %r[[T1]], %r[[T1]]
+; CHECK: sub.f	%r[[ShAmt]], 0x0, %r0
+; CHECK: sel.eq %r0, %r[[T1]], %r[[T1]]
+; CHECK: sh	%r[[ShOpA:[0-9]+]], %r[[ShAmt]], %r[[T2:[0-9]+]]
+; CHECK: or	%r[[T1]], %r[[T2]], %rv
+; CHECK: sub.f	%r[[ShAmt]], 0x20, %r[[T1]]
+; CHECK: sh.pl	%r[[ShOpB]], %r[[T1]], %rv
+; CHECK: sh.mi	%r[[ShOpB]], %r[[ShAmt]], %r[[T0]]
+
+define i64 @shift(i64 inreg, i32 inreg) {
+  %3 = zext i32 %1 to i64
+  %4 = shl i64 %0, %3
+  ret i64 %4
+}