[RISCV] Custom-legalise i32 SDIV/UDIV/UREM on RV64M
Follow the same custom legalisation strategy as used in D57085 for
variable-length shifts (see that patch summary for more discussion). Although
we may lose out on some late-stage DAG combines, I think this custom
legalisation strategy is ultimately easier to reason about.

There are some codegen changes in rv64m-exhaustive-w-insts.ll but they are all
neutral in terms of the number of instructions.

Differential Revision: https://reviews.llvm.org/D57096

llvm-svn: 352171
This commit is contained in:
parent fe1793aa58
commit 0fc69297a4
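For orientation before the diff: the new legalisation funnels i32
SDIV/UDIV/UREM through the customLegalizeToWOp helper referenced below. Its
body is not part of these hunks; the following is a minimal sketch of what
such a helper does, assuming the same shape as the variable-length shift
legalisation from D57085 — treat the body as an illustration, not the patch's
exact code.

// Sketch only: convert an illegal i32 binary op into the corresponding
// RISCVISD::*W node on i64, then truncate back to i32. The truncate later
// folds away because the W instruction's result is already sign-extended.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}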
lib/Target/RISCV/RISCVISelLowering.cpp

@@ -80,7 +80,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 
   if (Subtarget.is64Bit()) {
-    setTargetDAGCombine(ISD::ANY_EXTEND);
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
     setOperationAction(ISD::SRL, MVT::i32, Custom);
@@ -96,6 +95,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::UREM, XLenVT, Expand);
   }
 
+  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
+    setOperationAction(ISD::SDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UREM, MVT::i32, Custom);
+  }
+
   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
@@ -524,6 +529,12 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
     return RISCVISD::SRAW;
   case ISD::SRL:
     return RISCVISD::SRLW;
+  case ISD::SDIV:
+    return RISCVISD::DIVW;
+  case ISD::UDIV:
+    return RISCVISD::DIVUW;
+  case ISD::UREM:
+    return RISCVISD::REMUW;
   }
 }
@@ -558,46 +569,24 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     Results.push_back(customLegalizeToWOp(N, DAG));
     break;
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::UREM:
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
+    if (N->getOperand(0).getOpcode() == ISD::Constant ||
+        N->getOperand(1).getOpcode() == ISD::Constant)
+      return;
+    Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
   }
 }
 
-// Returns true if the given node is an sdiv, udiv, or urem with non-constant
-// operands.
-static bool isVariableSDivUDivURem(SDValue Val) {
-  switch (Val.getOpcode()) {
-  default:
-    return false;
-  case ISD::SDIV:
-  case ISD::UDIV:
-  case ISD::UREM:
-    return Val.getOperand(0).getOpcode() != ISD::Constant &&
-           Val.getOperand(1).getOpcode() != ISD::Constant;
-  }
-}
-
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
 
   switch (N->getOpcode()) {
   default:
     break;
-  case ISD::ANY_EXTEND: {
-    // If any-extending an i32 sdiv/udiv/urem to i64, then instead sign-extend
-    // in order to increase the chance of being able to select the
-    // divw/divuw/remuw instructions.
-    SDValue Src = N->getOperand(0);
-    if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
-      break;
-    if (!(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
-      break;
-    SDLoc DL(N);
-    // Don't add the new node to the DAGCombiner worklist, in order to avoid
-    // an infinite cycle due to SimplifyDemandedBits converting the
-    // SIGN_EXTEND back to ANY_EXTEND.
-    return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src),
-                         false);
-  }
   case RISCVISD::SplitF64: {
     // If the input to SplitF64 is just BuildPairF64 then the operation is
     // redundant. Instead, use BuildPairF64's operands directly.
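Note that the new ReplaceNodeResults case deliberately bails out when either
operand is constant, leaving the node generic. A plausible reading (my
assumption, consistent with the approach taken for shifts in D57085) is that
this keeps DAGCombiner's strength reduction of division by a constant
applicable, since that only fires on generic nodes:

#include <cstdint>

// Illustration only: the rewrite that stays available when a udiv with a
// constant operand is left as a generic node instead of being turned into a
// target-specific DIVUW node early.
uint32_t udiv_by_16(uint32_t x) { return x / 16; }          // as written
uint32_t udiv_by_16_reduced(uint32_t x) { return x >> 4; }  // after combining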
@@ -633,6 +622,9 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
   case RISCVISD::SLLW:
   case RISCVISD::SRAW:
   case RISCVISD::SRLW:
+  case RISCVISD::DIVW:
+  case RISCVISD::DIVUW:
+  case RISCVISD::REMUW:
     // TODO: As the result is sign-extended, this is conservatively correct. A
     // more precise answer could be calculated for SRAW depending on known
     // bits in the shift amount.
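The TODO's premise is the RV64 convention that W instructions sign-extend
their 32-bit result, so these nodes are known to have at least 33 sign bits.
A standalone model of the divw/divuw/remuw semantics assumed here (reference
code written for this note, not taken from the patch):

#include <cassert>
#include <cstdint>

// Per the RISC-V spec: operate on the low 32 bits of each source register,
// then sign-extend the 32-bit result to 64 bits -- even for the unsigned ops.
int64_t divw(int64_t rs1, int64_t rs2) {
  return static_cast<int32_t>(rs1) / static_cast<int32_t>(rs2);
}
int64_t divuw(int64_t rs1, int64_t rs2) {
  uint32_t q = static_cast<uint32_t>(rs1) / static_cast<uint32_t>(rs2);
  return static_cast<int32_t>(q);
}
int64_t remuw(int64_t rs1, int64_t rs2) {
  uint32_t r = static_cast<uint32_t>(rs1) % static_cast<uint32_t>(rs2);
  return static_cast<int32_t>(r);
}

int main() {
  // Bits 63:32 of the result replicate bit 31: the ">= 33 sign bits" fact.
  assert(divuw(0xffffffff, 1) == -1);
  assert(remuw(0x80000000, 0xffffffff) == INT32_MIN);
  return 0;
}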
@@ -1736,6 +1728,12 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "RISCVISD::SRAW";
   case RISCVISD::SRLW:
     return "RISCVISD::SRLW";
+  case RISCVISD::DIVW:
+    return "RISCVISD::DIVW";
+  case RISCVISD::DIVUW:
+    return "RISCVISD::DIVUW";
+  case RISCVISD::REMUW:
+    return "RISCVISD::REMUW";
   }
   return nullptr;
 }
lib/Target/RISCV/RISCVISelLowering.h

@@ -36,7 +36,12 @@ enum NodeType : unsigned {
   // instructions.
   SLLW,
   SRAW,
-  SRLW
+  SRLW,
+  // 32-bit operations from RV64M that can't be simply matched with a pattern
+  // at instruction selection time.
+  DIVW,
+  DIVUW,
+  REMUW
 };
 }
lib/Target/RISCV/RISCVInstrInfoM.td

@@ -11,6 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def riscv_divw  : SDNode<"RISCVISD::DIVW",  SDTIntBinOp>;
+def riscv_divuw : SDNode<"RISCVISD::DIVUW", SDTIntBinOp>;
+def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>;
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -52,18 +60,19 @@ def : PatGprGpr<urem, REMU>;
 let Predicates = [HasStdExtM, IsRV64] in {
 def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
           (MULW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
-                            (sexti32 GPR:$rs2)), i32),
-          (DIVW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
-                         (sexti32 GPR:$rs2))),
-          (SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
-def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (DIVUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a divuw and zero-extend the result than to
-// zero-extend both inputs to a udiv.
-def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : PatGprGpr<riscv_divw, DIVW>;
+def : PatGprGpr<riscv_divuw, DIVUW>;
+def : PatGprGpr<riscv_remuw, REMUW>;
+
+// Handle the specific cases where using DIVU/REMU would be correct and result
+// in fewer instructions than emitting DIVUW/REMUW then zero-extending the
+// result.
+def : Pat<(zexti32 (riscv_divuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (DIVU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (riscv_remuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (REMU GPR:$rs1, GPR:$rs2)>;
 
 // Although the sexti32 operands may not have originated from an i32 srem,
 // this pattern is safe as it is impossible for two sign extended inputs to
 // produce a result where res[63:32]=0 and res[31]=1.
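The DIVU/REMU special cases lean on a simple arithmetic fact: when both
inputs already have bits 63:32 clear, the 64-bit unsigned quotient and
remainder equal the 32-bit ones and are already zero-extended, so no extra
slli/srli pair is needed. A quick standalone check (illustrative; the values
are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t a = 0xfffffffe, b = 7;  // both match (zexti32 ...): high bits zero
  // One 64-bit DIVU...
  uint64_t divu = a / b;
  // ...equals DIVUW's 32-bit result zero-extended.
  uint64_t zext_divuw = static_cast<uint32_t>(a) / static_cast<uint32_t>(b);
  assert(divu == zext_divuw);
  // The same holds for REMU versus zero-extended REMUW.
  assert(a % b == static_cast<uint32_t>(a) % static_cast<uint32_t>(b));
  return 0;
}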
@@ -72,10 +81,4 @@ def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
 def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
                             (sexti32 GPR:$rs2)), i32),
           (REMW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (REMUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a remuw and zero-extend the result than to
-// zero-extend both inputs to a urem.
-def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
 } // Predicates = [HasStdExtM, IsRV64]
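The comment above the REMW pattern argues that res[63:32]=0 with res[31]=1
cannot occur for sign-extended inputs; equivalently, the 64-bit remainder of
two sign-extended values is itself the sign-extension of the 32-bit
remainder. A small standalone check of that property (illustrative):

#include <cassert>
#include <cstdint>

int main() {
  // Sign-extended i32 inputs, chosen to produce a negative remainder.
  int64_t a = INT32_MIN;  // as an i64 register value: 0xffffffff80000000
  int64_t b = 7;
  int64_t rem64 = a % b;  // what a 64-bit REM computes
  int64_t rem32 = static_cast<int32_t>(a) % static_cast<int32_t>(b);  // REMW
  assert(rem64 == rem32);  // identical, already sign-extended
  return 0;
}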
test/CodeGen/RISCV/rv64m-exhaustive-w-insts.ll

@@ -454,9 +454,9 @@ define zeroext i32 @zext_divuw_aext_sext(i32 %a, i32 signext %b) nounwind {
 define zeroext i32 @zext_divuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_aext_zext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
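A note on what these test diffs show (my interpretation, not text from the
patch): divuw/remuw read only the low 32 bits of their sources, so an anyext
argument needs no masking on input and only the zeroext return value still
needs the slli/srli pair, whereas the old code masked an input and then used
a full-width divu. Both come to three instructions, matching the "neutral"
claim in the commit message. A scalar model of why the junk upper bits are
harmless:

#include <cassert>
#include <cstdint>

int main() {
  // Anyext i32 arguments: meaningful low 32 bits, junk above.
  uint64_t a0 = 0xdeadbeef00000009ULL;  // value 9
  uint64_t a1 = 0xcafef00d00000002ULL;  // value 2
  // divuw consumes only the low 32 bits, so the junk never matters.
  uint32_t q = static_cast<uint32_t>(a0) / static_cast<uint32_t>(a1);
  assert(q == 4);
  return 0;
}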
@@ -487,9 +487,9 @@ define zeroext i32 @zext_divuw_sext_sext(i32 signext %a, i32 signext %b) nounwin
 define zeroext i32 @zext_divuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_sext_zext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -498,9 +498,9 @@ define zeroext i32 @zext_divuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwin
 define zeroext i32 @zext_divuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_zext_aext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -509,9 +509,9 @@ define zeroext i32 @zext_divuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 define zeroext i32 @zext_divuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_zext_sext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -1235,9 +1235,9 @@ define zeroext i32 @zext_remuw_aext_sext(i32 %a, i32 signext %b) nounwind {
 define zeroext i32 @zext_remuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_aext_zext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1268,9 +1268,9 @@ define zeroext i32 @zext_remuw_sext_sext(i32 signext %a, i32 signext %b) nounwin
 define zeroext i32 @zext_remuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_sext_zext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1279,9 +1279,9 @@ define zeroext i32 @zext_remuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwin
 define zeroext i32 @zext_remuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_zext_aext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1290,9 +1290,9 @@ define zeroext i32 @zext_remuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 define zeroext i32 @zext_remuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_zext_sext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1