[RISCV] Custom-legalise i32 SDIV/UDIV/UREM on RV64M

Follow the same custom legalisation strategy as used in D57085 for
variable-length shifts (see that patch summary for more discussion). Although
we may lose out on some late-stage DAG combines, I think this custom
legalisation strategy is ultimately easier to reason about.
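
For reference, the helper reused here, customLegalizeToWOp (introduced in
D57085 alongside getRISCVWOpcode), has roughly the following shape; this is a
paraphrase for context, not a verbatim quote of the in-tree code:

static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  // Map e.g. ISD::SDIV to RISCVISD::DIVW, ISD::UDIV to RISCVISD::DIVUW, etc.
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  // Any-extend the i32 operands to i64 and emit the W node, whose result is
  // defined to be the sign-extended 32-bit result of the operation.
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires a result with the original (i32) type.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}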

There are some codegen changes in rv64m-exhaustive-w-insts.ll but they are all
neutral in terms of the number of instructions.

Differential Revision: https://reviews.llvm.org/D57096

llvm-svn: 352171
Author: Alex Bradbury
Date:   2019-01-25 05:11:34 +0000
commit 0fc69297a4 (parent fe1793aa58)
4 changed files with 71 additions and 65 deletions

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

@@ -80,7 +80,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 
   if (Subtarget.is64Bit()) {
-    setTargetDAGCombine(ISD::ANY_EXTEND);
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
     setOperationAction(ISD::SRL, MVT::i32, Custom);
@@ -96,6 +95,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::UREM, XLenVT, Expand);
   }
 
+  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
+    setOperationAction(ISD::SDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UREM, MVT::i32, Custom);
+  }
+
   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
@@ -524,6 +529,12 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
     return RISCVISD::SRAW;
   case ISD::SRL:
     return RISCVISD::SRLW;
+  case ISD::SDIV:
+    return RISCVISD::DIVW;
+  case ISD::UDIV:
+    return RISCVISD::DIVUW;
+  case ISD::UREM:
+    return RISCVISD::REMUW;
   }
 }
@@ -558,46 +569,24 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     Results.push_back(customLegalizeToWOp(N, DAG));
     break;
-  }
-}
-
-// Returns true if the given node is an sdiv, udiv, or urem with non-constant
-// operands.
-static bool isVariableSDivUDivURem(SDValue Val) {
-  switch (Val.getOpcode()) {
-  default:
-    return false;
   case ISD::SDIV:
   case ISD::UDIV:
   case ISD::UREM:
-    return Val.getOperand(0).getOpcode() != ISD::Constant &&
-           Val.getOperand(1).getOpcode() != ISD::Constant;
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
+    if (N->getOperand(0).getOpcode() == ISD::Constant ||
+        N->getOperand(1).getOpcode() == ISD::Constant)
+      return;
+    Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
   }
 }
 
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   switch (N->getOpcode()) {
   default:
     break;
-  case ISD::ANY_EXTEND: {
-    // If any-extending an i32 sdiv/udiv/urem to i64, then instead sign-extend
-    // in order to increase the chance of being able to select the
-    // divw/divuw/remuw instructions.
-    SDValue Src = N->getOperand(0);
-    if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
-      break;
-    if (!(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
-      break;
-    SDLoc DL(N);
-    // Don't add the new node to the DAGCombiner worklist, in order to avoid
-    // an infinite cycle due to SimplifyDemandedBits converting the
-    // SIGN_EXTEND back to ANY_EXTEND.
-    return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src),
-                         false);
-  }
   case RISCVISD::SplitF64: {
     // If the input to SplitF64 is just BuildPairF64 then the operation is
     // redundant. Instead, use BuildPairF64's operands directly.
@@ -633,6 +622,9 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
   case RISCVISD::SLLW:
   case RISCVISD::SRAW:
   case RISCVISD::SRLW:
+  case RISCVISD::DIVW:
+  case RISCVISD::DIVUW:
+  case RISCVISD::REMUW:
     // TODO: As the result is sign-extended, this is conservatively correct. A
     // more precise answer could be calculated for SRAW depending on known
     // bits in the shift amount.
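
(Context for the hunk above, recalled rather than shown in the diff: this hook
returns a conservative lower bound on the number of known sign bits, and for
all of these W nodes it can report 33, because the 32-bit result is
sign-extended into bits 63:32. A sketch of the assumed surrounding code:)

unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
    // All of these produce a 32-bit result sign-extended to 64 bits, so bits
    // 63..31 are copies of bit 31: at least 33 known sign bits.
    return 33;
  }
  return 1;
}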
@@ -1736,6 +1728,12 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "RISCVISD::SRAW";
   case RISCVISD::SRLW:
     return "RISCVISD::SRLW";
+  case RISCVISD::DIVW:
+    return "RISCVISD::DIVW";
+  case RISCVISD::DIVUW:
+    return "RISCVISD::DIVUW";
+  case RISCVISD::REMUW:
+    return "RISCVISD::REMUW";
   }
   return nullptr;
 }

llvm/lib/Target/RISCV/RISCVISelLowering.h

@@ -36,7 +36,12 @@ enum NodeType : unsigned {
   // instructions.
   SLLW,
   SRAW,
-  SRLW
+  SRLW,
+  // 32-bit operations from RV64M that can't be simply matched with a pattern
+  // at instruction selection time.
+  DIVW,
+  DIVUW,
+  REMUW
 };
 }

llvm/lib/Target/RISCV/RISCVInstrInfoM.td

@@ -11,6 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def riscv_divw  : SDNode<"RISCVISD::DIVW",  SDTIntBinOp>;
+def riscv_divuw : SDNode<"RISCVISD::DIVUW", SDTIntBinOp>;
+def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>;
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -52,18 +60,19 @@ def : PatGprGpr<urem, REMU>;
 let Predicates = [HasStdExtM, IsRV64] in {
 def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
           (MULW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
-                            (sexti32 GPR:$rs2)), i32),
-          (DIVW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
-                         (sexti32 GPR:$rs2))),
-          (SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
-def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (DIVUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a divuw and zero-extend the result than to
-// zero-extend both inputs to a udiv.
-def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+def : PatGprGpr<riscv_divw, DIVW>;
+def : PatGprGpr<riscv_divuw, DIVUW>;
+def : PatGprGpr<riscv_remuw, REMUW>;
+
+// Handle the specific cases where using DIVU/REMU would be correct and result
+// in fewer instructions than emitting DIVUW/REMUW then zero-extending the
+// result.
+def : Pat<(zexti32 (riscv_divuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (DIVU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (riscv_remuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (REMU GPR:$rs1, GPR:$rs2)>;
+
 // Although the sexti32 operands may not have originated from an i32 srem,
 // this pattern is safe as it is impossible for two sign extended inputs to
 // produce a result where res[63:32]=0 and res[31]=1.
@@ -72,10 +81,4 @@ def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
 def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
                             (sexti32 GPR:$rs2)), i32),
           (REMW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (REMUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a remuw and zero-extend the result than to
-// zero-extend both inputs to a urem.
-def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
 } // Predicates = [HasStdExtM, IsRV64]
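
The safety claim in the comment above can be made precise; the following is a
sketch of the reasoning, not text from the patch. For $y \ne 0$ a remainder
satisfies $|x \,\mathrm{rem}\, y| < |y|$, and sign-extended 32-bit inputs give
$|y| \le 2^{31}$, so the result $r$ lies in $(-2^{31}, 2^{31})$ and is
therefore itself sign-extended: bits $63{:}31$ of $r$ are all equal. Having
$r[63{:}32] = 0$ with $r[31] = 1$ would require $r[63] \ne r[31]$, a
contradiction. The RISC-V edge cases also comply: $\mathrm{rem}(x, 0) = x$,
which is sign-extended, and $\mathrm{rem}(-2^{31}, -1) = 0$.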

llvm/test/CodeGen/RISCV/rv64m-exhaustive-w-insts.ll

@@ -454,9 +454,9 @@ define zeroext i32 @zext_divuw_aext_sext(i32 %a, i32 signext %b) nounwind {
 define zeroext i32 @zext_divuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_aext_zext:
 ; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divuw a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    divu a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -487,9 +487,9 @@ define zeroext i32 @zext_divuw_sext_sext(i32 signext %a, i32 signext %b) nounwind {
 define zeroext i32 @zext_divuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_sext_zext:
 ; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divuw a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    divu a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -498,9 +498,9 @@ define zeroext i32 @zext_divuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 define zeroext i32 @zext_divuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_zext_aext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -509,9 +509,9 @@ define zeroext i32 @zext_divuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 define zeroext i32 @zext_divuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_zext_sext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -1235,9 +1235,9 @@ define zeroext i32 @zext_remuw_aext_sext(i32 %a, i32 signext %b) nounwind {
 define zeroext i32 @zext_remuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_aext_zext:
 ; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remuw a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    remu a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1268,9 +1268,9 @@ define zeroext i32 @zext_remuw_sext_sext(i32 signext %a, i32 signext %b) nounwind {
 define zeroext i32 @zext_remuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_sext_zext:
 ; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remuw a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    remu a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1279,9 +1279,9 @@ define zeroext i32 @zext_remuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 define zeroext i32 @zext_remuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_zext_aext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1290,9 +1290,9 @@ define zeroext i32 @zext_remuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 define zeroext i32 @zext_remuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_zext_sext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
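
As an aside, the RV64IM check lines in this test follow the format generated
by llvm/utils/update_llc_test_checks.py; regenerating them would look roughly
like the following, where the build directory path is an assumption:

llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
    llvm/test/CodeGen/RISCV/rv64m-exhaustive-w-insts.ll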