[RISCV] Custom-legalise i32 SDIV/UDIV/UREM on RV64M
Follow the same custom legalisation strategy as used in D57085 for
variable-length shifts (see that patch summary for more discussion). Although
we may lose out on some late-stage DAG combines, I think this custom
legalisation strategy is ultimately easier to reason about.

There are some codegen changes in rv64m-exhaustive-w-insts.ll but they are all
neutral in terms of the number of instructions.

Differential Revision: https://reviews.llvm.org/D57096

llvm-svn: 352171
This commit is contained in:
parent fe1793aa58
commit 0fc69297a4
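For orientation before the diff: the new legalisation funnels i32
SDIV/UDIV/UREM through the customLegalizeToWOp helper referenced below. Its
body is not part of these hunks; the following is a minimal sketch of what
such a helper does, assuming the same shape as the variable-length shift
legalisation from D57085 — treat the body as an illustration, not the patch's
exact code.

// Sketch only: convert an illegal i32 binary op into the corresponding
// RISCVISD::*W node on i64, then truncate back to i32. The truncate later
// folds away because the W instruction's result is already sign-extended.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}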
lib/Target/RISCV/RISCVISelLowering.cpp

@@ -80,7 +80,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 
   if (Subtarget.is64Bit()) {
-    setTargetDAGCombine(ISD::ANY_EXTEND);
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
     setOperationAction(ISD::SRL, MVT::i32, Custom);
@@ -96,6 +95,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::UREM, XLenVT, Expand);
   }
 
+  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
+    setOperationAction(ISD::SDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UREM, MVT::i32, Custom);
+  }
+
   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
@@ -524,6 +529,12 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
     return RISCVISD::SRAW;
   case ISD::SRL:
     return RISCVISD::SRLW;
+  case ISD::SDIV:
+    return RISCVISD::DIVW;
+  case ISD::UDIV:
+    return RISCVISD::DIVUW;
+  case ISD::UREM:
+    return RISCVISD::REMUW;
   }
 }
@@ -558,46 +569,24 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     Results.push_back(customLegalizeToWOp(N, DAG));
     break;
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::UREM:
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
+    if (N->getOperand(0).getOpcode() == ISD::Constant ||
+        N->getOperand(1).getOpcode() == ISD::Constant)
+      return;
+    Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
   }
 }
 
-// Returns true if the given node is an sdiv, udiv, or urem with non-constant
-// operands.
-static bool isVariableSDivUDivURem(SDValue Val) {
-  switch (Val.getOpcode()) {
-  default:
-    return false;
-  case ISD::SDIV:
-  case ISD::UDIV:
-  case ISD::UREM:
-    return Val.getOperand(0).getOpcode() != ISD::Constant &&
-           Val.getOperand(1).getOpcode() != ISD::Constant;
-  }
-}
-
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
 
   switch (N->getOpcode()) {
   default:
     break;
-  case ISD::ANY_EXTEND: {
-    // If any-extending an i32 sdiv/udiv/urem to i64, then instead sign-extend
-    // in order to increase the chance of being able to select the
-    // divw/divuw/remuw instructions.
-    SDValue Src = N->getOperand(0);
-    if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
-      break;
-    if (!(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
-      break;
-    SDLoc DL(N);
-    // Don't add the new node to the DAGCombiner worklist, in order to avoid
-    // an infinite cycle due to SimplifyDemandedBits converting the
-    // SIGN_EXTEND back to ANY_EXTEND.
-    return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src),
-                         false);
-  }
   case RISCVISD::SplitF64: {
     // If the input to SplitF64 is just BuildPairF64 then the operation is
     // redundant. Instead, use BuildPairF64's operands directly.
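Note that the new ReplaceNodeResults case deliberately bails out when either
operand is constant, leaving the node generic. A plausible reading (my
assumption, consistent with the approach taken for shifts in D57085) is that
this keeps DAGCombiner's strength reduction of division by a constant
applicable, since that only fires on generic nodes:

#include <cstdint>

// Illustration only: the rewrite that stays available when a udiv with a
// constant operand is left as a generic node instead of being turned into a
// target-specific DIVUW node early.
uint32_t udiv_by_16(uint32_t x) { return x / 16; }          // as written
uint32_t udiv_by_16_reduced(uint32_t x) { return x >> 4; }  // after combining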
@@ -633,6 +622,9 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
   case RISCVISD::SLLW:
   case RISCVISD::SRAW:
   case RISCVISD::SRLW:
+  case RISCVISD::DIVW:
+  case RISCVISD::DIVUW:
+  case RISCVISD::REMUW:
     // TODO: As the result is sign-extended, this is conservatively correct. A
     // more precise answer could be calculated for SRAW depending on known
     // bits in the shift amount.
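The TODO's premise is the RV64 convention that W instructions sign-extend
their 32-bit result, so these nodes are known to have at least 33 sign bits.
A standalone model of the divw/divuw/remuw semantics assumed here (reference
code written for this note, not taken from the patch):

#include <cassert>
#include <cstdint>

// Per the RISC-V spec: operate on the low 32 bits of each source register,
// then sign-extend the 32-bit result to 64 bits -- even for the unsigned ops.
int64_t divw(int64_t rs1, int64_t rs2) {
  return static_cast<int32_t>(rs1) / static_cast<int32_t>(rs2);
}
int64_t divuw(int64_t rs1, int64_t rs2) {
  uint32_t q = static_cast<uint32_t>(rs1) / static_cast<uint32_t>(rs2);
  return static_cast<int32_t>(q);
}
int64_t remuw(int64_t rs1, int64_t rs2) {
  uint32_t r = static_cast<uint32_t>(rs1) % static_cast<uint32_t>(rs2);
  return static_cast<int32_t>(r);
}

int main() {
  // Bits 63:32 of the result replicate bit 31: the ">= 33 sign bits" fact.
  assert(divuw(0xffffffff, 1) == -1);
  assert(remuw(0x80000000, 0xffffffff) == INT32_MIN);
  return 0;
}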
@@ -1736,6 +1728,12 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "RISCVISD::SRAW";
   case RISCVISD::SRLW:
     return "RISCVISD::SRLW";
+  case RISCVISD::DIVW:
+    return "RISCVISD::DIVW";
+  case RISCVISD::DIVUW:
+    return "RISCVISD::DIVUW";
+  case RISCVISD::REMUW:
+    return "RISCVISD::REMUW";
   }
   return nullptr;
 }
lib/Target/RISCV/RISCVISelLowering.h

@@ -36,7 +36,12 @@ enum NodeType : unsigned {
   // instructions.
   SLLW,
   SRAW,
-  SRLW
+  SRLW,
+  // 32-bit operations from RV64M that can't be simply matched with a pattern
+  // at instruction selection time.
+  DIVW,
+  DIVUW,
+  REMUW
 };
 }
lib/Target/RISCV/RISCVInstrInfoM.td

@@ -11,6 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def riscv_divw  : SDNode<"RISCVISD::DIVW",  SDTIntBinOp>;
+def riscv_divuw : SDNode<"RISCVISD::DIVUW", SDTIntBinOp>;
+def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>;
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -52,18 +60,19 @@ def : PatGprGpr<urem, REMU>;
 let Predicates = [HasStdExtM, IsRV64] in {
 def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
           (MULW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
-                            (sexti32 GPR:$rs2)), i32),
-          (DIVW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
-                         (sexti32 GPR:$rs2))),
-          (SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
-def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (DIVUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a divuw and zero-extend the result than to
-// zero-extend both inputs to a udiv.
-def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : PatGprGpr<riscv_divw, DIVW>;
+def : PatGprGpr<riscv_divuw, DIVUW>;
+def : PatGprGpr<riscv_remuw, REMUW>;
+
+// Handle the specific cases where using DIVU/REMU would be correct and result
+// in fewer instructions than emitting DIVUW/REMUW then zero-extending the
+// result.
+def : Pat<(zexti32 (riscv_divuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (DIVU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (riscv_remuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (REMU GPR:$rs1, GPR:$rs2)>;
 
 // Although the sexti32 operands may not have originated from an i32 srem,
 // this pattern is safe as it is impossible for two sign extended inputs to
 // produce a result where res[63:32]=0 and res[31]=1.
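The DIVU/REMU special cases lean on a simple arithmetic fact: when both
inputs already have bits 63:32 clear, the 64-bit unsigned quotient and
remainder equal the 32-bit ones and are already zero-extended, so no extra
slli/srli pair is needed. A quick standalone check (illustrative; the values
are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t a = 0xfffffffe, b = 7;  // both match (zexti32 ...): high bits zero
  // One 64-bit DIVU...
  uint64_t divu = a / b;
  // ...equals DIVUW's 32-bit result zero-extended.
  uint64_t zext_divuw = static_cast<uint32_t>(a) / static_cast<uint32_t>(b);
  assert(divu == zext_divuw);
  // The same holds for REMU versus zero-extended REMUW.
  assert(a % b == static_cast<uint32_t>(a) % static_cast<uint32_t>(b));
  return 0;
}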
@@ -72,10 +81,4 @@ def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
 def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
                             (sexti32 GPR:$rs2)), i32),
           (REMW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (REMUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a remuw and zero-extend the result than to
-// zero-extend both inputs to a urem.
-def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
 } // Predicates = [HasStdExtM, IsRV64]
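The comment above the REMW pattern argues that res[63:32]=0 with res[31]=1
cannot occur for sign-extended inputs; equivalently, the 64-bit remainder of
two sign-extended values is itself the sign-extension of the 32-bit
remainder. A small standalone check of that property (illustrative):

#include <cassert>
#include <cstdint>

int main() {
  // Sign-extended i32 inputs, chosen to produce a negative remainder.
  int64_t a = INT32_MIN;  // as an i64 register value: 0xffffffff80000000
  int64_t b = 7;
  int64_t rem64 = a % b;  // what a 64-bit REM computes
  int64_t rem32 = static_cast<int32_t>(a) % static_cast<int32_t>(b);  // REMW
  assert(rem64 == rem32);  // identical, already sign-extended
  return 0;
}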
test/CodeGen/RISCV/rv64m-exhaustive-w-insts.ll

@@ -454,9 +454,9 @@ define zeroext i32 @zext_divuw_aext_sext(i32 %a, i32 signext %b) nounwind {
 define zeroext i32 @zext_divuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_aext_zext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
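A note on what these test diffs show (my interpretation, not text from the
patch): divuw/remuw read only the low 32 bits of their sources, so an anyext
argument needs no masking on input and only the zeroext return value still
needs the slli/srli pair, whereas the old code masked an input and then used
a full-width divu. Both come to three instructions, matching the "neutral"
claim in the commit message. A scalar model of why the junk upper bits are
harmless:

#include <cassert>
#include <cstdint>

int main() {
  // Anyext i32 arguments: meaningful low 32 bits, junk above.
  uint64_t a0 = 0xdeadbeef00000009ULL;  // value 9
  uint64_t a1 = 0xcafef00d00000002ULL;  // value 2
  // divuw consumes only the low 32 bits, so the junk never matters.
  uint32_t q = static_cast<uint32_t>(a0) / static_cast<uint32_t>(a1);
  assert(q == 4);
  return 0;
}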
@@ -487,9 +487,9 @@ define zeroext i32 @zext_divuw_sext_sext(i32 signext %a, i32 signext %b) nounwin
 define zeroext i32 @zext_divuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_sext_zext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -498,9 +498,9 @@ define zeroext i32 @zext_divuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwin
 define zeroext i32 @zext_divuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_zext_aext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -509,9 +509,9 @@ define zeroext i32 @zext_divuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 define zeroext i32 @zext_divuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_zext_sext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -1235,9 +1235,9 @@ define zeroext i32 @zext_remuw_aext_sext(i32 %a, i32 signext %b) nounwind {
 define zeroext i32 @zext_remuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_aext_zext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1268,9 +1268,9 @@ define zeroext i32 @zext_remuw_sext_sext(i32 signext %a, i32 signext %b) nounwin
 define zeroext i32 @zext_remuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_sext_zext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1279,9 +1279,9 @@ define zeroext i32 @zext_remuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwin
 define zeroext i32 @zext_remuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_zext_aext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1290,9 +1290,9 @@ define zeroext i32 @zext_remuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 define zeroext i32 @zext_remuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_zext_sext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1