[ARM] Add codegen for SMMULR, SMMLAR and SMMLSR
This patch teaches the Arm back-end to generate the SMMULR, SMMLAR and SMMLSR instructions from equivalent IR patterns.

Differential Revision: https://reviews.llvm.org/D41775

llvm-svn: 322361
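As a rough sketch of the IR pattern involved (the function name here is made up; the body mirrors the SMMULR case in the new test file test/CodeGen/ARM/dsp-mlal.ll added below): a 64-bit signed multiply that is rounded by adding 0x80000000 and then truncated to its high 32 bits should now select a single smmulr on targets with DSP:

define i32 @round_mulh(i32 %a, i32 %b) {
entry:
  %conv = sext i32 %a to i64            ; widen both operands
  %conv1 = sext i32 %b to i64
  %mul = mul nsw i64 %conv1, %conv      ; 64-bit signed product
  %add = add nsw i64 %mul, 2147483648   ; round: add 0x80000000
  %hi = lshr i64 %add, 32               ; keep only the high half
  %res = trunc i64 %hi to i32           ; expected to select smmulr with +dsp
  ret i32 %res
}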
This commit is contained in:
parent a1f92109e8
commit cf5af96d9c

@@ -1337,6 +1337,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::SMLALDX:      return "ARMISD::SMLALDX";
   case ARMISD::SMLSLD:       return "ARMISD::SMLSLD";
   case ARMISD::SMLSLDX:      return "ARMISD::SMLSLDX";
+  case ARMISD::SMMLAR:       return "ARMISD::SMMLAR";
+  case ARMISD::SMMLSR:       return "ARMISD::SMMLSR";
   case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
   case ARMISD::BFI:          return "ARMISD::BFI";
   case ARMISD::VORRIMM:      return "ARMISD::VORRIMM";

@@ -9860,7 +9862,7 @@ static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
   return resNode;
 }
 
-static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
+static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const ARMSubtarget *Subtarget) {
   // Look for multiply add opportunities.

@@ -9877,49 +9879,61 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
   //     V            V
   //       ADDE <- hiAdd
   //
-  assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
-
-  assert(AddeNode->getNumOperands() == 3 &&
-         AddeNode->getOperand(2).getValueType() == MVT::i32 &&
+  // In the special case where only the higher part of a signed result is used
+  // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
+  // a constant with the exact value of 0x80000000, we recognize we are dealing
+  // with a "rounded multiply and add" (or subtract) and transform it into
+  // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively.
+  assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
+          AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
+         "Expect an ADDE or SUBE");
+
+  assert(AddeSubeNode->getNumOperands() == 3 &&
+         AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
          "ADDE node has the wrong inputs");
 
-  // Check that we are chained to the right ADDC node.
-  SDNode* AddcNode = AddeNode->getOperand(2).getNode();
-  if (AddcNode->getOpcode() != ARMISD::ADDC)
+  // Check that we are chained to the right ADDC or SUBC node.
+  SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
+  if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
+       AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
+      (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
+       AddcSubcNode->getOpcode() != ARMISD::SUBC))
     return SDValue();
 
-  SDValue AddcOp0 = AddcNode->getOperand(0);
-  SDValue AddcOp1 = AddcNode->getOperand(1);
+  SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
+  SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
 
   // Check if the two operands are from the same mul_lohi node.
-  if (AddcOp0.getNode() == AddcOp1.getNode())
+  if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
     return SDValue();
 
-  assert(AddcNode->getNumValues() == 2 &&
-         AddcNode->getValueType(0) == MVT::i32 &&
+  assert(AddcSubcNode->getNumValues() == 2 &&
+         AddcSubcNode->getValueType(0) == MVT::i32 &&
          "Expect ADDC with two result values. First: i32");
 
   // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
   // maybe a SMLAL which multiplies two 16-bit values.
-  if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
-      AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
-      AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
-      AddcOp1->getOpcode() != ISD::SMUL_LOHI)
-    return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget);
+  if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
+      AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
+      AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
+      AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
+      AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
+    return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
 
   // Check for the triangle shape.
-  SDValue AddeOp0 = AddeNode->getOperand(0);
-  SDValue AddeOp1 = AddeNode->getOperand(1);
+  SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
+  SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
 
-  // Make sure that the ADDE operands are not coming from the same node.
-  if (AddeOp0.getNode() == AddeOp1.getNode())
+  // Make sure that the ADDE/SUBE operands are not coming from the same node.
+  if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
     return SDValue();
 
-  // Find the MUL_LOHI node walking up ADDE's operands.
+  // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
   bool IsLeftOperandMUL = false;
-  SDValue MULOp = findMUL_LOHI(AddeOp0);
+  SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
   if (MULOp == SDValue())
-    MULOp = findMUL_LOHI(AddeOp1);
+    MULOp = findMUL_LOHI(AddeSubeOp1);
   else
     IsLeftOperandMUL = true;
   if (MULOp == SDValue())

@@ -9930,63 +9944,88 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
   unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
 
   // Figure out the high and low input values to the MLAL node.
-  SDValue* HiAdd = nullptr;
-  SDValue* LoMul = nullptr;
-  SDValue* LowAdd = nullptr;
+  SDValue *HiAddSub = nullptr;
+  SDValue *LoMul = nullptr;
+  SDValue *LowAddSub = nullptr;
 
-  // Ensure that ADDE is from high result of ISD::xMUL_LOHI.
-  if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
+  // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
+  if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
     return SDValue();
 
   if (IsLeftOperandMUL)
-    HiAdd = &AddeOp1;
+    HiAddSub = &AddeSubeOp1;
   else
-    HiAdd = &AddeOp0;
+    HiAddSub = &AddeSubeOp0;
 
-  // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node
-  // whose low result is fed to the ADDC we are checking.
+  // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
+  // whose low result is fed to the ADDC/SUBC we are checking.
 
-  if (AddcOp0 == MULOp.getValue(0)) {
-    LoMul = &AddcOp0;
-    LowAdd = &AddcOp1;
+  if (AddcSubcOp0 == MULOp.getValue(0)) {
+    LoMul = &AddcSubcOp0;
+    LowAddSub = &AddcSubcOp1;
   }
-  if (AddcOp1 == MULOp.getValue(0)) {
-    LoMul = &AddcOp1;
-    LowAdd = &AddcOp0;
+  if (AddcSubcOp1 == MULOp.getValue(0)) {
+    LoMul = &AddcSubcOp1;
+    LowAddSub = &AddcSubcOp0;
   }
 
   if (!LoMul)
     return SDValue();
 
-  // If HiAdd is the same node as ADDC or is a predecessor of ADDC the
-  // replacement below will create a cycle.
-  if (AddcNode == HiAdd->getNode() ||
-      AddcNode->isPredecessorOf(HiAdd->getNode()))
+  // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
+  // the replacement below will create a cycle.
+  if (AddcSubcNode == HiAddSub->getNode() ||
+      AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
     return SDValue();
 
   // Create the merged node.
   SelectionDAG &DAG = DCI.DAG;
 
-  // Build operand list.
+  // Start building operand list.
   SmallVector<SDValue, 8> Ops;
   Ops.push_back(LoMul->getOperand(0));
   Ops.push_back(LoMul->getOperand(1));
-  Ops.push_back(*LowAdd);
-  Ops.push_back(*HiAdd);
 
-  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
+  // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
+  // the case, we must be doing signed multiplication and only use the higher
+  // part of the result of the MLAL, furthermore the LowAddSub must be a constant
+  // addition or subtraction with the value of 0x800000.
+  if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
+      FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
+      LowAddSub->getNode()->getOpcode() == ISD::Constant &&
+      static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
+          0x80000000) {
+    Ops.push_back(*HiAddSub);
+    if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
+      FinalOpc = ARMISD::SMMLSR;
+    } else {
+      FinalOpc = ARMISD::SMMLAR;
+    }
+    SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
+
+    return SDValue(AddeSubeNode, 0);
+  } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
+    // SMMLS is generated during instruction selection and the rest of this
+    // function can not handle the case where AddcSubcNode is a SUBC.
+    return SDValue();
+
+  // Finish building the operand list for {U/S}MLAL
+  Ops.push_back(*LowAddSub);
+  Ops.push_back(*HiAddSub);
+
+  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
                                  DAG.getVTList(MVT::i32, MVT::i32), Ops);
 
   // Replace the ADDs' nodes uses by the MLA node's values.
   SDValue HiMLALResult(MLALNode.getNode(), 1);
-  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
 
   SDValue LoMLALResult(MLALNode.getNode(), 0);
-  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
 
   // Return original node to notify the driver to stop replacing.
-  return SDValue(AddeNode, 0);
+  return SDValue(AddeSubeNode, 0);
 }
 
 static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,

@@ -10098,9 +10137,11 @@ static SDValue PerformAddcSubcCombine(SDNode *N,
   return SDValue();
 }
 
-static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue PerformAddeSubeCombine(SDNode *N,
+                                      TargetLowering::DAGCombinerInfo &DCI,
                                       const ARMSubtarget *Subtarget) {
   if (Subtarget->isThumb1Only()) {
+    SelectionDAG &DAG = DCI.DAG;
     SDValue RHS = N->getOperand(1);
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
       int64_t imm = C->getSExtValue();

@@ -10118,6 +10159,8 @@ static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG,
                            N->getOperand(0), RHS, N->getOperand(2));
       }
     }
+  } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
+    return AddCombineTo64bitMLAL(N, DCI, Subtarget);
   }
   return SDValue();
 }

@@ -10130,7 +10173,7 @@ static SDValue PerformADDECombine(SDNode *N,
                                   const ARMSubtarget *Subtarget) {
   // Only ARM and Thumb2 support UMLAL/SMLAL.
   if (Subtarget->isThumb1Only())
-    return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
+    return PerformAddeSubeCombine(N, DCI, Subtarget);
 
   // Only perform the checks after legalize when the pattern is available.
   if (DCI.isBeforeLegalize()) return SDValue();

@@ -12338,7 +12381,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
   case ARMISD::ADDC:
   case ARMISD::SUBC:    return PerformAddcSubcCombine(N, DCI, Subtarget);
-  case ARMISD::SUBE:    return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
+  case ARMISD::SUBE:    return PerformAddeSubeCombine(N, DCI, Subtarget);
   case ARMISD::BFI:     return PerformBFICombine(N, DCI);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
   case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);

@@ -203,6 +203,8 @@ class VectorType;
       SMLALDX,      // Signed multiply accumulate long dual exchange
       SMLSLD,       // Signed multiply subtract long dual
       SMLSLDX,      // Signed multiply subtract long dual exchange
+      SMMLAR,       // Signed multiply long, round and add
+      SMMLSR,       // Signed multiply long, subtract and round
 
       // Operands of the standard BUILD_VECTOR node are not legalized, which
       // is fine if BUILD_VECTORs are always lowered to shuffles or other

@@ -105,6 +105,14 @@ def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
 def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
 def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>;
 
+def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>,
+                                      SDTCisSameAs<0, 1>,
+                                      SDTCisSameAs<0, 2>,
+                                      SDTCisSameAs<0, 3>]>;
+
+def ARMsmmlar : SDNode<"ARMISD::SMMLAR", SDT_MulHSR>;
+def ARMsmmlsr : SDNode<"ARMISD::SMMLSR", SDT_MulHSR>;
+
 // Node definitions.
 def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
 def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;

@@ -4143,7 +4151,8 @@ def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
 }
 
 def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
-                     IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>,
+                     IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
+                     [(set GPR:$Rd, (ARMsmmlar GPR:$Rn, GPR:$Rm, (i32 0)))]>,
              Requires<[IsARM, HasV6]>,
              Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
   let Inst{15-12} = 0b1111;

@@ -4158,7 +4167,8 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
 
 def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
                       (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
-                      IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+                      IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
+                      [(set GPR:$Rd, (ARMsmmlar GPR:$Rn, GPR:$Rm, GPR:$Ra))]>,
               Requires<[IsARM, HasV6]>,
               Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
 

@@ -4170,7 +4180,8 @@ def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
 
 def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
                       (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
-                      IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+                      IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
+                      [(set GPR:$Rd, (ARMsmmlsr GPR:$Rn, GPR:$Rm, GPR:$Ra))]>,
               Requires<[IsARM, HasV6]>,
              Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
 

@@ -2661,7 +2661,9 @@ class T2SMMUL<bits<4> op7_4, string opc, list<dag> pattern>
 }
 def t2SMMUL : T2SMMUL<0b0000, "smmul", [(set rGPR:$Rd, (mulhs rGPR:$Rn,
                                                               rGPR:$Rm))]>;
-def t2SMMULR : T2SMMUL<0b0001, "smmulr", []>;
+def t2SMMULR :
+  T2SMMUL<0b0001, "smmulr",
+          [(set rGPR:$Rd, (ARMsmmlar rGPR:$Rn, rGPR:$Rm, (i32 0)))]>;
 
 class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc,
                      list<dag> pattern>

@@ -2677,9 +2679,11 @@ class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc,
 
 def t2SMMLA : T2FourRegSMMLA<0b101, 0b0000, "smmla",
                 [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>;
-def t2SMMLAR: T2FourRegSMMLA<0b101, 0b0001, "smmlar", []>;
+def t2SMMLAR: T2FourRegSMMLA<0b101, 0b0001, "smmlar",
+                [(set rGPR:$Rd, (ARMsmmlar rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>;
 def t2SMMLS: T2FourRegSMMLA<0b110, 0b0000, "smmls", []>;
-def t2SMMLSR: T2FourRegSMMLA<0b110, 0b0001, "smmlsr", []>;
+def t2SMMLSR: T2FourRegSMMLA<0b110, 0b0001, "smmlsr",
+                [(set rGPR:$Rd, (ARMsmmlsr rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>;
 
 class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
                      list<dag> pattern>

new file: test/CodeGen/ARM/dsp-mlal.ll (171 lines)

; RUN: llc -mtriple=thumbv7m -mattr=+dsp %s -o - | FileCheck %s
; RUN: llc -mtriple=armv7a %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv7m -mattr=-dsp %s -o - | FileCheck --check-prefix=NODSP %s

define hidden i32 @SMMULR_SMMLAR(i32 %a, i32 %b0, i32 %b1, i32 %Xn, i32 %Xn1) local_unnamed_addr {
entry:
; CHECK-LABEL: SMMULR_SMMLAR:
; CHECK: ldr r0, [sp]
; CHECK-NEXT: smmulr r0, {{(r0, r2|r2, r0)}}
; CHECK-NEXT: smmlar r0, {{(r1, r3|r3, r1)}}, r0
; NODSP-LABEL: SMMULR_SMMLAR:
; NODSP-NOT: smmulr
; NODSP-NOT: smmlar
  %conv = sext i32 %b1 to i64
  %conv1 = sext i32 %Xn1 to i64
  %mul = mul nsw i64 %conv1, %conv
  %add = add nsw i64 %mul, 2147483648
  %0 = and i64 %add, -4294967296
  %conv4 = sext i32 %b0 to i64
  %conv5 = sext i32 %Xn to i64
  %mul6 = mul nsw i64 %conv5, %conv4
  %add7 = add i64 %mul6, 2147483648
  %add8 = add i64 %add7, %0
  %1 = lshr i64 %add8, 32
  %conv10 = trunc i64 %1 to i32
  ret i32 %conv10
}

define hidden i32 @SMMULR(i32 %a, i32 %b) local_unnamed_addr {
entry:
; CHECK-LABEL: SMMULR:
; CHECK: smmulr r0, {{(r0, r1|r1, r0)}}
; NODSP-LABEL: SMMULR:
; NODSP-NOT: smmulr
  %conv = sext i32 %a to i64
  %conv1 = sext i32 %b to i64
  %mul = mul nsw i64 %conv1, %conv
  %add = add nsw i64 %mul, 2147483648
  %0 = lshr i64 %add, 32
  %conv2 = trunc i64 %0 to i32
  ret i32 %conv2
}

define hidden i32 @SMMUL(i32 %a, i32 %b) local_unnamed_addr {
entry:
; CHECK-LABEL: SMMUL:
; CHECK: smmul r0, {{(r0, r1|r1, r0)}}
; NODSP-LABEL: SMMUL:
; NODSP-NOT: smmul
  %conv = sext i32 %a to i64
  %conv1 = sext i32 %b to i64
  %mul = mul nsw i64 %conv1, %conv
  %0 = lshr i64 %mul, 32
  %conv2 = trunc i64 %0 to i32
  ret i32 %conv2
}

define hidden i32 @SMMLSR(i32 %a, i32 %b, i32 %c) local_unnamed_addr {
entry:
; CHECK-LABEL: SMMLSR:
; CHECK: smmlsr r0, {{(r1, r2|r2, r1)}}, r0
; NODSP-LABEL: SMMLSR:
; NODSP-NOT: smmlsr
  %conv6 = zext i32 %a to i64
  %shl = shl nuw i64 %conv6, 32
  %conv1 = sext i32 %b to i64
  %conv2 = sext i32 %c to i64
  %mul = mul nsw i64 %conv2, %conv1
  %sub = or i64 %shl, 2147483648
  %add = sub i64 %sub, %mul
  %0 = lshr i64 %add, 32
  %conv3 = trunc i64 %0 to i32
  ret i32 %conv3
}

define hidden i32 @NOT_SMMLSR(i32 %a, i32 %b, i32 %c) local_unnamed_addr {
entry:
; CHECK-LABEL: NOT_SMMLSR:
; CHECK-NOT: smmlsr
; NODSP-LABEL: NOT_SMMLSR:
; NODSP-NOT: smmlsr
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %add = add nsw i64 %mul, 2147483648
  %0 = lshr i64 %add, 32
  %conv2 = trunc i64 %0 to i32
  %sub = sub nsw i32 %a, %conv2
  ret i32 %sub
}

define hidden i32 @SMMLS(i32 %a, i32 %b, i32 %c) local_unnamed_addr {
entry:
; CHECK-LABEL: SMMLS:
; CHECK: smmls r0, {{(r1, r2|r2, r1)}}, r0
; NODSP-LABEL: SMMLS:
; NODSP-NOT: smmls
  %conv5 = zext i32 %a to i64
  %shl = shl nuw i64 %conv5, 32
  %conv1 = sext i32 %b to i64
  %conv2 = sext i32 %c to i64
  %mul = mul nsw i64 %conv2, %conv1
  %sub = sub nsw i64 %shl, %mul
  %0 = lshr i64 %sub, 32
  %conv3 = trunc i64 %0 to i32
  ret i32 %conv3
}

define hidden i32 @NOT_SMMLS(i32 %a, i32 %b, i32 %c) local_unnamed_addr {
entry:
; CHECK-LABEL: NOT_SMMLS:
; CHECK-NOT: smmls
; NODSP-LABEL: NOT_SMMLS:
; NODSP-NOT: smmls
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %0 = lshr i64 %mul, 32
  %conv2 = trunc i64 %0 to i32
  %sub = sub nsw i32 %a, %conv2
  ret i32 %sub
}

define hidden i32 @SMMLA(i32 %a, i32 %b, i32 %c) local_unnamed_addr {
entry:
; CHECK-LABEL: SMMLA:
; CHECK: smmla r0, {{(r1, r2|r2, r1)}}, r0
; NODSP-LABEL: SMMLA:
; NODSP-NOT: smmla
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %0 = lshr i64 %mul, 32
  %conv2 = trunc i64 %0 to i32
  %add = add nsw i32 %conv2, %a
  ret i32 %add
}

define hidden i32 @SMMLAR(i32 %a, i32 %b, i32 %c) local_unnamed_addr {
entry:
; CHECK-LABEL: SMMLAR:
; CHECK: smmlar r0, {{(r1, r2|r2, r1)}}, r0
; NODSP-LABEL: SMMLAR:
; NODSP-NOT: smmlar
  %conv7 = zext i32 %a to i64
  %shl = shl nuw i64 %conv7, 32
  %conv1 = sext i32 %b to i64
  %conv2 = sext i32 %c to i64
  %mul = mul nsw i64 %conv2, %conv1
  %add = or i64 %shl, 2147483648
  %add3 = add i64 %add, %mul
  %0 = lshr i64 %add3, 32
  %conv4 = trunc i64 %0 to i32
  ret i32 %conv4
}

define hidden i32 @NOT_SMMLA(i32 %a, i32 %b, i32 %c) local_unnamed_addr {
entry:
; CHECK-LABEL: NOT_SMMLA:
; CHECK-NOT: smmla
; NODSP-LABEL: NOT_SMMLA:
; NODSP-NOT: smmla
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %0 = lshr i64 %mul, 32
  %conv2 = trunc i64 %0 to i32
  %add = xor i32 %conv2, -2147483648
  %add3 = add i32 %add, %a
  ret i32 %add3
}