From 5b9fcd1c8e9f2b7964a82cd383441f568890b561 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 7 Jul 2009 01:17:28 +0000 Subject: [PATCH] Add some more Thumb2 multiplication instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@74889 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 29 ++++++-- lib/Target/ARM/ARMISelLowering.cpp | 8 +-- lib/Target/ARM/ARMInstrThumb2.td | 108 +++++++++++++++++++++++++++- test/CodeGen/Thumb2/thumb2-mulhi.ll | 20 ++++++ test/CodeGen/Thumb2/thumb2-smla.ll | 10 +++ test/CodeGen/Thumb2/thumb2-smul.ll | 23 ++++++ 6 files changed, 187 insertions(+), 11 deletions(-) create mode 100644 test/CodeGen/Thumb2/thumb2-mulhi.ll create mode 100644 test/CodeGen/Thumb2/thumb2-smla.ll create mode 100644 test/CodeGen/Thumb2/thumb2-smul.ll diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 6485fc1d360..5d8925f1bd4 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -923,7 +923,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { break; } case ISD::MUL: - if (Subtarget->isThumb()) + if (Subtarget->isThumb1Only()) break; if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { unsigned RHSV = C->getZExtValue(); @@ -953,20 +953,37 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { Op.getOperand(0), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)); case ISD::UMUL_LOHI: { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + if (Subtarget->isThumb1Only()) + break; + if (Subtarget->isThumb()) { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); + return CurDAG->getTargetNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4); + } else { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; 
+ return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } } case ISD::SMUL_LOHI: { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + if (Subtarget->isThumb1Only()) + break; + if (Subtarget->isThumb()) { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getTargetNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4); + } else { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); + return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } } case ISD::LOAD: { SDNode *ResNode = 0; - if (Subtarget->isThumb2()) + if (Subtarget->isThumb() && Subtarget->hasThumb2()) ResNode = SelectT2IndexedLoad(Op); else ResNode = SelectARMIndexedLoad(Op); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 41c9ecc43a9..cd837786778 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -246,7 +246,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // i64 operation support. 
- if (Subtarget->isThumb()) { + if (Subtarget->isThumb1Only()) { setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); setOperationAction(ISD::MULHS, MVT::i32, Expand); @@ -255,7 +255,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } else { setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); - if (!Subtarget->hasV6Ops()) + if (!Subtarget->isThumb() && !Subtarget->hasV6Ops()) setOperationAction(ISD::MULHS, MVT::i32, Expand); } setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); @@ -3034,7 +3034,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, bool isInc; bool isLegal = false; - if (Subtarget->isThumb2()) + if (Subtarget->isThumb() && Subtarget->hasThumb2()) isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, isInc, DAG); else @@ -3071,7 +3071,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isInc; bool isLegal = false; - if (Subtarget->isThumb2()) + if (Subtarget->isThumb() && Subtarget->hasThumb2()) isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); else diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 85edae4f167..e1a526d2231 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -838,7 +838,113 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), "mls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>; -// FIXME: SMULL, etc. 
+// Extra precision multiplies with low / high results +let neverHasSideEffects = 1 in { +let isCommutable = 1 in { +def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), + "smull", " $ldst, $hdst, $a, $b", []>; + +def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), + "umull", " $ldst, $hdst, $a, $b", []>; +} + +// Multiply + accumulate +def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), + "smlal", " $ldst, $hdst, $a, $b", []>; + +def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), + "umlal", " $ldst, $hdst, $a, $b", []>; + +def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), + "umaal", " $ldst, $hdst, $a, $b", []>; +} // neverHasSideEffects + +// Most significant word multiply +def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), + "smmul", " $dst, $a, $b", + [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>; + +def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "smmla", " $dst, $a, $b, $c", + [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>; + + +def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "smmls", " $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>; + +multiclass T2I_smul { + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "bb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16)))]>; + + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "bt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, (i32 16))))]>; + + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "tb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), + (sext_inreg GPR:$b, i16)))]>; + + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "tt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), + (sra GPR:$b, (i32 16))))]>; + + def WB : T2I<(outs GPR:$dst), 
(ins GPR:$a, GPR:$b), + !strconcat(opc, "wb"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), (i32 16)))]>; + + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "wt"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sra GPR:$b, (i32 16))), (i32 16)))]>; +} + + +multiclass T2I_smla { + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, + (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16))))]>; + + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, (i32 16)))))]>; + + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), + (sext_inreg GPR:$b, i16))))]>; + + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), + (sra GPR:$b, (i32 16)))))]>; + + def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), (i32 16))))]>; + + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sra GPR:$b, (i32 16))), (i32 16))))]>; +} + +defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; +defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; + +// TODO: Halfword multiply accumulate long: SMLAL<x><y> +// TODO: Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD + //===----------------------------------------------------------------------===// // Misc.
Arithmetic Instructions. diff --git a/test/CodeGen/Thumb2/thumb2-mulhi.ll b/test/CodeGen/Thumb2/thumb2-mulhi.ll new file mode 100644 index 00000000000..7b41509f79c --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-mulhi.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep smmul | count 1 +; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep umull | count 1 + +define i32 @smulhi(i32 %x, i32 %y) { + %tmp = sext i32 %x to i64 ; [#uses=1] + %tmp1 = sext i32 %y to i64 ; [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; [#uses=1] + %tmp3 = lshr i64 %tmp2, 32 ; [#uses=1] + %tmp3.upgrd.1 = trunc i64 %tmp3 to i32 ; [#uses=1] + ret i32 %tmp3.upgrd.1 +} + +define i32 @umulhi(i32 %x, i32 %y) { + %tmp = zext i32 %x to i64 ; [#uses=1] + %tmp1 = zext i32 %y to i64 ; [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; [#uses=1] + %tmp3 = lshr i64 %tmp2, 32 ; [#uses=1] + %tmp3.upgrd.2 = trunc i64 %tmp3 to i32 ; [#uses=1] + ret i32 %tmp3.upgrd.2 +} diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll new file mode 100644 index 00000000000..0c65f235d6f --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-smla.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \ +; RUN: grep smlabt | count 1 + +define i32 @f3(i32 %a, i16 %x, i32 %y) { + %tmp = sext i16 %x to i32 ; [#uses=1] + %tmp2 = ashr i32 %y, 16 ; [#uses=1] + %tmp3 = mul i32 %tmp2, %tmp ; [#uses=1] + %tmp5 = add i32 %tmp3, %a ; [#uses=1] + ret i32 %tmp5 +} diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll new file mode 100644 index 00000000000..ebbb982bdeb --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-smul.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \ +; RUN: grep smulbt | count 1 +; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \ +; RUN: grep smultt | count 1 + +@x = weak global i16 0 ; [#uses=1] +@y = weak global i16 0 ; [#uses=0] + +define i32 @f1(i32 %y) { + %tmp = load i16* @x ; 
[#uses=1] + %tmp1 = add i16 %tmp, 2 ; [#uses=1] + %tmp2 = sext i16 %tmp1 to i32 ; [#uses=1] + %tmp3 = ashr i32 %y, 16 ; [#uses=1] + %tmp4 = mul i32 %tmp2, %tmp3 ; [#uses=1] + ret i32 %tmp4 +} + +define i32 @f2(i32 %x, i32 %y) { + %tmp1 = ashr i32 %x, 16 ; [#uses=1] + %tmp3 = ashr i32 %y, 16 ; [#uses=1] + %tmp4 = mul i32 %tmp3, %tmp1 ; [#uses=1] + ret i32 %tmp4 +}