From 1c3af779fc6b184204efd7e98dc16e475c251e7f Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Sat, 23 Apr 2011 03:55:32 +0000 Subject: [PATCH] Thumb2 and ARM add/subtract with carry fixes. Fixes Thumb2 ADCS and SBCS lowering: . t2ADCS/t2SBCS are now pseudo instructions, consistent with ARM, so the assembly printer correctly prints the 's' suffix. Fixes Thumb2 adde -> SBC matching to check for live/dead carry flags. Fixes the internal ARM machine opcode mnemonic for ADCS/SBCS. Fixes ARM SBC lowering to check for live carry (potential bug). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130048 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 139 +++++++++++++++-------------- lib/Target/ARM/ARMISelLowering.h | 1 + lib/Target/ARM/ARMInstrInfo.td | 16 ++-- lib/Target/ARM/ARMInstrThumb2.td | 65 +++++--------- test/CodeGen/ARM/carry.ll | 17 ++++ test/CodeGen/Thumb2/thumb2-sbc.ll | 15 ++++ test/CodeGen/Thumb2/thumb2-sub3.ll | 10 +-- test/CodeGen/Thumb2/thumb2-sub5.ll | 7 +- 8 files changed, 147 insertions(+), 123 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2d555cd7828..94dc6c2b859 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -5040,6 +5040,72 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { llvm_unreachable("Expecting a BB with two successors!"); } +// FIXME: This opcode table should obviously be expressed in the target +// description. We probably just need a "machine opcode" value in the pseudo +// instruction. But the ideal solution maybe to simply remove the "S" version +// of the opcode altogether. +struct AddSubFlagsOpcodePair { + unsigned PseudoOpc; + unsigned MachineOpc; +}; + +static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { + {ARM::ADCSri, ARM::ADCri}, + {ARM::ADCSrr, ARM::ADCrr}, + {ARM::ADCSrs, ARM::ADCrs}, + {ARM::SBCSri, ARM::SBCri}, + {ARM::SBCSrr, ARM::SBCrr}, + {ARM::SBCSrs, ARM::SBCrs}, + {ARM::RSBSri, ARM::RSBri}, + {ARM::RSBSrr, ARM::RSBrr}, + {ARM::RSBSrs, ARM::RSBrs}, + {ARM::RSCSri, ARM::RSCri}, + {ARM::RSCSrs, ARM::RSCrs}, + {ARM::t2ADCSri, ARM::t2ADCri}, + {ARM::t2ADCSrr, ARM::t2ADCrr}, + {ARM::t2ADCSrs, ARM::t2ADCrs}, + {ARM::t2SBCSri, ARM::t2SBCri}, + {ARM::t2SBCSrr, ARM::t2SBCrr}, + {ARM::t2SBCSrs, ARM::t2SBCrs}, + {ARM::t2RSBSri, ARM::t2RSBri}, + {ARM::t2RSBSrs, ARM::t2RSBrs}, +}; + +// Convert and Add or Subtract with Carry and Flags to a generic opcode with +// CPSR operand. e.g. ADCS (...) -> ADC (... CPSR). +// +// FIXME: Somewhere we should assert that CPSR is in the correct +// position to be recognized by the target descrition as the 'S' bit. +bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI, + MachineBasicBlock *BB) const { + unsigned OldOpc = MI->getOpcode(); + unsigned NewOpc = 0; + + // This is only called for instructions that need remapping, so iterating over + // the tiny opcode table is not costly. + static const int NPairs = + sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); + for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0], + *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) { + if (OldOpc == Pair->PseudoOpc) { + NewOpc = Pair->MachineOpc; + break; + } + } + if (!NewOpc) + return false; + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); + for (unsigned i = 0; i < MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + AddDefaultPred(MIB); + MIB.addReg(ARM::CPSR, RegState::Define); // S bit + MI->eraseFromParent(); + return true; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -5047,10 +5113,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { - default: + default: { + if (RemapAddSubWithFlags(MI, BB)) + return BB; + MI->dump(); llvm_unreachable("Unexpected instr type to insert"); - + } case ARM::ATOMIC_LOAD_ADD_I8: return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); case ARM::ATOMIC_LOAD_ADD_I16: @@ -5101,68 +5170,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); - case ARM::ADCSSri: - case ARM::ADCSSrr: - case ARM::ADCSSrs: - case ARM::SBCSSri: - case ARM::SBCSSrr: - case ARM::SBCSSrs: - case ARM::RSBSri: - case ARM::RSBSrr: - case ARM::RSBSrs: - case ARM::RSCSri: - case ARM::RSCSrs: { - unsigned OldOpc = MI->getOpcode(); - unsigned Opc = 0; - switch (OldOpc) { - case ARM::ADCSSrr: - Opc = ARM::ADCrr; - break; - case ARM::ADCSSri: - Opc = ARM::ADCri; - break; - case ARM::ADCSSrs: - Opc = ARM::ADCrs; - break; - case ARM::SBCSSrr: - Opc = ARM::SBCrr; - break; - case ARM::SBCSSri: - Opc = ARM::SBCri; - break; - case ARM::SBCSSrs: - Opc = ARM::SBCrs; - break; - case ARM::RSBSri: - Opc = ARM::RSBri; - break; - case ARM::RSBSrr: - Opc = ARM::RSBrr; - break; - case ARM::RSBSrs: - Opc = ARM::RSBrs; - break; - case ARM::RSCSri: - Opc = ARM::RSCri; - break; - case ARM::RSCSrs: - Opc = ARM::RSCrs; - break; - default: - llvm_unreachable("Unknown opcode?"); - } - - MachineInstrBuilder MIB = - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(Opc)); - for (unsigned i = 0; i < MI->getNumOperands(); ++i) - MIB.addOperand(MI->getOperand(i)); - AddDefaultPred(MIB); - MIB.addReg(ARM::CPSR, RegState::Define); // S bit - MI->eraseFromParent(); - return BB; - } - - case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the @@ -5474,7 +5481,7 @@ static SDValue PerformANDCombine(SDNode *N, if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); - + APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; @@ -5510,7 +5517,7 @@ static SDValue PerformORCombine(SDNode *N, if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); - + APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 6fc77cff2d8..5befcee6f29 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -485,6 +485,7 @@ namespace llvm { unsigned Size, unsigned BinOpcode) const; + bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; }; enum NEONModImmType { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b2961f82548..48110f805fe 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -940,16 +940,16 @@ multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, // NOTE: CPSR def omitted because it will be handled by the custom inserter. let usesCustomInserter = 1 in { multiclass AI1_adde_sube_s_irs { - def Sri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - Size4Bytes, IIC_iALUi, + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>; - def Srr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - Size4Bytes, IIC_iALUr, + def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + Size4Bytes, IIC_iALUr, [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { let isCommutable = Commutable; } - def Srs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - Size4Bytes, IIC_iALUsr, + def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>; } } @@ -2330,8 +2330,10 @@ def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. -def : ARMPat<(adde GPR:$src, so_imm_not:$imm), +def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm), (SBCri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm), + (SBCSri GPR:$src, so_imm_not:$imm)>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index ad6888c5b60..9cad3d00945 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -681,49 +681,27 @@ multiclass T2I_adde_sube_irs opcod, string opc, PatFrag opnode, let Inst{24-21} = opcod; } } +} // Carry setting variants -let isCodeGenOnly = 1, Defs = [CPSR] in { -multiclass T2I_adde_sube_s_irs opcod, string opc, PatFrag opnode, - bit Commutable = 0> { +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1 in { +multiclass T2I_adde_sube_s_irs { // shifted imm - def ri : T2sTwoRegImm< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, - opc, "\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{15} = 0; - } + def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>; // register - def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, - opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, - Requires<[IsThumb2]> { + def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + Size4Bytes, IIC_iALUr, + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{14-12} = 0b000; // imm3 - let Inst{7-6} = 0b00; // imm2 - let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sTwoRegShiftedReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - } -} + def rs : t2PseudoInst< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + Size4Bytes, IIC_iALUsi, + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>; } } @@ -1803,10 +1781,8 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; -defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc", - BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", - BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; +defm t2ADCS : T2I_adde_sube_s_irs, 1>; +defm t2SBCS : T2I_adde_sube_s_irs>; // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", @@ -1837,9 +1813,14 @@ def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm), // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. let AddedComplexity = 1 in -def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm), +def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm), + (t2SBCri rGPR:$src, imm0_255_not:$imm)>; +def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm), + (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; +let AddedComplexity = 1 in +def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm), (t2SBCSri rGPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm), +def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm), (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll index a6a7ed6af18..9b90408cc4d 100644 --- a/test/CodeGen/ARM/carry.ll +++ b/test/CodeGen/ARM/carry.ll @@ -19,3 +19,20 @@ entry: %tmp2 = sub i64 %tmp1, %b ret i64 %tmp2 } + +; add with live carry +define i64 @f3(i32 %al, i32 %bl) { +; CHECK: f3: +; CHECK: adds r +; CHECK: adcs r +; CHECK: adc r +entry: + ; unsigned wide add + %aw = zext i32 %al to i64 + %bw = zext i32 %bl to i64 + %cw = add i64 %aw, %bw + ; ch == carry bit + %ch = lshr i64 %cw, 32 + %dw = add i64 %ch, %bw + ret i64 %dw +} diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll index 09e3fdd8538..f8d8d277d12 100644 --- a/test/CodeGen/Thumb2/thumb2-sbc.ll +++ b/test/CodeGen/Thumb2/thumb2-sbc.ll @@ -52,3 +52,18 @@ define i64 @f6(i64 %a) { ret i64 %tmp } +; Example from numerics code that manually computes wider-than-64 values. +; +; CHECK: _livecarry: +; CHECK: adds +; CHECK: adcs +; CHECK: adc +define i64 @livecarry(i64 %carry, i32 %digit) nounwind { + %ch = lshr i64 %carry, 32 + %cl = and i64 %carry, 4294967295 + %truncdigit = zext i32 %digit to i64 + %prod = add i64 %cl, %truncdigit + %ph = lshr i64 %prod, 32 + %carryresult = add i64 %ch, %ph + ret i64 %carryresult +} diff --git a/test/CodeGen/Thumb2/thumb2-sub3.ll b/test/CodeGen/Thumb2/thumb2-sub3.ll index 855ad06a63f..1dbda57f236 100644 --- a/test/CodeGen/Thumb2/thumb2-sub3.ll +++ b/test/CodeGen/Thumb2/thumb2-sub3.ll @@ -4,7 +4,7 @@ define i64 @f1(i64 %a) { ; CHECK: f1 ; CHECK: subs r0, #171 -; CHECK: adc r1, r1, #-1 +; CHECK: sbc r1, r1, #0 %tmp = sub i64 %a, 171 ret i64 %tmp } @@ -13,7 +13,7 @@ define i64 @f1(i64 %a) { define i64 @f2(i64 %a) { ; CHECK: f2 ; CHECK: subs.w r0, r0, #1179666 -; CHECK: adc r1, r1, #-1 +; CHECK: sbc r1, r1, #0 %tmp = sub i64 %a, 1179666 ret i64 %tmp } @@ -22,7 +22,7 @@ define i64 @f2(i64 %a) { define i64 @f3(i64 %a) { ; CHECK: f3 ; CHECK: subs.w r0, r0, #872428544 -; CHECK: adc r1, r1, #-1 +; CHECK: sbc r1, r1, #0 %tmp = sub i64 %a, 872428544 ret i64 %tmp } @@ -31,7 +31,7 @@ define i64 @f3(i64 %a) { define i64 @f4(i64 %a) { ; CHECK: f4 ; CHECK: subs.w r0, r0, #1448498774 -; CHECK: adc r1, r1, #-1 +; CHECK: sbc r1, r1, #0 %tmp = sub i64 %a, 1448498774 ret i64 %tmp } @@ -40,7 +40,7 @@ define i64 @f4(i64 %a) { define i64 @f5(i64 %a) { ; CHECK: f5 ; CHECK: subs.w r0, r0, #66846720 -; CHECK: adc r1, r1, #-1 +; CHECK: sbc r1, r1, #0 %tmp = sub i64 %a, 66846720 ret i64 %tmp } diff --git a/test/CodeGen/Thumb2/thumb2-sub5.ll b/test/CodeGen/Thumb2/thumb2-sub5.ll index c3b56bc09c8..6edd789beec 100644 --- a/test/CodeGen/Thumb2/thumb2-sub5.ll +++ b/test/CodeGen/Thumb2/thumb2-sub5.ll @@ -1,9 +1,10 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mattr=+thumb2 -mattr=+32bit | FileCheck %s define i64 @f1(i64 %a, i64 %b) { ; CHECK: f1: -; CHECK: subs r0, r0, r2 -; CHECK: sbcs r1, r3 +; CHECK: subs.w r0, r0, r2 +; To test dead_carry, +32bit prevents sbc conveting to 16-bit sbcs +; CHECK: sbc.w r1, r1, r3 %tmp = sub i64 %a, %b ret i64 %tmp }