Thumb2 and ARM add/subtract with carry fixes.

Fixes Thumb2 ADCS and SBCS lowering: <rdar://problem/9275821>.
t2ADCS/t2SBCS are now pseudo instructions, consistent with ARM, so the
assembly printer correctly prints the 's' suffix.

Fixes Thumb2 adde -> SBC matching to check for live/dead carry flags.

Fixes the internal ARM machine opcode mnemonic for ADCS/SBCS.
Fixes ARM SBC lowering to check for live carry (potential bug).

llvm-svn: 130048
This commit is contained in:
Andrew Trick 2011-04-23 03:55:32 +00:00
parent db92825944
commit a130d110d1
8 changed files with 147 additions and 123 deletions

View File

@ -5040,6 +5040,72 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
// FIXME: This opcode table should obviously be expressed in the target
// description. We probably just need a "machine opcode" value in the pseudo
// instruction. But the ideal solution maybe to simply remove the "S" version
// of the opcode altogether.
struct AddSubFlagsOpcodePair {
unsigned PseudoOpc;
unsigned MachineOpc;
};
static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::ADCSri, ARM::ADCri},
{ARM::ADCSrr, ARM::ADCrr},
{ARM::ADCSrs, ARM::ADCrs},
{ARM::SBCSri, ARM::SBCri},
{ARM::SBCSrr, ARM::SBCrr},
{ARM::SBCSrs, ARM::SBCrs},
{ARM::RSBSri, ARM::RSBri},
{ARM::RSBSrr, ARM::RSBrr},
{ARM::RSBSrs, ARM::RSBrs},
{ARM::RSCSri, ARM::RSCri},
{ARM::RSCSrs, ARM::RSCrs},
{ARM::t2ADCSri, ARM::t2ADCri},
{ARM::t2ADCSrr, ARM::t2ADCrr},
{ARM::t2ADCSrs, ARM::t2ADCrs},
{ARM::t2SBCSri, ARM::t2SBCri},
{ARM::t2SBCSrr, ARM::t2SBCrr},
{ARM::t2SBCSrs, ARM::t2SBCrs},
{ARM::t2RSBSri, ARM::t2RSBri},
{ARM::t2RSBSrs, ARM::t2RSBrs},
};
// Convert and Add or Subtract with Carry and Flags to a generic opcode with
// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
//
// FIXME: Somewhere we should assert that CPSR<def> is in the correct
// position to be recognized by the target descrition as the 'S' bit.
bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
MachineBasicBlock *BB) const {
unsigned OldOpc = MI->getOpcode();
unsigned NewOpc = 0;
// This is only called for instructions that need remapping, so iterating over
// the tiny opcode table is not costly.
static const int NPairs =
sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
*End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
if (OldOpc == Pair->PseudoOpc) {
NewOpc = Pair->MachineOpc;
break;
}
}
if (!NewOpc)
return false;
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
for (unsigned i = 0; i < MI->getNumOperands(); ++i)
MIB.addOperand(MI->getOperand(i));
AddDefaultPred(MIB);
MIB.addReg(ARM::CPSR, RegState::Define); // S bit
MI->eraseFromParent();
return true;
}
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@ -5047,10 +5113,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
default:
default: {
if (RemapAddSubWithFlags(MI, BB))
return BB;
MI->dump();
llvm_unreachable("Unexpected instr type to insert");
}
case ARM::ATOMIC_LOAD_ADD_I8:
return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
case ARM::ATOMIC_LOAD_ADD_I16:
@ -5101,68 +5170,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
case ARM::ADCSSri:
case ARM::ADCSSrr:
case ARM::ADCSSrs:
case ARM::SBCSSri:
case ARM::SBCSSrr:
case ARM::SBCSSrs:
case ARM::RSBSri:
case ARM::RSBSrr:
case ARM::RSBSrs:
case ARM::RSCSri:
case ARM::RSCSrs: {
unsigned OldOpc = MI->getOpcode();
unsigned Opc = 0;
switch (OldOpc) {
case ARM::ADCSSrr:
Opc = ARM::ADCrr;
break;
case ARM::ADCSSri:
Opc = ARM::ADCri;
break;
case ARM::ADCSSrs:
Opc = ARM::ADCrs;
break;
case ARM::SBCSSrr:
Opc = ARM::SBCrr;
break;
case ARM::SBCSSri:
Opc = ARM::SBCri;
break;
case ARM::SBCSSrs:
Opc = ARM::SBCrs;
break;
case ARM::RSBSri:
Opc = ARM::RSBri;
break;
case ARM::RSBSrr:
Opc = ARM::RSBrr;
break;
case ARM::RSBSrs:
Opc = ARM::RSBrs;
break;
case ARM::RSCSri:
Opc = ARM::RSCri;
break;
case ARM::RSCSrs:
Opc = ARM::RSCrs;
break;
default:
llvm_unreachable("Unknown opcode?");
}
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(Opc));
for (unsigned i = 0; i < MI->getNumOperands(); ++i)
MIB.addOperand(MI->getOperand(i));
AddDefaultPred(MIB);
MIB.addReg(ARM::CPSR, RegState::Define); // S bit
MI->eraseFromParent();
return BB;
}
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@ -5474,7 +5481,7 @@ static SDValue PerformANDCombine(SDNode *N,
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
@ -5510,7 +5517,7 @@ static SDValue PerformORCombine(SDNode *N,
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;

View File

@ -485,6 +485,7 @@ namespace llvm {
unsigned Size,
unsigned BinOpcode) const;
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
};
enum NEONModImmType {

View File

@ -940,16 +940,16 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
// NOTE: CPSR def omitted because it will be handled by the custom inserter.
let usesCustomInserter = 1 in {
multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
def Sri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
Size4Bytes, IIC_iALUi,
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
Size4Bytes, IIC_iALUi,
[(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
def Srr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
Size4Bytes, IIC_iALUr,
def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
Size4Bytes, IIC_iALUr,
[(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
let isCommutable = Commutable;
}
def Srs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
Size4Bytes, IIC_iALUsr,
def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
Size4Bytes, IIC_iALUsr,
[(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
}
}
@ -2330,8 +2330,10 @@ def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm),
(SBCri GPR:$src, so_imm_not:$imm)>;
def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
(SBCSri GPR:$src, so_imm_not:$imm)>;
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function

View File

@ -681,49 +681,27 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{24-21} = opcod;
}
}
}
// Carry setting variants
let isCodeGenOnly = 1, Defs = [CPSR] in {
multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// NOTE: CPSR def omitted because it will be handled by the custom inserter.
let usesCustomInserter = 1 in {
multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
// shifted imm
def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
opc, "\t$Rd, $Rn, $imm",
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
let Inst{20} = 1; // The S bit.
let Inst{15} = 0;
}
def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
Size4Bytes, IIC_iALUi,
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
// register
def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
opc, ".w\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
Requires<[IsThumb2]> {
def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
Size4Bytes, IIC_iALUr,
[(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
let Inst{20} = 1; // The S bit.
let Inst{14-12} = 0b000; // imm3
let Inst{7-6} = 0b00; // imm2
let Inst{5-4} = 0b00; // type
}
// shifted register
def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
let Inst{20} = 1; // The S bit.
}
}
def rs : t2PseudoInst<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
Size4Bytes, IIC_iALUsi,
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
}
}
@ -1803,10 +1781,8 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc",
BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// RSB
defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
@ -1837,9 +1813,14 @@ def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
let AddedComplexity = 1 in
def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm),
def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm),
(t2SBCri rGPR:$src, imm0_255_not:$imm)>;
def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm),
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
let AddedComplexity = 1 in
def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm),
(t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm),
(t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
// Select Bytes -- for disassembly only

View File

@ -19,3 +19,20 @@ entry:
%tmp2 = sub i64 %tmp1, %b
ret i64 %tmp2
}
; add with live carry
define i64 @f3(i32 %al, i32 %bl) {
; CHECK: f3:
; CHECK: adds r
; CHECK: adcs r
; CHECK: adc r
entry:
; unsigned wide add
%aw = zext i32 %al to i64
%bw = zext i32 %bl to i64
%cw = add i64 %aw, %bw
; ch == carry bit
%ch = lshr i64 %cw, 32
%dw = add i64 %ch, %bw
ret i64 %dw
}

View File

@ -52,3 +52,18 @@ define i64 @f6(i64 %a) {
ret i64 %tmp
}
; Example from numerics code that manually computes wider-than-64 values.
;
; CHECK: _livecarry:
; CHECK: adds
; CHECK: adcs
; CHECK: adc
define i64 @livecarry(i64 %carry, i32 %digit) nounwind {
%ch = lshr i64 %carry, 32
%cl = and i64 %carry, 4294967295
%truncdigit = zext i32 %digit to i64
%prod = add i64 %cl, %truncdigit
%ph = lshr i64 %prod, 32
%carryresult = add i64 %ch, %ph
ret i64 %carryresult
}

View File

@ -4,7 +4,7 @@
define i64 @f1(i64 %a) {
; CHECK: f1
; CHECK: subs r0, #171
; CHECK: adc r1, r1, #-1
; CHECK: sbc r1, r1, #0
%tmp = sub i64 %a, 171
ret i64 %tmp
}
@ -13,7 +13,7 @@ define i64 @f1(i64 %a) {
define i64 @f2(i64 %a) {
; CHECK: f2
; CHECK: subs.w r0, r0, #1179666
; CHECK: adc r1, r1, #-1
; CHECK: sbc r1, r1, #0
%tmp = sub i64 %a, 1179666
ret i64 %tmp
}
@ -22,7 +22,7 @@ define i64 @f2(i64 %a) {
define i64 @f3(i64 %a) {
; CHECK: f3
; CHECK: subs.w r0, r0, #872428544
; CHECK: adc r1, r1, #-1
; CHECK: sbc r1, r1, #0
%tmp = sub i64 %a, 872428544
ret i64 %tmp
}
@ -31,7 +31,7 @@ define i64 @f3(i64 %a) {
define i64 @f4(i64 %a) {
; CHECK: f4
; CHECK: subs.w r0, r0, #1448498774
; CHECK: adc r1, r1, #-1
; CHECK: sbc r1, r1, #0
%tmp = sub i64 %a, 1448498774
ret i64 %tmp
}
@ -40,7 +40,7 @@ define i64 @f4(i64 %a) {
define i64 @f5(i64 %a) {
; CHECK: f5
; CHECK: subs.w r0, r0, #66846720
; CHECK: adc r1, r1, #-1
; CHECK: sbc r1, r1, #0
%tmp = sub i64 %a, 66846720
ret i64 %tmp
}

View File

@ -1,9 +1,10 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; RUN: llc < %s -march=thumb -mattr=+thumb2 -mattr=+32bit | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
; CHECK: f1:
; CHECK: subs r0, r0, r2
; CHECK: sbcs r1, r3
; CHECK: subs.w r0, r0, r2
; To test dead_carry, +32bit prevents sbc conveting to 16-bit sbcs
; CHECK: sbc.w r1, r1, r3
%tmp = sub i64 %a, %b
ret i64 %tmp
}