CodeGen: further loosen -O0 CG for WoA division

Generate the slowest possible codepath for noopt CodeGen.  Even trying to be
clever with the negated jump can cause out-of-range jumps.  Use a wide branch
instead.  Although the code is modelled simplistically, the later optimizations
will recombine the branching into `cbz` if possible.  This re-enables the
previous optimization and hopefully gives us working code in all cases.

Addresses PR30356!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@285649 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Saleem Abdulrasool 2016-10-31 22:12:37 +00:00
parent 485ef16bd4
commit f185e91e2b
4 changed files with 44 additions and 33 deletions

View File

@ -8593,12 +8593,20 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
ContBB->splice(ContBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
ContBB->transferSuccessorsAndUpdatePHIs(MBB);
BuildMI(*MBB, MI, DL, TII->get(ARM::tCBNZ))
.addReg(MI.getOperand(0).getReg())
.addMBB(ContBB);
MBB->addSuccessor(ContBB);
BuildMI(*MBB, MI, DL, TII->get(ARM::t__brkdiv0));
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
MF->push_back(TrapBB);
MBB->addSuccessor(TrapBB);
AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
.addReg(MI.getOperand(0).getReg())
.addImm(0));
BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::EQ)
.addReg(ARM::CPSR);
MI.eraseFromParent();
return ContBB;

View File

@ -75,7 +75,7 @@ return:
; CHECK-MOD-DAG: BB#2
; RUN: llc -mtriple thumbv7--windows-itanium -print-machineinstrs=expand-isel-pseudos -verify-machineinstrs -filetype asm -o /dev/null %s 2>&1 | FileCheck %s -check-prefix CHECK-CFG
; RUN: llc -mtriple thumbv7--windows-itanium -print-machineinstrs=expand-isel-pseudos -verify-machineinstrs -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-CFG-ASM
; RUN: llc -mtriple thumbv7--windows-itanium -verify-machineinstrs -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-CFG-ASM
; unsigned c;
; extern unsigned long g(void);
@ -119,21 +119,24 @@ attributes #0 = { optsize }
; CHECK-CFG-DAG: t2B <BB#3>
; CHECK-CFG-DAG: BB#2
; CHECK-CFG-DAG: tCBNZ %vreg{{[0-9]}}, <BB#4>
; CHECK-CFG-DAG: t__brkdiv0
; CHECK-CFG-DAG: tCMPi8 %vreg{{[0-9]}}, 0
; CHECK-CFG-DAG: t2Bcc <BB#5>
; CHECK-CFG-DAG: BB#4
; CHECK-CFG-DAG: BB#3
; CHECK-CFG-DAG: tBX_RET
; CHECK-CFG-ASM-LABEL: h:
; CHECK-CFG-ASM: cbnz r{{[0-9]}}, .LBB2_2
; CHECK-CFG-ASM: __brkdiv0
; CHECK-CFG-ASM-LABEL: .LBB2_2:
; CHECK-CFG-ASM: bl __rt_udiv
; CHECK-CFG-DAG: BB#5
; CHECK-CFG-DAG: t__brkdiv0
; RUN: llc -O0 -mtriple thumbv7--windows-itanium -verify-machineinstrs -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-WIN__DBZCHK
; CHECK-CFG-ASM-LABEL: h:
; CHECK-CFG-ASM: cbz r{{[0-9]}}, .LBB2_4
; CHECK-CFG-ASM: bl __rt_udiv
; CHECK-CFG-ASM-LABEL: .LBB2_4:
; CHECK-CFG-ASM: __brkdiv0
; RUN: llc -O1 -mtriple thumbv7--windows-itanium -verify-machineinstrs -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-WIN__DBZCHK
; long k(void);
; int l(void);
@ -172,11 +175,11 @@ return:
}
; CHECK-WIN__DBZCHK-LABEL: j:
; CHECK-WIN__DBZCHK: cbnz r{{[0-7]}}, .LBB
; CHECK-WIN__DBZCHK-NOT: cbnz r8, .LBB
; CHECK-WIN__DBZCHK-NOT: cbnz r9, .LBB
; CHECK-WIN__DBZCHK-NOT: cbnz r10, .LBB
; CHECK-WIN__DBZCHK-NOT: cbnz r11, .LBB
; CHECK-WIN__DBZCHK-NOT: cbnz ip, .LBB
; CHECK-WIN__DBZCHK-NOT: cbnz lr, .LBB
; CHECK-WIN__DBZCHK: cbz r{{[0-7]}}, .LBB
; CHECK-WIN__DBZCHK-NOT: cbz r8, .LBB
; CHECK-WIN__DBZCHK-NOT: cbz r9, .LBB
; CHECK-WIN__DBZCHK-NOT: cbz r10, .LBB
; CHECK-WIN__DBZCHK-NOT: cbz r11, .LBB
; CHECK-WIN__DBZCHK-NOT: cbz ip, .LBB
; CHECK-WIN__DBZCHK-NOT: cbz lr, .LBB

View File

@ -9,7 +9,7 @@ entry:
ret i32 %div
}
; CHECK: cbnz r1, #0
; CHECK: __brkdiv0
; CHECK: cmp r1, #0
; CHECK: beq #
; CHECK: bl

View File

@ -8,9 +8,9 @@ entry:
}
; CHECK-LABEL: sdiv32:
; CHECK: cbnz r0
; CHECK: __brkdiv0
; CHECK: cbz r0
; CHECK: bl __rt_sdiv
; CHECK: __brkdiv0
define arm_aapcs_vfpcc i32 @udiv32(i32 %divisor, i32 %divident) {
entry:
@ -19,9 +19,9 @@ entry:
}
; CHECK-LABEL: udiv32:
; CHECK: cbnz r0
; CHECK: __brkdiv0
; CHECK: cbz r0
; CHECK: bl __rt_udiv
; CHECK: __brkdiv0
define arm_aapcs_vfpcc i64 @sdiv64(i64 %divisor, i64 %divident) {
entry:
@ -30,10 +30,10 @@ entry:
}
; CHECK-LABEL: sdiv64:
; CHECK: orr.w r4, r0, r1
; CHECK-NEXT: cbnz r4
; CHECK: __brkdiv0
; CHECK: orrs.w r4, r0, r1
; CHECK-NEXT: beq
; CHECK: bl __rt_sdiv64
; CHECK: __brkdiv0
define arm_aapcs_vfpcc i64 @udiv64(i64 %divisor, i64 %divident) {
entry:
@ -42,8 +42,8 @@ entry:
}
; CHECK-LABEL: udiv64:
; CHECK: orr.w r4, r0, r1
; CHECK-NEXT: cbnz r4
; CHECK: __brkdiv0
; CHECK: orrs.w r4, r0, r1
; CHECK-NEXT: beq
; CHECK: bl __rt_udiv64
; CHECK: __brkdiv0