From 17e0ee5078173356241cb860e2408100ce85ddf4 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Wed, 17 Sep 2014 17:46:47 +0000 Subject: [PATCH] [FastISel][AArch64] Improve branch selection to support all FP conditions. This adds the last two missing floating-point condition codes (FCMP_UEQ and FCMP_ONE) also to the branch selection. In these two cases an additional branch instruction is required. This also adds unit tests to check all the different condition codes. This is related to rdar://problem/18358882. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217966 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 58 +++- test/CodeGen/AArch64/fast-isel-cmp-branch.ll | 293 +++++++++++++++++++ 2 files changed, 341 insertions(+), 10 deletions(-) create mode 100644 test/CodeGen/AArch64/fast-isel-cmp-branch.ll diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 4f5de90afcf..826c4c089a8 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1686,16 +1686,55 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { AArch64CC::CondCode CC = AArch64CC::NE; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { - if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { - // We may not handle every CC for now. - CC = getCompareCC(CI->getPredicate()); - if (CC == AArch64CC::AL) - return false; + if (CI->hasOneUse() && isValueAvailable(CI)) { + // Try to optimize or fold the cmp. + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + switch (Predicate) { + default: + break; + case CmpInst::FCMP_FALSE: + fastEmitBranch(FBB, DbgLoc); + return true; + case CmpInst::FCMP_TRUE: + fastEmitBranch(TBB, DbgLoc); + return true; + } + + // Try to take advantage of fallthrough opportunities. + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + Predicate = CmpInst::getInversePredicate(Predicate); + } // Emit the cmp. 
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; + // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch + // instruction. + CC = getCompareCC(Predicate); + AArch64CC::CondCode ExtraCC = AArch64CC::AL; + switch (Predicate) { + default: + break; + case CmpInst::FCMP_UEQ: + ExtraCC = AArch64CC::EQ; + CC = AArch64CC::VS; + break; + case CmpInst::FCMP_ONE: + ExtraCC = AArch64CC::MI; + CC = AArch64CC::GT; + break; + } + assert((CC != AArch64CC::AL) && "Unexpected condition code."); + + // Emit the extra branch for FCMP_UEQ and FCMP_ONE. + if (ExtraCC != AArch64CC::AL) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) + .addImm(ExtraCC) + .addMBB(TBB); + } + // Emit the branch. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) .addImm(CC) @@ -1713,8 +1752,8 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { } } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { MVT SrcVT; - if (TI->hasOneUse() && TI->getParent() == I->getParent() && - (isTypeSupported(TI->getOperand(0)->getType(), SrcVT))) { + if (TI->hasOneUse() && isValueAvailable(TI) && + isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { unsigned CondReg = getRegForValue(TI->getOperand(0)); if (!CondReg) return false; @@ -1749,8 +1788,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { fastEmitBranch(FBB, DbgLoc); return true; } - } else if (const ConstantInt *CI = - dyn_cast(BI->getCondition())) { + } else if (const auto *CI = dyn_cast(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) @@ -2534,7 +2572,7 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, } // Check if both instructions are in the same basic block. 
- if (II->getParent() != I->getParent()) + if (!isValueAvailable(II)) return false; // Make sure nothing is in the way diff --git a/test/CodeGen/AArch64/fast-isel-cmp-branch.ll b/test/CodeGen/AArch64/fast-isel-cmp-branch.ll new file mode 100644 index 00000000000..3651f194efd --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-cmp-branch.ll @@ -0,0 +1,293 @@ +; RUN: llc -aarch64-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s +; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s + +define i32 @fcmp_oeq(float %x, float %y) { +; CHECK-LABEL: fcmp_oeq +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.ne {{LBB.+_2}} + %1 = fcmp oeq float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ogt(float %x, float %y) { +; CHECK-LABEL: fcmp_ogt +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.le {{LBB.+_2}} + %1 = fcmp ogt float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_oge(float %x, float %y) { +; CHECK-LABEL: fcmp_oge +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.lt {{LBB.+_2}} + %1 = fcmp oge float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_olt(float %x, float %y) { +; CHECK-LABEL: fcmp_olt +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.pl {{LBB.+_2}} + %1 = fcmp olt float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ole(float %x, float %y) { +; CHECK-LABEL: fcmp_ole +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.hi {{LBB.+_2}} + %1 = fcmp ole float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_one(float %x, float %y) { +; CHECK-LABEL: fcmp_one +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.mi +; CHECK-NEXT: b.gt + %1 = fcmp one float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ord(float %x, float %y) { +; 
CHECK-LABEL: fcmp_ord +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.vs {{LBB.+_2}} + %1 = fcmp ord float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uno(float %x, float %y) { +; CHECK-LABEL: fcmp_uno +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.vs {{LBB.+_2}} + %1 = fcmp uno float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ueq(float %x, float %y) { +; CHECK-LABEL: fcmp_ueq +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.eq {{LBB.+_2}} +; CHECK-NEXT: b.vs {{LBB.+_2}} + %1 = fcmp ueq float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ugt(float %x, float %y) { +; CHECK-LABEL: fcmp_ugt +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.ls {{LBB.+_2}} + %1 = fcmp ugt float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uge(float %x, float %y) { +; CHECK-LABEL: fcmp_uge +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.mi {{LBB.+_2}} + %1 = fcmp uge float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ult(float %x, float %y) { +; CHECK-LABEL: fcmp_ult +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.ge {{LBB.+_2}} + %1 = fcmp ult float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ule(float %x, float %y) { +; CHECK-LABEL: fcmp_ule +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.gt {{LBB.+_2}} + %1 = fcmp ule float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_une(float %x, float %y) { +; CHECK-LABEL: fcmp_une +; CHECK: fcmp s0, s1 +; CHECK-NEXT: b.eq {{LBB.+_2}} + %1 = fcmp une float %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_eq +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.ne {{LBB.+_2}} + %1 = icmp eq i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret 
i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ne(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_ne +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.eq {{LBB.+_2}} + %1 = icmp ne i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ugt(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_ugt +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.ls {{LBB.+_2}} + %1 = icmp ugt i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_uge(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_uge +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.lo {{LBB.+_2}} + %1 = icmp uge i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ult(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_ult +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.hs {{LBB.+_2}} + %1 = icmp ult i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ule(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_ule +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.hi {{LBB.+_2}} + %1 = icmp ule i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sgt(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_sgt +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.le {{LBB.+_2}} + %1 = icmp sgt i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sge(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_sge +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.lt {{LBB.+_2}} + %1 = icmp sge i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_slt(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_slt +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.ge {{LBB.+_2}} + %1 = icmp slt i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sle(i32 %x, i32 %y) { +; CHECK-LABEL: icmp_sle +; CHECK: cmp w0, w1 +; CHECK-NEXT: b.gt {{LBB.+_2}} + %1 = icmp sle i32 %x, %y + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + 
ret i32 0 +} +