diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 39bda04b4d1..0d3b54fe8e2 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1786,6 +1786,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                            const SmallVectorImpl &Cond) const {
   // FIXME this should probably have a DebugLoc operand
   DebugLoc dl = DebugLoc::getUnknownLoc();
+
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -1799,34 +1800,72 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   }
 
   // Conditional branch.
+  const bool TBBIsNext = MBB.isLayoutSuccessor(TBB);
   unsigned Count = 0;
   X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+
+  // In a two-way conditional branch on a compound condition, if the "true"
+  // target (TBB) is also the block laid out right after MBB, we can drop the
+  // unconditional jump: branch to TBB on one half of the condition, branch
+  // to FBB on the inverse of the other half, and fall through into TBB
+  // otherwise. I.e., for COND_NE_OR_P with L1 == TBB and L2 == FBB:
+  //
+  //     JNE L1
+  //     JP  L1
+  //     JMP L2
+  //   L1:
+  //     ...
+  //   L2:
+  //     ...
+  //
+  // becomes:
+  //
+  //     JP  L1        // P         -> TBB
+  //     JE  L2        // !P && E   -> FBB
+  //   L1:             // !P && NE  -> falls through into TBB
+  //     ...
+  //   L2:
+  //     ...
+  //
+  // Similarly for COND_NP_OR_E (JNP TBB; JNE FBB; fall through into TBB).
   switch (CC) {
+  default:
+    BuildMI(&MBB, dl, get(GetCondBranchFromCond(CC))).addMBB(TBB);
+    ++Count;
+    break;
   case X86::COND_NP_OR_E:
     // Synthesize NP_OR_E with two branches.
-    BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
-    ++Count;
-    BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
-    ++Count;
+    if (FBB && TBBIsNext) {
+      BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(FBB);
+      FBB = 0; // Falls through into TBB; suppress the trailing JMP.
+    } else {
+      BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
+    }
+
+    Count += 2;
     break;
   case X86::COND_NE_OR_P:
     // Synthesize NE_OR_P with two branches.
-    BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
-    ++Count;
-    BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
-    ++Count;
+    if (FBB && TBBIsNext) {
+      BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(FBB);
+      FBB = 0; // Falls through into TBB; suppress the trailing JMP.
+    } else {
+      BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+    }
+
+    Count += 2;
     break;
-  default: {
-    unsigned Opc = GetCondBranchFromCond(CC);
-    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
-    ++Count;
-  }
   }
+
   if (FBB) {
     // Two-way Conditional branch. Insert the second branch.
     BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB);
     ++Count;
   }
+
   return Count;
 }
 
diff --git a/test/CodeGen/X86/jump-opt.ll b/test/CodeGen/X86/jump-opt.ll
new file mode 100644
index 00000000000..dc32f6665f8
--- /dev/null
+++ b/test/CodeGen/X86/jump-opt.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+
+;
+define float @test1(float %x, float %y) nounwind readnone optsize ssp {
+; CHECK: jp
+; CHECK-NEXT: je
+entry:
+  %0 = fpext float %x to double
+  %1 = fpext float %y to double
+  %2 = fmul double %0, %1
+  %3 = fcmp oeq double %2, 0.000000e+00
+  br i1 %3, label %bb2, label %bb1
+
+bb1:
+  %4 = fadd double %2, -1.000000e+00
+  br label %bb2
+
+bb2:
+  %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ]
+  %.0 = fptrunc double %.0.in to float
+  ret float %.0
+}