[FastISel][X86] Optimize selects when the condition comes from a compare.

Optimize the select instruction sequence to use the EFLAGS directly from a
compare when possible.
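As a rough before/after sketch (mirroring the new select_cmp_cmov_i32 test added in this commit): when the select condition is an integer compare, FastISel no longer materializes the i1 into a register and re-tests it with testb $1; it emits the compare and predicates the cmov on its EFLAGS directly.

define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) {
  %1 = icmp ult i32 %a, %b
  %2 = select i1 %1, i32 %a, i32 %b
  ret i32 %2
}
; Expected lowering with this change (see the CHECK lines in the new test below):
;   cmpl   %esi, %edi
;   cmovbl %edi, %esi
;   movl   %esi, %eax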

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211543 91177308-0d34-0410-b5e6-96231b3b80d8
Juergen Ributzka 2014-06-23 21:55:36 +00:00
parent 1f659329b6
commit 5f4e6e1ec0
6 changed files with 470 additions and 38 deletions

View File

@@ -111,6 +111,8 @@ private:
bool X86SelectDivRem(const Instruction *I);
bool X86FastEmitCMoveSelect(const Instruction *I);
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
@@ -1611,50 +1613,158 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
return true;
}
bool X86FastISel::X86SelectSelect(const Instruction *I) {
MVT VT;
if (!isTypeLegal(I->getType(), VT))
/// \brief Emit a conditional move instruction (if they are supported) to lower
/// the select.
bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) {
MVT RetVT;
if (!isTypeLegal(I->getType(), RetVT))
return false;
// We only use cmov here; if we don't have a cmov instruction, bail.
if (!Subtarget->hasCMov()) return false;
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
if (VT == MVT::i16) {
Opc = X86::CMOVE16rr;
RC = &X86::GR16RegClass;
} else if (VT == MVT::i32) {
Opc = X86::CMOVE32rr;
RC = &X86::GR32RegClass;
} else if (VT == MVT::i64) {
Opc = X86::CMOVE64rr;
RC = &X86::GR64RegClass;
} else {
// Check if the subtarget supports these instructions.
if (!Subtarget->hasCMov())
return false;
// FIXME: Add support for i8.
unsigned Opc;
switch (RetVT.SimpleTy) {
default: return false;
case MVT::i16: Opc = X86::CMOVNE16rr; break;
case MVT::i32: Opc = X86::CMOVNE32rr; break;
case MVT::i64: Opc = X86::CMOVNE64rr; break;
}
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
unsigned Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
const Value *Cond = I->getOperand(0);
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
bool NeedTest = true;
// Selects operate on i1, however, Op0Reg is 8 bits wide and may contain
// garbage. Indeed, only the least significant bit is supposed to be accurate.
// If we read more than the lsb, we may see non-zero values whereas lsb
// is zero. Therefore, we have to truncate Op0Reg to i1 for the select.
// This is achieved by performing TEST against 1.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(Op0Reg).addImm(1);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(Op1Reg).addReg(Op2Reg);
// Optimize conditions coming from a compare.
if (const auto *CI = dyn_cast<CmpInst>(Cond)) {
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
static unsigned SETFOpcTable[2][3] = {
{ X86::SETNPr, X86::SETEr , X86::TEST8rr },
{ X86::SETPr, X86::SETNEr, X86::OR8rr }
};
unsigned *SETFOpc = nullptr;
switch (Predicate) {
default: break;
case CmpInst::FCMP_OEQ:
SETFOpc = &SETFOpcTable[0][0];
Predicate = CmpInst::ICMP_NE;
break;
case CmpInst::FCMP_UNE:
SETFOpc = &SETFOpcTable[1][0];
Predicate = CmpInst::ICMP_NE;
break;
}
X86::CondCode CC;
bool NeedSwap;
std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
Opc = X86::getCMovFromCond(CC, RC->getSize());
const Value *CmpLHS = CI->getOperand(0);
const Value *CmpRHS = CI->getOperand(1);
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
EVT CmpVT = TLI.getValueType(CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
return false;
if (SETFOpc) {
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
FlagReg1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
FlagReg2);
auto const &II = TII.get(SETFOpc[2]);
if (II.getNumDefs()) {
unsigned TmpReg = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
.addReg(FlagReg2).addReg(FlagReg1);
} else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(FlagReg2).addReg(FlagReg1);
}
}
NeedTest = false;
}
if (NeedTest) {
// Selects operate on i1, however, CondReg is 8 bits wide and may contain
// garbage. Indeed, only the least significant bit is supposed to be
// accurate. If we read more than the lsb, we may see non-zero values
// whereas the lsb is zero. Therefore, we have to truncate CondReg to i1
// for the select. This is achieved by performing a TEST against 1.
unsigned CondReg = getRegForValue(Cond);
if (CondReg == 0)
return false;
bool CondIsKill = hasTrivialKill(Cond);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
}
const Value *LHS = I->getOperand(1);
const Value *RHS = I->getOperand(2);
unsigned RHSReg = getRegForValue(RHS);
bool RHSIsKill = hasTrivialKill(RHS);
unsigned LHSReg = getRegForValue(LHS);
bool LHSIsKill = hasTrivialKill(LHS);
if (!LHSReg || !RHSReg)
return false;
unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
LHSReg, LHSIsKill);
UpdateValueMap(I, ResultReg);
return true;
}
bool X86FastISel::X86SelectSelect(const Instruction *I) {
MVT RetVT;
if (!isTypeLegal(I->getType(), RetVT))
return false;
// Check if we can fold the select.
if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
const Value *Opnd = nullptr;
switch (Predicate) {
default: break;
case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
}
// No need for a select anymore - this is an unconditional move.
if (Opnd) {
unsigned OpReg = getRegForValue(Opnd);
if (OpReg == 0)
return false;
bool OpIsKill = hasTrivialKill(Opnd);
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(OpReg, getKillRegState(OpIsKill));
UpdateValueMap(I, ResultReg);
return true;
}
}
// First try to use real conditional move instructions.
if (X86FastEmitCMoveSelect(I))
return true;
return false;
}
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
// fpext from float to double.
if (X86ScalarSSEf64 &&
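A note on the SETFOpcTable introduced above, illustrated against the select_fcmp_oeq_cmov test added in this commit: UCOMISD signals an unordered result (a NaN operand) through PF, and an unordered compare also sets ZF, so "ordered and equal" cannot be read from a single flag. The table therefore pairs SETNP with SETE and combines them with a TEST (and, for FCMP_UNE, pairs SETP with SETNE combined with an OR) before issuing the cmov.

define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) {
  %1 = fcmp oeq double %a, %b
  %2 = select i1 %1, i64 %c, i64 %d
  ret i64 %2
}
; Expected lowering (matching the CHECK lines in the new test file):
;   ucomisd %xmm1, %xmm0
;   setnp   %al          ; PF = 0: operands are ordered
;   sete    %cl          ; ZF = 1: operands compared equal
;   testb   %al, %cl     ; the test's ZF is 0 only if both hold
;   cmoveq  %rsi, %rdi   ; condition false -> pick %d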

View File

@@ -2696,8 +2696,8 @@ unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
/// getCMovFromCond - Return a cmov opcode for the given condition,
/// register size in bytes, and operand type.
static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
bool HasMemoryOperand) {
unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
bool HasMemoryOperand) {
static const uint16_t Opc[32][3] = {
{ X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
{ X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },

View File

@@ -66,6 +66,11 @@ namespace X86 {
/// a memory operand.
unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
/// \brief Return a cmov opcode for the given condition, register size in
/// bytes, and operand type.
unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
bool HasMemoryOperand = false);
// Turn CMov opcode into condition code.
CondCode getCondFromCMovOpc(unsigned Opc);

View File

@@ -0,0 +1,62 @@
; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
; Test conditional move for the supported types (i16, i32, and i64) and
; condition input (argument or cmp). Currently i8 is not supported.
define zeroext i16 @select_cmov_i16(i1 zeroext %cond, i16 zeroext %a, i16 zeroext %b) {
; CHECK-LABEL: select_cmov_i16
; CHECK: testb $1, %dil
; CHECK-NEXT: cmovew %dx, %si
; CHECK-NEXT: movzwl %si, %eax
%1 = select i1 %cond, i16 %a, i16 %b
ret i16 %1
}
define zeroext i16 @select_cmp_cmov_i16(i16 zeroext %a, i16 zeroext %b) {
; CHECK-LABEL: select_cmp_cmov_i16
; CHECK: cmpw %si, %di
; CHECK-NEXT: cmovbw %di, %si
; CHECK-NEXT: movzwl %si, %eax
%1 = icmp ult i16 %a, %b
%2 = select i1 %1, i16 %a, i16 %b
ret i16 %2
}
define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) {
; CHECK-LABEL: select_cmov_i32
; CHECK: testb $1, %dil
; CHECK-NEXT: cmovel %edx, %esi
; CHECK-NEXT: movl %esi, %eax
%1 = select i1 %cond, i32 %a, i32 %b
ret i32 %1
}
define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) {
; CHECK-LABEL: select_cmp_cmov_i32
; CHECK: cmpl %esi, %edi
; CHECK-NEXT: cmovbl %edi, %esi
; CHECK-NEXT: movl %esi, %eax
%1 = icmp ult i32 %a, %b
%2 = select i1 %1, i32 %a, i32 %b
ret i32 %2
}
define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) {
; CHECK-LABEL: select_cmov_i64
; CHECK: testb $1, %dil
; CHECK-NEXT: cmoveq %rdx, %rsi
; CHECK-NEXT: movq %rsi, %rax
%1 = select i1 %cond, i64 %a, i64 %b
ret i64 %1
}
define i64 @select_cmp_cmov_i64(i64 %a, i64 %b) {
; CHECK-LABEL: select_cmp_cmov_i64
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovbq %rdi, %rsi
; CHECK-NEXT: movq %rsi, %rax
%1 = icmp ult i64 %a, %b
%2 = select i1 %1, i64 %a, i64 %b
ret i64 %2
}

View File

@@ -0,0 +1,255 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
; Test all the cmp predicates that can feed an integer conditional move.
define i64 @select_fcmp_false_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_false_cmov
; CHECK: movq %rsi, %rax
; CHECK-NEXT: retq
%1 = fcmp false double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_oeq_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
; CHECK-NEXT: cmoveq %rsi, %rdi
%1 = fcmp oeq double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ogt_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ogt_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovbeq %rsi, %rdi
%1 = fcmp ogt double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_oge_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_oge_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovbq %rsi, %rdi
%1 = fcmp oge double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_olt_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_olt_cmov
; CHECK: ucomisd %xmm0, %xmm1
; CHECK-NEXT: cmovbeq %rsi, %rdi
%1 = fcmp olt double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ole_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ole_cmov
; CHECK: ucomisd %xmm0, %xmm1
; CHECK-NEXT: cmovbq %rsi, %rdi
%1 = fcmp ole double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_one_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_one_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmoveq %rsi, %rdi
%1 = fcmp one double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ord_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ord_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovpq %rsi, %rdi
%1 = fcmp ord double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_uno_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_uno_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovnpq %rsi, %rdi
%1 = fcmp uno double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ueq_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ueq_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovneq %rsi, %rdi
%1 = fcmp ueq double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ugt_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ugt_cmov
; CHECK: ucomisd %xmm0, %xmm1
; CHECK-NEXT: cmovaeq %rsi, %rdi
%1 = fcmp ugt double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_uge_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_uge_cmov
; CHECK: ucomisd %xmm0, %xmm1
; CHECK-NEXT: cmovaq %rsi, %rdi
%1 = fcmp uge double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ult_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ult_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovaeq %rsi, %rdi
%1 = fcmp ult double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ule_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ule_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovaq %rsi, %rdi
%1 = fcmp ule double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_une_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_une_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: setp %al
; CHECK-NEXT: setne %cl
; CHECK-NEXT: orb %al, %cl
; CHECK-NEXT: cmoveq %rsi, %rdi
%1 = fcmp une double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_true_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_true_cmov
; CHECK: movq %rdi, %rax
%1 = fcmp true double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_eq_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_eq_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovneq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp eq i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_ne_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_ne_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmoveq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp ne i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_ugt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_ugt_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovbeq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp ugt i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_uge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_uge_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovbq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp uge i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_ult_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_ult_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovaeq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp ult i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_ule_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_ule_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovaq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp ule i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_sgt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_sgt_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovleq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp sgt i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_sge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_sge_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovlq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp sge i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_slt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_slt_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovgeq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp slt i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_sle_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_sle_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovgq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp sle i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}

View File

@@ -4,10 +4,10 @@
; lsb is zero.
; <rdar://problem/15651765>
; CHECK-LABEL: fastisel_select:
; CHECK: subb {{%[a-z0-9]+}}, [[RES:%[a-z0-9]+]]
; CHECK: testb $1, [[RES]]
; CHECK: cmovel
; CHECK: cmovnel %edi, %esi
define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) {
%shuffleInternal15257_8932 = sub i1 %exchSub2211_, %trunc_8766
%counter_diff1345 = select i1 %shuffleInternal15257_8932, i32 1204476887, i32 0