[FastISel][X86] Optimize selects when the condition comes from a compare.

Optimize the select instruction sequence to use the EFLAGS directly from a
compare when possible.
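As a rough before/after sketch (mirroring the new select_cmp_cmov_i32 test added in this commit): when the select condition is an integer compare, FastISel no longer materializes the i1 into a register and re-tests it with testb $1; it emits the compare and predicates the cmov on its EFLAGS directly.

define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) {
  %1 = icmp ult i32 %a, %b
  %2 = select i1 %1, i32 %a, i32 %b
  ret i32 %2
}
; Expected lowering with this change (see the CHECK lines in the new test below):
;   cmpl   %esi, %edi
;   cmovbl %edi, %esi
;   movl   %esi, %eax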

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211543 91177308-0d34-0410-b5e6-96231b3b80d8
Juergen Ributzka 2014-06-23 21:55:36 +00:00
parent 1f659329b6
commit 5f4e6e1ec0
6 changed files with 470 additions and 38 deletions

View File

@@ -111,6 +111,8 @@ private:
bool X86SelectDivRem(const Instruction *I);
bool X86FastEmitCMoveSelect(const Instruction *I);
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
@@ -1611,50 +1613,158 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
return true;
}
bool X86FastISel::X86SelectSelect(const Instruction *I) {
MVT VT;
if (!isTypeLegal(I->getType(), VT))
/// \brief Emit a conditional move instruction (if they are supported) to lower
/// the select.
bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) {
MVT RetVT;
if (!isTypeLegal(I->getType(), RetVT))
return false;
// We only use cmov here; if we don't have a cmov instruction, bail.
if (!Subtarget->hasCMov()) return false;
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
if (VT == MVT::i16) {
Opc = X86::CMOVE16rr;
RC = &X86::GR16RegClass;
} else if (VT == MVT::i32) {
Opc = X86::CMOVE32rr;
RC = &X86::GR32RegClass;
} else if (VT == MVT::i64) {
Opc = X86::CMOVE64rr;
RC = &X86::GR64RegClass;
} else {
// Check if the subtarget supports these instructions.
if (!Subtarget->hasCMov())
return false;
// FIXME: Add support for i8.
unsigned Opc;
switch (RetVT.SimpleTy) {
default: return false;
case MVT::i16: Opc = X86::CMOVNE16rr; break;
case MVT::i32: Opc = X86::CMOVNE32rr; break;
case MVT::i64: Opc = X86::CMOVNE64rr; break;
}
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
unsigned Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
const Value *Cond = I->getOperand(0);
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
bool NeedTest = true;
// Selects operate on i1, however, Op0Reg is 8 bits wide and may contain
// garbage. Indeed, only the least significant bit is supposed to be accurate.
// If we read more than the lsb, we may see non-zero values whereas lsb
// is zero. Therefore, we have to truncate Op0Reg to i1 for the select.
// This is achieved by performing TEST against 1.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(Op0Reg).addImm(1);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(Op1Reg).addReg(Op2Reg);
// Optimize conditions coming from a compare.
if (const auto *CI = dyn_cast<CmpInst>(Cond)) {
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
static unsigned SETFOpcTable[2][3] = {
{ X86::SETNPr, X86::SETEr , X86::TEST8rr },
{ X86::SETPr, X86::SETNEr, X86::OR8rr }
};
unsigned *SETFOpc = nullptr;
switch (Predicate) {
default: break;
case CmpInst::FCMP_OEQ:
SETFOpc = &SETFOpcTable[0][0];
Predicate = CmpInst::ICMP_NE;
break;
case CmpInst::FCMP_UNE:
SETFOpc = &SETFOpcTable[1][0];
Predicate = CmpInst::ICMP_NE;
break;
}
X86::CondCode CC;
bool NeedSwap;
std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
Opc = X86::getCMovFromCond(CC, RC->getSize());
const Value *CmpLHS = CI->getOperand(0);
const Value *CmpRHS = CI->getOperand(1);
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
EVT CmpVT = TLI.getValueType(CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
return false;
if (SETFOpc) {
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
FlagReg1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
FlagReg2);
auto const &II = TII.get(SETFOpc[2]);
if (II.getNumDefs()) {
unsigned TmpReg = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
.addReg(FlagReg2).addReg(FlagReg1);
} else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(FlagReg2).addReg(FlagReg1);
}
}
NeedTest = false;
}
if (NeedTest) {
// Selects operate on i1, however, CondReg is 8 bits wide and may contain
// garbage. Indeed, only the least significant bit is supposed to be
// accurate. If we read more than the lsb, we may see non-zero values
// whereas the lsb is zero. Therefore, we have to truncate CondReg to i1
// for the select. This is achieved by performing a TEST against 1.
unsigned CondReg = getRegForValue(Cond);
if (CondReg == 0)
return false;
bool CondIsKill = hasTrivialKill(Cond);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
}
const Value *LHS = I->getOperand(1);
const Value *RHS = I->getOperand(2);
unsigned RHSReg = getRegForValue(RHS);
bool RHSIsKill = hasTrivialKill(RHS);
unsigned LHSReg = getRegForValue(LHS);
bool LHSIsKill = hasTrivialKill(LHS);
if (!LHSReg || !RHSReg)
return false;
unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
LHSReg, LHSIsKill);
UpdateValueMap(I, ResultReg);
return true;
}
bool X86FastISel::X86SelectSelect(const Instruction *I) {
MVT RetVT;
if (!isTypeLegal(I->getType(), RetVT))
return false;
// Check if we can fold the select.
if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
const Value *Opnd = nullptr;
switch (Predicate) {
default: break;
case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
}
// No need for a select anymore - this is an unconditional move.
if (Opnd) {
unsigned OpReg = getRegForValue(Opnd);
if (OpReg == 0)
return false;
bool OpIsKill = hasTrivialKill(Opnd);
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(OpReg, getKillRegState(OpIsKill));
UpdateValueMap(I, ResultReg);
return true;
}
}
// First try to use real conditional move instructions.
if (X86FastEmitCMoveSelect(I))
return true;
return false;
}
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
// fpext from float to double.
if (X86ScalarSSEf64 &&
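A note on the SETFOpcTable introduced above, illustrated against the select_fcmp_oeq_cmov test added in this commit: UCOMISD signals an unordered result (a NaN operand) through PF, and an unordered compare also sets ZF, so "ordered and equal" cannot be read from a single flag. The table therefore pairs SETNP with SETE and combines them with a TEST (and, for FCMP_UNE, pairs SETP with SETNE combined with an OR) before issuing the cmov.

define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) {
  %1 = fcmp oeq double %a, %b
  %2 = select i1 %1, i64 %c, i64 %d
  ret i64 %2
}
; Expected lowering (matching the CHECK lines in the new test file):
;   ucomisd %xmm1, %xmm0
;   setnp   %al          ; PF = 0: operands are ordered
;   sete    %cl          ; ZF = 1: operands compared equal
;   testb   %al, %cl     ; the test's ZF is 0 only if both hold
;   cmoveq  %rsi, %rdi   ; condition false -> pick %d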

View File

@@ -2696,8 +2696,8 @@ unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
/// getCMovFromCond - Return a cmov opcode for the given condition,
/// register size in bytes, and operand type.
static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
bool HasMemoryOperand) {
unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
bool HasMemoryOperand) {
static const uint16_t Opc[32][3] = {
{ X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
{ X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },

View File

@@ -66,6 +66,11 @@ namespace X86 {
/// a memory operand.
unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
/// \brief Return a cmov opcode for the given condition, register size in
/// bytes, and operand type.
unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
bool HasMemoryOperand = false);
// Turn CMov opcode into condition code.
CondCode getCondFromCMovOpc(unsigned Opc);

View File

@@ -0,0 +1,62 @@
; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
; Test conditional move for the supported types (i16, i32, and i64) and
; condition input (argument or cmp). Currently i8 is not supported.
define zeroext i16 @select_cmov_i16(i1 zeroext %cond, i16 zeroext %a, i16 zeroext %b) {
; CHECK-LABEL: select_cmov_i16
; CHECK: testb $1, %dil
; CHECK-NEXT: cmovew %dx, %si
; CHECK-NEXT: movzwl %si, %eax
%1 = select i1 %cond, i16 %a, i16 %b
ret i16 %1
}
define zeroext i16 @select_cmp_cmov_i16(i16 zeroext %a, i16 zeroext %b) {
; CHECK-LABEL: select_cmp_cmov_i16
; CHECK: cmpw %si, %di
; CHECK-NEXT: cmovbw %di, %si
; CHECK-NEXT: movzwl %si, %eax
%1 = icmp ult i16 %a, %b
%2 = select i1 %1, i16 %a, i16 %b
ret i16 %2
}
define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) {
; CHECK-LABEL: select_cmov_i32
; CHECK: testb $1, %dil
; CHECK-NEXT: cmovel %edx, %esi
; CHECK-NEXT: movl %esi, %eax
%1 = select i1 %cond, i32 %a, i32 %b
ret i32 %1
}
define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) {
; CHECK-LABEL: select_cmp_cmov_i32
; CHECK: cmpl %esi, %edi
; CHECK-NEXT: cmovbl %edi, %esi
; CHECK-NEXT: movl %esi, %eax
%1 = icmp ult i32 %a, %b
%2 = select i1 %1, i32 %a, i32 %b
ret i32 %2
}
define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) {
; CHECK-LABEL: select_cmov_i64
; CHECK: testb $1, %dil
; CHECK-NEXT: cmoveq %rdx, %rsi
; CHECK-NEXT: movq %rsi, %rax
%1 = select i1 %cond, i64 %a, i64 %b
ret i64 %1
}
define i64 @select_cmp_cmov_i64(i64 %a, i64 %b) {
; CHECK-LABEL: select_cmp_cmov_i64
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovbq %rdi, %rsi
; CHECK-NEXT: movq %rsi, %rax
%1 = icmp ult i64 %a, %b
%2 = select i1 %1, i64 %a, i64 %b
ret i64 %2
}

View File

@@ -0,0 +1,255 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
; Test all the cmp predicates that can feed an integer conditional move.
define i64 @select_fcmp_false_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_false_cmov
; CHECK: movq %rsi, %rax
; CHECK-NEXT: retq
%1 = fcmp false double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_oeq_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: setnp %al
; CHECK-NEXT: sete %cl
; CHECK-NEXT: testb %al, %cl
; CHECK-NEXT: cmoveq %rsi, %rdi
%1 = fcmp oeq double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ogt_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ogt_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovbeq %rsi, %rdi
%1 = fcmp ogt double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_oge_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_oge_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovbq %rsi, %rdi
%1 = fcmp oge double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_olt_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_olt_cmov
; CHECK: ucomisd %xmm0, %xmm1
; CHECK-NEXT: cmovbeq %rsi, %rdi
%1 = fcmp olt double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ole_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ole_cmov
; CHECK: ucomisd %xmm0, %xmm1
; CHECK-NEXT: cmovbq %rsi, %rdi
%1 = fcmp ole double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_one_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_one_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmoveq %rsi, %rdi
%1 = fcmp one double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ord_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ord_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovpq %rsi, %rdi
%1 = fcmp ord double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_uno_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_uno_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovnpq %rsi, %rdi
%1 = fcmp uno double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ueq_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ueq_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovneq %rsi, %rdi
%1 = fcmp ueq double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ugt_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ugt_cmov
; CHECK: ucomisd %xmm0, %xmm1
; CHECK-NEXT: cmovaeq %rsi, %rdi
%1 = fcmp ugt double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_uge_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_uge_cmov
; CHECK: ucomisd %xmm0, %xmm1
; CHECK-NEXT: cmovaq %rsi, %rdi
%1 = fcmp uge double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ult_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ult_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovaeq %rsi, %rdi
%1 = fcmp ult double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_ule_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_ule_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: cmovaq %rsi, %rdi
%1 = fcmp ule double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_une_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_une_cmov
; CHECK: ucomisd %xmm1, %xmm0
; CHECK-NEXT: setp %al
; CHECK-NEXT: setne %cl
; CHECK-NEXT: orb %al, %cl
; CHECK-NEXT: cmoveq %rsi, %rdi
%1 = fcmp une double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_fcmp_true_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_true_cmov
; CHECK: movq %rdi, %rax
%1 = fcmp true double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_eq_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_eq_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovneq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp eq i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_ne_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_ne_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmoveq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp ne i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_ugt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_ugt_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovbeq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp ugt i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_uge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_uge_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovbq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp uge i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_ult_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_ult_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovaeq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp ult i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_ule_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_ule_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovaq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp ule i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_sgt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_sgt_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovleq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp sgt i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_sge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_sge_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovlq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp sge i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_slt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_slt_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovgeq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp slt i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}
define i64 @select_icmp_sle_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_icmp_sle_cmov
; CHECK: cmpq %rsi, %rdi
; CHECK-NEXT: cmovgq %rcx, %rdx
; CHECK-NEXT: movq %rdx, %rax
%1 = icmp sle i64 %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
}

View File

@@ -4,10 +4,10 @@
; lsb is zero.
; <rdar://problem/15651765>
; CHECK-LABEL: fastisel_select:
; CHECK: subb {{%[a-z0-9]+}}, [[RES:%[a-z0-9]+]]
; CHECK: testb $1, [[RES]]
; CHECK: cmovel
; CHECK: cmovnel %edi, %esi
define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) {
%shuffleInternal15257_8932 = sub i1 %exchSub2211_, %trunc_8766
%counter_diff1345 = select i1 %shuffleInternal15257_8932, i32 1204476887, i32 0