mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-28 14:10:55 +00:00
X86: optimize generated code for integer ABS
This patch will generate the following for integer ABS: "movl %edi, %eax; negl %eax; cmovll %edi, %eax" instead of "movl %edi, %ecx; sarl $31, %ecx; leal (%rdi,%rcx), %eax; xorl %ecx, %eax". There exists a target-independent DAG combine for integer ABS, which converts integer ABS to sar+add+xor. For X86, we match this pattern back to neg+cmov. This is implemented in performIntegerAbsCombine, which is called from PerformXorCombine. rdar://10695237 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158175 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fa371eaff3
commit
9236362a64
@ -1227,8 +1227,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||||||
setTargetDAGCombine(ISD::FP_TO_SINT);
|
setTargetDAGCombine(ISD::FP_TO_SINT);
|
||||||
if (Subtarget->is64Bit())
|
if (Subtarget->is64Bit())
|
||||||
setTargetDAGCombine(ISD::MUL);
|
setTargetDAGCombine(ISD::MUL);
|
||||||
if (Subtarget->hasBMI())
|
setTargetDAGCombine(ISD::XOR);
|
||||||
setTargetDAGCombine(ISD::XOR);
|
|
||||||
|
|
||||||
computeRegisterProperties();
|
computeRegisterProperties();
|
||||||
|
|
||||||
@ -14507,6 +14506,41 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generate NEG and CMOV for integer abs.
//
// Matches N = XOR(ADD(X, Y), Y) where Y = SRA(X, size(X)-1) and, on a match,
// returns an X86ISD::CMOV node built from X, SUB(0, X) (i.e. the negation of
// X), the condition code X86::COND_GE, and the second result of the SUB node.
// Returns an empty SDValue when the value type is an 8-bit integer or when
// the pattern does not match, so the caller can go on to try other combines.
|
||||||
|
static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
|
||||||
|
EVT VT = N->getValueType(0);
|
||||||
|
|
||||||
|
// Since X86 does not have CMOV for 8-bit integer, we don't convert
|
||||||
|
// 8-bit integer abs to NEG and CMOV.
// NOTE(review): only the 8-bit width is rejected here; presumably wider
// integer vector types never reach this point -- confirm against callers.
|
||||||
|
if (VT.isInteger() && VT.getSizeInBits() == 8)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
SDValue N0 = N->getOperand(0);
|
||||||
|
SDValue N1 = N->getOperand(1);
|
||||||
|
DebugLoc DL = N->getDebugLoc();
|
||||||
|
|
||||||
|
// Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
|
||||||
|
// and change it to SUB(0, X) (a negate) and CMOV.
// Here N0 is the ADD, N1 is the SRA (Y), and X is N0.getOperand(0); the
// checks below require N1 to be both the ADD's second operand and a shift of X.
|
||||||
|
if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
|
||||||
|
N0.getOpcode() == ISD::ADD &&
|
||||||
|
N0.getOperand(1) == N1 &&
|
||||||
|
N1.getOpcode() == ISD::SRA &&
|
||||||
|
N1.getOperand(0) == N0.getOperand(0))
|
||||||
|
if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
|
||||||
|
if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) {
|
||||||
|
// Generate SUB & CMOV.
// Neg computes 0 - X and is created with two result types (VT, MVT::i32);
// result 1 is presumably the flags value -- it is passed as the last CMOV
// operand below.
|
||||||
|
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
|
||||||
|
DAG.getConstant(0, VT), N0.getOperand(0));
|
||||||
|
|
||||||
|
SDValue Ops[] = { N0.getOperand(0), Neg,
|
||||||
|
DAG.getConstant(X86::COND_GE, MVT::i8),
|
||||||
|
SDValue(Neg.getNode(), 1) };
|
||||||
|
return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue),
|
||||||
|
Ops, array_lengthof(Ops));
|
||||||
|
}
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
|
// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
|
||||||
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
|
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
|
||||||
TargetLowering::DAGCombinerInfo &DCI,
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
@ -14514,6 +14548,14 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
if (DCI.isBeforeLegalizeOps())
|
if (DCI.isBeforeLegalizeOps())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
SDValue RV = performIntegerAbsCombine(N, DAG);
|
||||||
|
if (RV.getNode())
|
||||||
|
return RV;
|
||||||
|
|
||||||
|
// Try forming BMI if it is available.
|
||||||
|
if (!Subtarget->hasBMI())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
EVT VT = N->getValueType(0);
|
EVT VT = N->getValueType(0);
|
||||||
|
|
||||||
if (VT != MVT::i32 && VT != MVT::i64)
|
if (VT != MVT::i32 && VT != MVT::i64)
|
||||||
|
@ -1,13 +1,17 @@
|
|||||||
; RUN: llc < %s -march=x86-64 -stats |& \
|
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||||
; RUN: grep {5 .*Number of machine instrs printed}
|
|
||||||
|
|
||||||
;; Integer absolute value, should produce something at least as good as:
|
;; Integer absolute value, should produce something at least as good as:
|
||||||
;; movl %edi, %ecx
|
;; movl %edi, %eax
|
||||||
;; sarl $31, %ecx
|
;; negl %eax
|
||||||
;; leal (%rdi,%rcx), %eax
|
;; cmovll %edi, %eax
|
||||||
;; xorl %ecx, %eax
|
|
||||||
;; ret
|
;; ret
|
||||||
|
; rdar://10695237
|
||||||
define i32 @test(i32 %a) nounwind {
|
define i32 @test(i32 %a) nounwind {
|
||||||
|
; CHECK: test:
|
||||||
|
; CHECK: mov
|
||||||
|
; CHECK-NEXT: neg
|
||||||
|
; CHECK-NEXT: cmov
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%tmp1neg = sub i32 0, %a
|
%tmp1neg = sub i32 0, %a
|
||||||
%b = icmp sgt i32 %a, -1
|
%b = icmp sgt i32 %a, -1
|
||||||
%abs = select i1 %b, i32 %a, i32 %tmp1neg
|
%abs = select i1 %b, i32 %a, i32 %tmp1neg
|
||||||
|
Loading…
Reference in New Issue
Block a user