[X86] Add ISel patterns to improve the selection of TZCNT and LZCNT.

Instructions TZCNT (requires BMI1) and LZCNT (requires LZCNT), always
provide the operand size as output if the input operand is zero.

We can take advantage of this knowledge during instruction selection
stage in order to simplify a few corner case.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209159 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrea Di Biagio 2014-05-19 20:38:59 +00:00
parent 86ad7b70cb
commit 8e4a223f7b
2 changed files with 528 additions and 0 deletions

View File

@ -2002,6 +2002,46 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
(implicit EFLAGS)]>, XS;
}
let Predicates = [HasLZCNT] in {
def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E),
(X86cmp GR16:$src, (i16 0))),
(LZCNT16rr GR16:$src)>;
def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E),
(X86cmp GR32:$src, (i32 0))),
(LZCNT32rr GR32:$src)>;
def : Pat<(X86cmov (ctlz GR64:$src), (i64 64), (X86_COND_E),
(X86cmp GR64:$src, (i64 0))),
(LZCNT64rr GR64:$src)>;
def : Pat<(X86cmov (i16 16), (ctlz GR16:$src), (X86_COND_E),
(X86cmp GR16:$src, (i16 0))),
(LZCNT16rr GR16:$src)>;
def : Pat<(X86cmov (i32 32), (ctlz GR32:$src), (X86_COND_E),
(X86cmp GR32:$src, (i32 0))),
(LZCNT32rr GR32:$src)>;
def : Pat<(X86cmov (i64 64), (ctlz GR64:$src), (X86_COND_E),
(X86cmp GR64:$src, (i64 0))),
(LZCNT64rr GR64:$src)>;
def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
(X86cmp (loadi16 addr:$src), (i16 0))),
(LZCNT16rm addr:$src)>;
def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
(X86cmp (loadi32 addr:$src), (i32 0))),
(LZCNT32rm addr:$src)>;
def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
(X86cmp (loadi64 addr:$src), (i64 0))),
(LZCNT64rm addr:$src)>;
def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_E),
(X86cmp (loadi16 addr:$src), (i16 0))),
(LZCNT16rm addr:$src)>;
def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_E),
(X86cmp (loadi32 addr:$src), (i32 0))),
(LZCNT32rm addr:$src)>;
def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_E),
(X86cmp (loadi64 addr:$src), (i64 0))),
(LZCNT64rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
// BMI Instructions
//
@ -2078,6 +2118,47 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}
let Predicates = [HasBMI] in {
def : Pat<(X86cmov (cttz GR16:$src), (i16 16), (X86_COND_E),
(X86cmp GR16:$src, (i16 0))),
(TZCNT16rr GR16:$src)>;
def : Pat<(X86cmov (cttz GR32:$src), (i32 32), (X86_COND_E),
(X86cmp GR32:$src, (i32 0))),
(TZCNT32rr GR32:$src)>;
def : Pat<(X86cmov (cttz GR64:$src), (i64 64), (X86_COND_E),
(X86cmp GR64:$src, (i64 0))),
(TZCNT64rr GR64:$src)>;
def : Pat<(X86cmov (i16 16), (cttz GR16:$src), (X86_COND_E),
(X86cmp GR16:$src, (i16 0))),
(TZCNT16rr GR16:$src)>;
def : Pat<(X86cmov (i32 32), (cttz GR32:$src), (X86_COND_E),
(X86cmp GR32:$src, (i32 0))),
(TZCNT32rr GR32:$src)>;
def : Pat<(X86cmov (i64 64), (cttz GR64:$src), (X86_COND_E),
(X86cmp GR64:$src, (i64 0))),
(TZCNT64rr GR64:$src)>;
def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
(X86cmp (loadi16 addr:$src), (i16 0))),
(TZCNT16rm addr:$src)>;
def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
(X86cmp (loadi32 addr:$src), (i32 0))),
(TZCNT32rm addr:$src)>;
def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
(X86cmp (loadi64 addr:$src), (i64 0))),
(TZCNT64rm addr:$src)>;
def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_E),
(X86cmp (loadi16 addr:$src), (i16 0))),
(TZCNT16rm addr:$src)>;
def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_E),
(X86cmp (loadi32 addr:$src), (i32 0))),
(TZCNT32rm addr:$src)>;
def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_E),
(X86cmp (loadi64 addr:$src), (i64 0))),
(TZCNT64rm addr:$src)>;
}
multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int,
PatFrag ld_frag> {

View File

@ -0,0 +1,447 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+lzcnt | FileCheck %s
; LZCNT and TZCNT will always produce the operand size when the input operand
; is zero. This test is to verify that we efficiently select LZCNT/TZCNT
; based on the fact that the 'icmp+select' sequence is always redundant
; in every function defined below.
define i16 @test1_ctlz(i16 %v) {
%cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 %v, 0
%cond = select i1 %tobool, i16 16, i16 %cnt
ret i16 %cond
}
; CHECK-LABEL: test1_ctlz
; CHECK: lzcnt
; CHECK-NEXT: ret
define i32 @test2_ctlz(i32 %v) {
%cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 %v, 0
%cond = select i1 %tobool, i32 32, i32 %cnt
ret i32 %cond
}
; CHECK-LABEL: test2_ctlz
; CHECK: lzcnt
; CHECK-NEXT: ret
define i64 @test3_ctlz(i64 %v) {
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 %v, 0
%cond = select i1 %tobool, i64 64, i64 %cnt
ret i64 %cond
}
; CHECK-LABEL: test3_ctlz
; CHECK: lzcnt
; CHECK-NEXT: ret
define i16 @test4_ctlz(i16 %v) {
%cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 16, i16 %cnt
ret i16 %cond
}
; CHECK-LABEL: test4_ctlz
; CHECK: lzcnt
; CHECK-NEXT: ret
define i32 @test5_ctlz(i32 %v) {
%cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 32, i32 %cnt
ret i32 %cond
}
; CHECK-LABEL: test5_ctlz
; CHECK: lzcnt
; CHECK-NEXT: ret
define i64 @test6_ctlz(i64 %v) {
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 64, i64 %cnt
ret i64 %cond
}
; CHECK-LABEL: test6_ctlz
; CHECK: lzcnt
; CHECK-NEXT: ret
define i16 @test7_ctlz(i16 %v) {
%cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 %cnt, i16 16
ret i16 %cond
}
; CHECK-LABEL: test7_ctlz
; CHECK: lzcnt
; CHECK-NEXT: ret
define i32 @test8_ctlz(i32 %v) {
%cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 %cnt, i32 32
ret i32 %cond
}
; CHECK-LABEL: test8_ctlz
; CHECK: lzcnt
; CHECK-NEXT: ret
define i64 @test9_ctlz(i64 %v) {
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 %cnt, i64 64
ret i64 %cond
}
; CHECK-LABEL: test9_ctlz
; CHECK: lzcnt
; CHECK-NEXT: ret
define i16 @test10_ctlz(i16* %ptr) {
%v = load i16* %ptr
%cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 %v, 0
%cond = select i1 %tobool, i16 16, i16 %cnt
ret i16 %cond
}
; CHECK-LABEL: test10_ctlz
; CHECK-NOT: movw
; CHECK: lzcnt
; CHECK-NEXT: ret
define i32 @test11_ctlz(i32* %ptr) {
%v = load i32* %ptr
%cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 %v, 0
%cond = select i1 %tobool, i32 32, i32 %cnt
ret i32 %cond
}
; CHECK-LABEL: test11_ctlz
; CHECK-NOT: movd
; CHECK: lzcnt
; CHECK-NEXT: ret
define i64 @test12_ctlz(i64* %ptr) {
%v = load i64* %ptr
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 %v, 0
%cond = select i1 %tobool, i64 64, i64 %cnt
ret i64 %cond
}
; CHECK-LABEL: test12_ctlz
; CHECK-NOT: movq
; CHECK: lzcnt
; CHECK-NEXT: ret
define i16 @test13_ctlz(i16* %ptr) {
%v = load i16* %ptr
%cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 16, i16 %cnt
ret i16 %cond
}
; CHECK-LABEL: test13_ctlz
; CHECK-NOT: movw
; CHECK: lzcnt
; CHECK-NEXT: ret
define i32 @test14_ctlz(i32* %ptr) {
%v = load i32* %ptr
%cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 32, i32 %cnt
ret i32 %cond
}
; CHECK-LABEL: test14_ctlz
; CHECK-NOT: movd
; CHECK: lzcnt
; CHECK-NEXT: ret
define i64 @test15_ctlz(i64* %ptr) {
%v = load i64* %ptr
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 64, i64 %cnt
ret i64 %cond
}
; CHECK-LABEL: test15_ctlz
; CHECK-NOT: movq
; CHECK: lzcnt
; CHECK-NEXT: ret
define i16 @test16_ctlz(i16* %ptr) {
%v = load i16* %ptr
%cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 %cnt, i16 16
ret i16 %cond
}
; CHECK-LABEL: test16_ctlz
; CHECK-NOT: movw
; CHECK: lzcnt
; CHECK-NEXT: ret
define i32 @test17_ctlz(i32* %ptr) {
%v = load i32* %ptr
%cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 %cnt, i32 32
ret i32 %cond
}
; CHECK-LABEL: test17_ctlz
; CHECK-NOT: movd
; CHECK: lzcnt
; CHECK-NEXT: ret
define i64 @test18_ctlz(i64* %ptr) {
%v = load i64* %ptr
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 %cnt, i64 64
ret i64 %cond
}
; CHECK-LABEL: test18_ctlz
; CHECK-NOT: movq
; CHECK: lzcnt
; CHECK-NEXT: ret
define i16 @test1_cttz(i16 %v) {
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 %v, 0
%cond = select i1 %tobool, i16 16, i16 %cnt
ret i16 %cond
}
; CHECK-LABEL: test1_cttz
; CHECK: tzcnt
; CHECK-NEXT: ret
define i32 @test2_cttz(i32 %v) {
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 %v, 0
%cond = select i1 %tobool, i32 32, i32 %cnt
ret i32 %cond
}
; CHECK-LABEL: test2_cttz
; CHECK: tzcnt
; CHECK-NEXT: ret
define i64 @test3_cttz(i64 %v) {
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 %v, 0
%cond = select i1 %tobool, i64 64, i64 %cnt
ret i64 %cond
}
; CHECK-LABEL: test3_cttz
; CHECK: tzcnt
; CHECK-NEXT: ret
define i16 @test4_cttz(i16 %v) {
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 16, i16 %cnt
ret i16 %cond
}
; CHECK-LABEL: test4_cttz
; CHECK: tzcnt
; CHECK-NEXT: ret
define i32 @test5_cttz(i32 %v) {
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 32, i32 %cnt
ret i32 %cond
}
; CHECK-LABEL: test5_cttz
; CHECK: tzcnt
; CHECK-NEXT: ret
define i64 @test6_cttz(i64 %v) {
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 64, i64 %cnt
ret i64 %cond
}
; CHECK-LABEL: test6_cttz
; CHECK: tzcnt
; CHECK-NEXT: ret
define i16 @test7_cttz(i16 %v) {
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 %cnt, i16 16
ret i16 %cond
}
; CHECK-LABEL: test7_cttz
; CHECK: tzcnt
; CHECK-NEXT: ret
define i32 @test8_cttz(i32 %v) {
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 %cnt, i32 32
ret i32 %cond
}
; CHECK-LABEL: test8_cttz
; CHECK: tzcnt
; CHECK-NEXT: ret
define i64 @test9_cttz(i64 %v) {
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 %cnt, i64 64
ret i64 %cond
}
; CHECK-LABEL: test9_cttz
; CHECK: tzcnt
; CHECK-NEXT: ret
define i16 @test10_cttz(i16* %ptr) {
%v = load i16* %ptr
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 %v, 0
%cond = select i1 %tobool, i16 16, i16 %cnt
ret i16 %cond
}
; CHECK-LABEL: test10_cttz
; CHECK-NOT: movw
; CHECK: tzcnt
; CHECK-NEXT: ret
define i32 @test11_cttz(i32* %ptr) {
%v = load i32* %ptr
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 %v, 0
%cond = select i1 %tobool, i32 32, i32 %cnt
ret i32 %cond
}
; CHECK-LABEL: test11_cttz
; CHECK-NOT: movd
; CHECK: tzcnt
; CHECK-NEXT: ret
define i64 @test12_cttz(i64* %ptr) {
%v = load i64* %ptr
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 %v, 0
%cond = select i1 %tobool, i64 64, i64 %cnt
ret i64 %cond
}
; CHECK-LABEL: test12_cttz
; CHECK-NOT: movq
; CHECK: tzcnt
; CHECK-NEXT: ret
define i16 @test13_cttz(i16* %ptr) {
%v = load i16* %ptr
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 16, i16 %cnt
ret i16 %cond
}
; CHECK-LABEL: test13_cttz
; CHECK-NOT: movw
; CHECK: tzcnt
; CHECK-NEXT: ret
define i32 @test14_cttz(i32* %ptr) {
%v = load i32* %ptr
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 32, i32 %cnt
ret i32 %cond
}
; CHECK-LABEL: test14_cttz
; CHECK-NOT: movd
; CHECK: tzcnt
; CHECK-NEXT: ret
define i64 @test15_cttz(i64* %ptr) {
%v = load i64* %ptr
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 64, i64 %cnt
ret i64 %cond
}
; CHECK-LABEL: test15_cttz
; CHECK-NOT: movq
; CHECK: tzcnt
; CHECK-NEXT: ret
define i16 @test16_cttz(i16* %ptr) {
%v = load i16* %ptr
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 %cnt, i16 16
ret i16 %cond
}
; CHECK-LABEL: test16_cttz
; CHECK-NOT: movw
; CHECK: tzcnt
; CHECK-NEXT: ret
define i32 @test17_cttz(i32* %ptr) {
%v = load i32* %ptr
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 %cnt, i32 32
ret i32 %cond
}
; CHECK-LABEL: test17_cttz
; CHECK-NOT: movd
; CHECK: tzcnt
; CHECK-NEXT: ret
define i64 @test18_cttz(i64* %ptr) {
%v = load i64* %ptr
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 %cnt, i64 64
ret i64 %cond
}
; CHECK-LABEL: test18_cttz
; CHECK-NOT: movq
; CHECK: tzcnt
; CHECK-NEXT: ret
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i16 @llvm.ctlz.i16(i16, i1)