diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 23cd496adeb..0d97669b225 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2002,6 +2002,46 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in { (implicit EFLAGS)]>, XS; } +let Predicates = [HasLZCNT] in { + def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E), + (X86cmp GR16:$src, (i16 0))), + (LZCNT16rr GR16:$src)>; + def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E), + (X86cmp GR32:$src, (i32 0))), + (LZCNT32rr GR32:$src)>; + def : Pat<(X86cmov (ctlz GR64:$src), (i64 64), (X86_COND_E), + (X86cmp GR64:$src, (i64 0))), + (LZCNT64rr GR64:$src)>; + def : Pat<(X86cmov (i16 16), (ctlz GR16:$src), (X86_COND_E), + (X86cmp GR16:$src, (i16 0))), + (LZCNT16rr GR16:$src)>; + def : Pat<(X86cmov (i32 32), (ctlz GR32:$src), (X86_COND_E), + (X86cmp GR32:$src, (i32 0))), + (LZCNT32rr GR32:$src)>; + def : Pat<(X86cmov (i64 64), (ctlz GR64:$src), (X86_COND_E), + (X86cmp GR64:$src, (i64 0))), + (LZCNT64rr GR64:$src)>; + + def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E), + (X86cmp (loadi16 addr:$src), (i16 0))), + (LZCNT16rm addr:$src)>; + def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E), + (X86cmp (loadi32 addr:$src), (i32 0))), + (LZCNT32rm addr:$src)>; + def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E), + (X86cmp (loadi64 addr:$src), (i64 0))), + (LZCNT64rm addr:$src)>; + def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_E), + (X86cmp (loadi16 addr:$src), (i16 0))), + (LZCNT16rm addr:$src)>; + def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_E), + (X86cmp (loadi32 addr:$src), (i32 0))), + (LZCNT32rm addr:$src)>; + def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_E), + (X86cmp (loadi64 addr:$src), (i64 0))), + (LZCNT64rm addr:$src)>; +} + //===----------------------------------------------------------------------===// // BMI Instructions // @@ -2078,6 +2118,47 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; } +let Predicates = [HasBMI] in { + def : Pat<(X86cmov (cttz GR16:$src), (i16 16), (X86_COND_E), + (X86cmp GR16:$src, (i16 0))), + (TZCNT16rr GR16:$src)>; + def : Pat<(X86cmov (cttz GR32:$src), (i32 32), (X86_COND_E), + (X86cmp GR32:$src, (i32 0))), + (TZCNT32rr GR32:$src)>; + def : Pat<(X86cmov (cttz GR64:$src), (i64 64), (X86_COND_E), + (X86cmp GR64:$src, (i64 0))), + (TZCNT64rr GR64:$src)>; + def : Pat<(X86cmov (i16 16), (cttz GR16:$src), (X86_COND_E), + (X86cmp GR16:$src, (i16 0))), + (TZCNT16rr GR16:$src)>; + def : Pat<(X86cmov (i32 32), (cttz GR32:$src), (X86_COND_E), + (X86cmp GR32:$src, (i32 0))), + (TZCNT32rr GR32:$src)>; + def : Pat<(X86cmov (i64 64), (cttz GR64:$src), (X86_COND_E), + (X86cmp GR64:$src, (i64 0))), + (TZCNT64rr GR64:$src)>; + + def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E), + (X86cmp (loadi16 addr:$src), (i16 0))), + (TZCNT16rm addr:$src)>; + def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E), + (X86cmp (loadi32 addr:$src), (i32 0))), + (TZCNT32rm addr:$src)>; + def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E), + (X86cmp (loadi64 addr:$src), (i64 0))), + (TZCNT64rm addr:$src)>; + def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_E), + (X86cmp (loadi16 addr:$src), (i16 0))), + (TZCNT16rm addr:$src)>; + def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_E), + (X86cmp (loadi32 addr:$src), (i32 0))), + (TZCNT32rm addr:$src)>; + def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_E), + (X86cmp (loadi64 addr:$src), (i64 0))), + (TZCNT64rm addr:$src)>; +} + + multiclass bmi_bextr_bzhi opc, string mnemonic, RegisterClass RC, X86MemOperand x86memop, Intrinsic Int, PatFrag ld_frag> { diff --git a/test/CodeGen/X86/lzcnt-tzcnt.ll b/test/CodeGen/X86/lzcnt-tzcnt.ll new file mode 100644 index 00000000000..07e4b9d8ce6 --- /dev/null +++ b/test/CodeGen/X86/lzcnt-tzcnt.ll @@ -0,0 +1,447 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+lzcnt | FileCheck %s + +; LZCNT and TZCNT will always produce the operand size when the input operand +; is zero. This test is to verify that we efficiently select LZCNT/TZCNT +; based on the fact that the 'icmp+select' sequence is always redundant +; in every function defined below. + + +define i16 @test1_ctlz(i16 %v) { + %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 %v, 0 + %cond = select i1 %tobool, i16 16, i16 %cnt + ret i16 %cond +} +; CHECK-LABEL: test1_ctlz +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i32 @test2_ctlz(i32 %v) { + %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 %v, 0 + %cond = select i1 %tobool, i32 32, i32 %cnt + ret i32 %cond +} +; CHECK-LABEL: test2_ctlz +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i64 @test3_ctlz(i64 %v) { + %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 %v, 0 + %cond = select i1 %tobool, i64 64, i64 %cnt + ret i64 %cond +} +; CHECK-LABEL: test3_ctlz +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i16 @test4_ctlz(i16 %v) { + %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 0, %v + %cond = select i1 %tobool, i16 16, i16 %cnt + ret i16 %cond +} +; CHECK-LABEL: test4_ctlz +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i32 @test5_ctlz(i32 %v) { + %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 0, %v + %cond = select i1 %tobool, i32 32, i32 %cnt + ret i32 %cond +} +; CHECK-LABEL: test5_ctlz +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i64 @test6_ctlz(i64 %v) { + %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 0, %v + %cond = select i1 %tobool, i64 64, i64 %cnt + ret i64 %cond +} +; CHECK-LABEL: test6_ctlz +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i16 @test7_ctlz(i16 %v) { + %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 0, %v + %cond = select i1 %tobool, i16 %cnt, i16 16 + ret i16 %cond +} +; CHECK-LABEL: test7_ctlz +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i32 @test8_ctlz(i32 %v) { + %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 0, %v + %cond = select i1 %tobool, i32 %cnt, i32 32 + ret i32 %cond +} +; CHECK-LABEL: test8_ctlz +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i64 @test9_ctlz(i64 %v) { + %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 0, %v + %cond = select i1 %tobool, i64 %cnt, i64 64 + ret i64 %cond +} +; CHECK-LABEL: test9_ctlz +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i16 @test10_ctlz(i16* %ptr) { + %v = load i16* %ptr + %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 %v, 0 + %cond = select i1 %tobool, i16 16, i16 %cnt + ret i16 %cond +} +; CHECK-LABEL: test10_ctlz +; CHECK-NOT: movw +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i32 @test11_ctlz(i32* %ptr) { + %v = load i32* %ptr + %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 %v, 0 + %cond = select i1 %tobool, i32 32, i32 %cnt + ret i32 %cond +} +; CHECK-LABEL: test11_ctlz +; CHECK-NOT: movd +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i64 @test12_ctlz(i64* %ptr) { + %v = load i64* %ptr + %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 %v, 0 + %cond = select i1 %tobool, i64 64, i64 %cnt + ret i64 %cond +} +; CHECK-LABEL: test12_ctlz +; CHECK-NOT: movq +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i16 @test13_ctlz(i16* %ptr) { + %v = load i16* %ptr + %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 0, %v + %cond = select i1 %tobool, i16 16, i16 %cnt + ret i16 %cond +} +; CHECK-LABEL: test13_ctlz +; CHECK-NOT: movw +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i32 @test14_ctlz(i32* %ptr) { + %v = load i32* %ptr + %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 0, %v + %cond = select i1 %tobool, i32 32, i32 %cnt + ret i32 %cond +} +; CHECK-LABEL: test14_ctlz +; CHECK-NOT: movd +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i64 @test15_ctlz(i64* %ptr) { + %v = load i64* %ptr + %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 0, %v + %cond = select i1 %tobool, i64 64, i64 %cnt + ret i64 %cond +} +; CHECK-LABEL: test15_ctlz +; CHECK-NOT: movq +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i16 @test16_ctlz(i16* %ptr) { + %v = load i16* %ptr + %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 0, %v + %cond = select i1 %tobool, i16 %cnt, i16 16 + ret i16 %cond +} +; CHECK-LABEL: test16_ctlz +; CHECK-NOT: movw +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i32 @test17_ctlz(i32* %ptr) { + %v = load i32* %ptr + %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 0, %v + %cond = select i1 %tobool, i32 %cnt, i32 32 + ret i32 %cond +} +; CHECK-LABEL: test17_ctlz +; CHECK-NOT: movd +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i64 @test18_ctlz(i64* %ptr) { + %v = load i64* %ptr + %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 0, %v + %cond = select i1 %tobool, i64 %cnt, i64 64 + ret i64 %cond +} +; CHECK-LABEL: test18_ctlz +; CHECK-NOT: movq +; CHECK: lzcnt +; CHECK-NEXT: ret + + +define i16 @test1_cttz(i16 %v) { + %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 %v, 0 + %cond = select i1 %tobool, i16 16, i16 %cnt + ret i16 %cond +} +; CHECK-LABEL: test1_cttz +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i32 @test2_cttz(i32 %v) { + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 %v, 0 + %cond = select i1 %tobool, i32 32, i32 %cnt + ret i32 %cond +} +; CHECK-LABEL: test2_cttz +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i64 @test3_cttz(i64 %v) { + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 %v, 0 + %cond = select i1 %tobool, i64 64, i64 %cnt + ret i64 %cond +} +; CHECK-LABEL: test3_cttz +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i16 @test4_cttz(i16 %v) { + %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 0, %v + %cond = select i1 %tobool, i16 16, i16 %cnt + ret i16 %cond +} +; CHECK-LABEL: test4_cttz +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i32 @test5_cttz(i32 %v) { + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 0, %v + %cond = select i1 %tobool, i32 32, i32 %cnt + ret i32 %cond +} +; CHECK-LABEL: test5_cttz +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i64 @test6_cttz(i64 %v) { + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 0, %v + %cond = select i1 %tobool, i64 64, i64 %cnt + ret i64 %cond +} +; CHECK-LABEL: test6_cttz +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i16 @test7_cttz(i16 %v) { + %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 0, %v + %cond = select i1 %tobool, i16 %cnt, i16 16 + ret i16 %cond +} +; CHECK-LABEL: test7_cttz +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i32 @test8_cttz(i32 %v) { + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 0, %v + %cond = select i1 %tobool, i32 %cnt, i32 32 + ret i32 %cond +} +; CHECK-LABEL: test8_cttz +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i64 @test9_cttz(i64 %v) { + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 0, %v + %cond = select i1 %tobool, i64 %cnt, i64 64 + ret i64 %cond +} +; CHECK-LABEL: test9_cttz +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i16 @test10_cttz(i16* %ptr) { + %v = load i16* %ptr + %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 %v, 0 + %cond = select i1 %tobool, i16 16, i16 %cnt + ret i16 %cond +} +; CHECK-LABEL: test10_cttz +; CHECK-NOT: movw +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i32 @test11_cttz(i32* %ptr) { + %v = load i32* %ptr + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 %v, 0 + %cond = select i1 %tobool, i32 32, i32 %cnt + ret i32 %cond +} +; CHECK-LABEL: test11_cttz +; CHECK-NOT: movd +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i64 @test12_cttz(i64* %ptr) { + %v = load i64* %ptr + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 %v, 0 + %cond = select i1 %tobool, i64 64, i64 %cnt + ret i64 %cond +} +; CHECK-LABEL: test12_cttz +; CHECK-NOT: movq +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i16 @test13_cttz(i16* %ptr) { + %v = load i16* %ptr + %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 0, %v + %cond = select i1 %tobool, i16 16, i16 %cnt + ret i16 %cond +} +; CHECK-LABEL: test13_cttz +; CHECK-NOT: movw +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i32 @test14_cttz(i32* %ptr) { + %v = load i32* %ptr + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 0, %v + %cond = select i1 %tobool, i32 32, i32 %cnt + ret i32 %cond +} +; CHECK-LABEL: test14_cttz +; CHECK-NOT: movd +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i64 @test15_cttz(i64* %ptr) { + %v = load i64* %ptr + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 0, %v + %cond = select i1 %tobool, i64 64, i64 %cnt + ret i64 %cond +} +; CHECK-LABEL: test15_cttz +; CHECK-NOT: movq +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i16 @test16_cttz(i16* %ptr) { + %v = load i16* %ptr + %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true) + %tobool = icmp eq i16 0, %v + %cond = select i1 %tobool, i16 %cnt, i16 16 + ret i16 %cond +} +; CHECK-LABEL: test16_cttz +; CHECK-NOT: movw +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i32 @test17_cttz(i32* %ptr) { + %v = load i32* %ptr + %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) + %tobool = icmp eq i32 0, %v + %cond = select i1 %tobool, i32 %cnt, i32 32 + ret i32 %cond +} +; CHECK-LABEL: test17_cttz +; CHECK-NOT: movd +; CHECK: tzcnt +; CHECK-NEXT: ret + + +define i64 @test18_cttz(i64* %ptr) { + %v = load i64* %ptr + %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true) + %tobool = icmp eq i64 0, %v + %cond = select i1 %tobool, i64 %cnt, i64 64 + ret i64 %cond +} +; CHECK-LABEL: test18_cttz +; CHECK-NOT: movq +; CHECK: tzcnt +; CHECK-NEXT: ret + + +declare i64 @llvm.cttz.i64(i64, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i16 @llvm.cttz.i16(i16, i1) +declare i64 @llvm.ctlz.i64(i64, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i16 @llvm.ctlz.i16(i16, i1) +