From 8e4a223f7bb3ee0ae6a0888b8e670a6bd4983a0a Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio
Date: Mon, 19 May 2014 20:38:59 +0000
Subject: [PATCH] [X86] Add ISel patterns to improve the selection of TZCNT and LZCNT.

Instructions TZCNT (requires BMI1) and LZCNT (requires LZCNT), always provide
the operand size as output if the input operand is zero.
We can take advantage of this knowledge during instruction selection
stage in order to simplify a few corner cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209159 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.td  |  91 +++++++
 test/CodeGen/X86/lzcnt-tzcnt.ll | 447 ++++++++++++++++++++++++++++++++
 2 files changed, 538 insertions(+)
 create mode 100644 test/CodeGen/X86/lzcnt-tzcnt.ll

diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 23cd496adeb..0d97669b225 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -2002,6 +2002,51 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
                       (implicit EFLAGS)]>, XS;
 }
 
+// LZCNT writes the operand size to the destination when the source is zero,
+// so a cmov that substitutes the operand size for a zero source is redundant.
+// X86cmov yields its second operand when the condition holds and its first
+// operand otherwise, so each operand order is paired with the matching
+// condition code: (ctlz, size) with COND_E and (size, ctlz) with COND_NE.
+let Predicates = [HasLZCNT] in {
+  def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E),
+              (X86cmp GR16:$src, (i16 0))),
+            (LZCNT16rr GR16:$src)>;
+  def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E),
+              (X86cmp GR32:$src, (i32 0))),
+            (LZCNT32rr GR32:$src)>;
+  def : Pat<(X86cmov (ctlz GR64:$src), (i64 64), (X86_COND_E),
+              (X86cmp GR64:$src, (i64 0))),
+            (LZCNT64rr GR64:$src)>;
+  def : Pat<(X86cmov (i16 16), (ctlz GR16:$src), (X86_COND_NE),
+              (X86cmp GR16:$src, (i16 0))),
+            (LZCNT16rr GR16:$src)>;
+  def : Pat<(X86cmov (i32 32), (ctlz GR32:$src), (X86_COND_NE),
+              (X86cmp GR32:$src, (i32 0))),
+            (LZCNT32rr GR32:$src)>;
+  def : Pat<(X86cmov (i64 64), (ctlz GR64:$src), (X86_COND_NE),
+              (X86cmp GR64:$src, (i64 0))),
+            (LZCNT64rr GR64:$src)>;
+
+  def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
+              (X86cmp (loadi16 addr:$src), (i16 0))),
+            (LZCNT16rm addr:$src)>;
+  def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
+              (X86cmp (loadi32 addr:$src), (i32 0))),
+            (LZCNT32rm addr:$src)>;
+  def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
+              (X86cmp (loadi64 addr:$src), (i64 0))),
+            (LZCNT64rm addr:$src)>;
+  def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_NE),
+              (X86cmp (loadi16 addr:$src), (i16 0))),
+            (LZCNT16rm addr:$src)>;
+  def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_NE),
+              (X86cmp (loadi32 addr:$src), (i32 0))),
+            (LZCNT32rm addr:$src)>;
+  def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_NE),
+              (X86cmp (loadi64 addr:$src), (i64 0))),
+            (LZCNT64rm addr:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // BMI Instructions
 //
@@ -2078,6 +2123,52 @@ let Predicates = [HasBMI] in {
             (BLSI64rr GR64:$src)>;
 }
 
+// TZCNT writes the operand size to the destination when the source is zero,
+// so a cmov that substitutes the operand size for a zero source is redundant.
+// X86cmov yields its second operand when the condition holds and its first
+// operand otherwise, so each operand order is paired with the matching
+// condition code: (cttz, size) with COND_E and (size, cttz) with COND_NE.
+let Predicates = [HasBMI] in {
+  def : Pat<(X86cmov (cttz GR16:$src), (i16 16), (X86_COND_E),
+              (X86cmp GR16:$src, (i16 0))),
+            (TZCNT16rr GR16:$src)>;
+  def : Pat<(X86cmov (cttz GR32:$src), (i32 32), (X86_COND_E),
+              (X86cmp GR32:$src, (i32 0))),
+            (TZCNT32rr GR32:$src)>;
+  def : Pat<(X86cmov (cttz GR64:$src), (i64 64), (X86_COND_E),
+              (X86cmp GR64:$src, (i64 0))),
+            (TZCNT64rr GR64:$src)>;
+  def : Pat<(X86cmov (i16 16), (cttz GR16:$src), (X86_COND_NE),
+              (X86cmp GR16:$src, (i16 0))),
+            (TZCNT16rr GR16:$src)>;
+  def : Pat<(X86cmov (i32 32), (cttz GR32:$src), (X86_COND_NE),
+              (X86cmp GR32:$src, (i32 0))),
+            (TZCNT32rr GR32:$src)>;
+  def : Pat<(X86cmov (i64 64), (cttz GR64:$src), (X86_COND_NE),
+              (X86cmp GR64:$src, (i64 0))),
+            (TZCNT64rr GR64:$src)>;
+
+  def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
+              (X86cmp (loadi16 addr:$src), (i16 0))),
+            (TZCNT16rm addr:$src)>;
+  def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
+              (X86cmp (loadi32 addr:$src), (i32 0))),
+            (TZCNT32rm addr:$src)>;
+  def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
+              (X86cmp (loadi64 addr:$src), (i64 0))),
+            (TZCNT64rm addr:$src)>;
+  def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_NE),
+              (X86cmp (loadi16 addr:$src), (i16 0))),
+            (TZCNT16rm addr:$src)>;
+  def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_NE),
+              (X86cmp (loadi32 addr:$src), (i32 0))),
+            (TZCNT32rm addr:$src)>;
+  def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_NE),
+              (X86cmp (loadi64 addr:$src), (i64 0))),
+            (TZCNT64rm addr:$src)>;
+}
+
+
 multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
                           X86MemOperand x86memop, Intrinsic Int,
                           PatFrag ld_frag> {
diff --git a/test/CodeGen/X86/lzcnt-tzcnt.ll b/test/CodeGen/X86/lzcnt-tzcnt.ll
new file mode 100644
index 00000000000..07e4b9d8ce6
--- /dev/null
+++ b/test/CodeGen/X86/lzcnt-tzcnt.ll
@@ -0,0 +1,447 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+lzcnt | FileCheck %s
+
+; LZCNT and TZCNT will always produce the operand size when the input operand
+; is zero. This test is to verify that we efficiently select LZCNT/TZCNT
+; based on the fact that the 'icmp+select' sequence is always redundant
+; in every function defined below.
+
+
+define i16 @test1_ctlz(i16 %v) {
+  %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+  %tobool = icmp eq i16 %v, 0
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test1_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test2_ctlz(i32 %v) {
+  %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 %v, 0
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test2_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test3_ctlz(i64 %v) {
+  %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 %v, 0
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test3_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test4_ctlz(i16 %v) {
+  %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+  %tobool = icmp eq i16 0, %v
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test4_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test5_ctlz(i32 %v) {
+  %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 0, %v
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test5_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test6_ctlz(i64 %v) {
+  %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 0, %v
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test6_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test7_ctlz(i16 %v) {
+  %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+  %tobool = icmp ne i16 0, %v
+  %cond = select i1 %tobool, i16 %cnt, i16 16
+  ret i16 %cond
+}
+; CHECK-LABEL: test7_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test8_ctlz(i32 %v) {
+  %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 0, %v
+  %cond = select i1 %tobool, i32 %cnt, i32 32
+  ret i32 %cond
+}
+; CHECK-LABEL: test8_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test9_ctlz(i64 %v) {
+  %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 0, %v
+  %cond = select i1 %tobool, i64 %cnt, i64 64
+  ret i64 %cond
+}
+; CHECK-LABEL: test9_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test10_ctlz(i16* %ptr) {
+  %v = load i16* %ptr
+  %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+  %tobool = icmp eq i16 %v, 0
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test10_ctlz
+; CHECK-NOT: movw
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test11_ctlz(i32* %ptr) {
+  %v = load i32* %ptr
+  %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 %v, 0
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test11_ctlz
+; CHECK-NOT: movd
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test12_ctlz(i64* %ptr) {
+  %v = load i64* %ptr
+  %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 %v, 0
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test12_ctlz
+; CHECK-NOT: movq
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test13_ctlz(i16* %ptr) {
+  %v = load i16* %ptr
+  %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+  %tobool = icmp eq i16 0, %v
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test13_ctlz
+; CHECK-NOT: movw
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test14_ctlz(i32* %ptr) {
+  %v = load i32* %ptr
+  %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 0, %v
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test14_ctlz
+; CHECK-NOT: movd
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test15_ctlz(i64* %ptr) {
+  %v = load i64* %ptr
+  %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 0, %v
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test15_ctlz
+; CHECK-NOT: movq
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test16_ctlz(i16* %ptr) {
+  %v = load i16* %ptr
+  %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+  %tobool = icmp ne i16 0, %v
+  %cond = select i1 %tobool, i16 %cnt, i16 16
+  ret i16 %cond
+}
+; CHECK-LABEL: test16_ctlz
+; CHECK-NOT: movw
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test17_ctlz(i32* %ptr) {
+  %v = load i32* %ptr
+  %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 0, %v
+  %cond = select i1 %tobool, i32 %cnt, i32 32
+  ret i32 %cond
+}
+; CHECK-LABEL: test17_ctlz
+; CHECK-NOT: movd
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test18_ctlz(i64* %ptr) {
+  %v = load i64* %ptr
+  %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 0, %v
+  %cond = select i1 %tobool, i64 %cnt, i64 64
+  ret i64 %cond
+}
+; CHECK-LABEL: test18_ctlz
+; CHECK-NOT: movq
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test1_cttz(i16 %v) {
+  %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+  %tobool = icmp eq i16 %v, 0
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test1_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test2_cttz(i32 %v) {
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 %v, 0
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test2_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test3_cttz(i64 %v) {
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 %v, 0
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test3_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test4_cttz(i16 %v) {
+  %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+  %tobool = icmp eq i16 0, %v
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test4_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test5_cttz(i32 %v) {
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 0, %v
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test5_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test6_cttz(i64 %v) {
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 0, %v
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test6_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test7_cttz(i16 %v) {
+  %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+  %tobool = icmp ne i16 0, %v
+  %cond = select i1 %tobool, i16 %cnt, i16 16
+  ret i16 %cond
+}
+; CHECK-LABEL: test7_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test8_cttz(i32 %v) {
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 0, %v
+  %cond = select i1 %tobool, i32 %cnt, i32 32
+  ret i32 %cond
+}
+; CHECK-LABEL: test8_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test9_cttz(i64 %v) {
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 0, %v
+  %cond = select i1 %tobool, i64 %cnt, i64 64
+  ret i64 %cond
+}
+; CHECK-LABEL: test9_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test10_cttz(i16* %ptr) {
+  %v = load i16* %ptr
+  %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+  %tobool = icmp eq i16 %v, 0
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test10_cttz
+; CHECK-NOT: movw
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test11_cttz(i32* %ptr) {
+  %v = load i32* %ptr
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 %v, 0
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test11_cttz
+; CHECK-NOT: movd
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test12_cttz(i64* %ptr) {
+  %v = load i64* %ptr
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 %v, 0
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test12_cttz
+; CHECK-NOT: movq
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test13_cttz(i16* %ptr) {
+  %v = load i16* %ptr
+  %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+  %tobool = icmp eq i16 0, %v
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test13_cttz
+; CHECK-NOT: movw
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test14_cttz(i32* %ptr) {
+  %v = load i32* %ptr
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 0, %v
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test14_cttz
+; CHECK-NOT: movd
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test15_cttz(i64* %ptr) {
+  %v = load i64* %ptr
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 0, %v
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test15_cttz
+; CHECK-NOT: movq
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test16_cttz(i16* %ptr) {
+  %v = load i16* %ptr
+  %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+  %tobool = icmp ne i16 0, %v
+  %cond = select i1 %tobool, i16 %cnt, i16 16
+  ret i16 %cond
+}
+; CHECK-LABEL: test16_cttz
+; CHECK-NOT: movw
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test17_cttz(i32* %ptr) {
+  %v = load i32* %ptr
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 0, %v
+  %cond = select i1 %tobool, i32 %cnt, i32 32
+  ret i32 %cond
+}
+; CHECK-LABEL: test17_cttz
+; CHECK-NOT: movd
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test18_cttz(i64* %ptr) {
+  %v = load i64* %ptr
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 0, %v
+  %cond = select i1 %tobool, i64 %cnt, i64 64
+  ret i64 %cond
+}
+; CHECK-LABEL: test18_cttz
+; CHECK-NOT: movq
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+