diff --git a/test/CodeGen/X86/lzcnt-zext-cmp.ll b/test/CodeGen/X86/lzcnt-zext-cmp.ll index 6465e370c97..7c961a98ad5 100644 --- a/test/CodeGen/X86/lzcnt-zext-cmp.ll +++ b/test/CodeGen/X86/lzcnt-zext-cmp.ll @@ -1,26 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Test patterns which generates lzcnt instructions. ; Eg: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt)) -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s ; Test one 32-bit input, output is 32-bit, no transformations expected. define i32 @test_zext_cmp0(i32 %a) { -; CHECK-LABEL: test_zext_cmp0: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: sete %al -; CHECK-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp0: -; NOFASTLZCNT: # BB#0: # %entry -; NOFASTLZCNT-NEXT: xorl %eax, %eax -; NOFASTLZCNT-NEXT: testl %edi, %edi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp0: +; ALL: # BB#0: # %entry +; ALL-NEXT: xorl %eax, %eax +; ALL-NEXT: testl %edi, %edi +; ALL-NEXT: sete %al +; ALL-NEXT: retq entry: %cmp = icmp eq i32 %a, 0 %conv = zext i1 %cmp to i32 @@ -29,13 +22,13 @@ entry: ; Test two 32-bit inputs, output is 32-bit. define i32 @test_zext_cmp1(i32 %a, i32 %b) { -; CHECK-LABEL: test_zext_cmp1: -; CHECK: # BB#0: -; CHECK-NEXT: lzcntl %edi, %ecx -; CHECK-NEXT: lzcntl %esi, %eax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: shrl $5, %eax -; CHECK-NEXT: retq +; FASTLZCNT-LABEL: test_zext_cmp1: +; FASTLZCNT: # BB#0: +; FASTLZCNT-NEXT: lzcntl %edi, %ecx +; FASTLZCNT-NEXT: lzcntl %esi, %eax +; FASTLZCNT-NEXT: orl %ecx, %eax +; FASTLZCNT-NEXT: shrl $5, %eax +; FASTLZCNT-NEXT: retq ; ; NOFASTLZCNT-LABEL: test_zext_cmp1: ; NOFASTLZCNT: # BB#0: @@ -55,13 +48,13 @@ define i32 @test_zext_cmp1(i32 %a, i32 %b) { ; Test two 64-bit inputs, output is 64-bit. define i64 @test_zext_cmp2(i64 %a, i64 %b) { -; CHECK-LABEL: test_zext_cmp2: -; CHECK: # BB#0: -; CHECK-NEXT: lzcntq %rdi, %rcx -; CHECK-NEXT: lzcntq %rsi, %rax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: shrl $6, %eax -; CHECK-NEXT: retq +; FASTLZCNT-LABEL: test_zext_cmp2: +; FASTLZCNT: # BB#0: +; FASTLZCNT-NEXT: lzcntq %rdi, %rcx +; FASTLZCNT-NEXT: lzcntq %rsi, %rax +; FASTLZCNT-NEXT: orl %ecx, %eax +; FASTLZCNT-NEXT: shrl $6, %eax +; FASTLZCNT-NEXT: retq ; ; NOFASTLZCNT-LABEL: test_zext_cmp2: ; NOFASTLZCNT: # BB#0: @@ -83,27 +76,16 @@ define i64 @test_zext_cmp2(i64 %a, i64 %b) { ; The transform is disabled for the 16-bit case, as we still have to clear the ; upper 16-bits, adding one more instruction. define i16 @test_zext_cmp3(i16 %a, i16 %b) { -; CHECK-LABEL: test_zext_cmp3: -; CHECK: # BB#0: -; CHECK-NEXT: testw %di, %di -; CHECK-NEXT: sete %al -; CHECK-NEXT: testw %si, %si -; CHECK-NEXT: sete %cl -; CHECK-NEXT: orb %al, %cl -; CHECK-NEXT: movzbl %cl, %eax -; CHECK-NEXT: # kill: %AX %AX %EAX -; CHECK-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp3: -; NOFASTLZCNT: # BB#0: -; NOFASTLZCNT-NEXT: testw %di, %di -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: testw %si, %si -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: movzbl %cl, %eax -; NOFASTLZCNT-NEXT: # kill: %AX %AX %EAX -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp3: +; ALL: # BB#0: +; ALL-NEXT: testw %di, %di +; ALL-NEXT: sete %al +; ALL-NEXT: testw %si, %si +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: movzbl %cl, %eax +; ALL-NEXT: # kill: %AX %AX %EAX +; ALL-NEXT: retq %cmp = icmp eq i16 %a, 0 %cmp1 = icmp eq i16 %b, 0 %or = or i1 %cmp, %cmp1 @@ -113,13 +95,13 @@ define i16 @test_zext_cmp3(i16 %a, i16 %b) { ; Test two 32-bit inputs, output is 64-bit. define i64 @test_zext_cmp4(i32 %a, i32 %b) { -; CHECK-LABEL: test_zext_cmp4: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: lzcntl %edi, %ecx -; CHECK-NEXT: lzcntl %esi, %eax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: shrl $5, %eax -; CHECK-NEXT: retq +; FASTLZCNT-LABEL: test_zext_cmp4: +; FASTLZCNT: # BB#0: # %entry +; FASTLZCNT-NEXT: lzcntl %edi, %ecx +; FASTLZCNT-NEXT: lzcntl %esi, %eax +; FASTLZCNT-NEXT: orl %ecx, %eax +; FASTLZCNT-NEXT: shrl $5, %eax +; FASTLZCNT-NEXT: retq ; ; NOFASTLZCNT-LABEL: test_zext_cmp4: ; NOFASTLZCNT: # BB#0: # %entry @@ -140,14 +122,14 @@ entry: ; Test two 64-bit inputs, output is 32-bit. define i32 @test_zext_cmp5(i64 %a, i64 %b) { -; CHECK-LABEL: test_zext_cmp5: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: lzcntq %rdi, %rcx -; CHECK-NEXT: lzcntq %rsi, %rax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: shrl $6, %eax -; CHECK-NEXT: # kill: %EAX %EAX %RAX -; CHECK-NEXT: retq +; FASTLZCNT-LABEL: test_zext_cmp5: +; FASTLZCNT: # BB#0: # %entry +; FASTLZCNT-NEXT: lzcntq %rdi, %rcx +; FASTLZCNT-NEXT: lzcntq %rsi, %rax +; FASTLZCNT-NEXT: orl %ecx, %eax +; FASTLZCNT-NEXT: shrl $6, %eax +; FASTLZCNT-NEXT: # kill: %EAX %EAX %RAX +; FASTLZCNT-NEXT: retq ; ; NOFASTLZCNT-LABEL: test_zext_cmp5: ; NOFASTLZCNT: # BB#0: # %entry @@ -168,15 +150,15 @@ entry: ; Test three 32-bit inputs, output is 32-bit. define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: test_zext_cmp6: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: lzcntl %edi, %eax -; CHECK-NEXT: lzcntl %esi, %ecx -; CHECK-NEXT: orl %eax, %ecx -; CHECK-NEXT: lzcntl %edx, %eax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: shrl $5, %eax -; CHECK-NEXT: retq +; FASTLZCNT-LABEL: test_zext_cmp6: +; FASTLZCNT: # BB#0: # %entry +; FASTLZCNT-NEXT: lzcntl %edi, %eax +; FASTLZCNT-NEXT: lzcntl %esi, %ecx +; FASTLZCNT-NEXT: orl %eax, %ecx +; FASTLZCNT-NEXT: lzcntl %edx, %eax +; FASTLZCNT-NEXT: orl %ecx, %eax +; FASTLZCNT-NEXT: shrl $5, %eax +; FASTLZCNT-NEXT: retq ; ; NOFASTLZCNT-LABEL: test_zext_cmp6: ; NOFASTLZCNT: # BB#0: # %entry @@ -203,15 +185,15 @@ entry: ; Test three 32-bit inputs, output is 32-bit, but compared to test_zext_cmp6 test, ; %.cmp2 inputs' order is inverted. define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: test_zext_cmp7: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: lzcntl %edi, %eax -; CHECK-NEXT: lzcntl %esi, %ecx -; CHECK-NEXT: orl %eax, %ecx -; CHECK-NEXT: lzcntl %edx, %eax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: shrl $5, %eax -; CHECK-NEXT: retq +; FASTLZCNT-LABEL: test_zext_cmp7: +; FASTLZCNT: # BB#0: # %entry +; FASTLZCNT-NEXT: lzcntl %edi, %eax +; FASTLZCNT-NEXT: lzcntl %esi, %ecx +; FASTLZCNT-NEXT: orl %eax, %ecx +; FASTLZCNT-NEXT: lzcntl %edx, %eax +; FASTLZCNT-NEXT: orl %ecx, %eax +; FASTLZCNT-NEXT: shrl $5, %eax +; FASTLZCNT-NEXT: retq ; ; NOFASTLZCNT-LABEL: test_zext_cmp7: ; NOFASTLZCNT: # BB#0: # %entry @@ -237,17 +219,17 @@ entry: ; Test four 32-bit inputs, output is 32-bit. define i32 @test_zext_cmp8(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: test_zext_cmp8: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: lzcntl %edi, %eax -; CHECK-NEXT: lzcntl %esi, %esi -; CHECK-NEXT: lzcntl %edx, %edx -; CHECK-NEXT: orl %eax, %esi -; CHECK-NEXT: lzcntl %ecx, %eax -; CHECK-NEXT: orl %edx, %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: shrl $5, %eax -; CHECK-NEXT: retq +; FASTLZCNT-LABEL: test_zext_cmp8: +; FASTLZCNT: # BB#0: # %entry +; FASTLZCNT-NEXT: lzcntl %edi, %eax +; FASTLZCNT-NEXT: lzcntl %esi, %esi +; FASTLZCNT-NEXT: lzcntl %edx, %edx +; FASTLZCNT-NEXT: orl %eax, %esi +; FASTLZCNT-NEXT: lzcntl %ecx, %eax +; FASTLZCNT-NEXT: orl %edx, %eax +; FASTLZCNT-NEXT: orl %esi, %eax +; FASTLZCNT-NEXT: shrl $5, %eax +; FASTLZCNT-NEXT: retq ; ; NOFASTLZCNT-LABEL: test_zext_cmp8: ; NOFASTLZCNT: # BB#0: # %entry @@ -278,15 +260,15 @@ entry: ; Test one 32-bit input, one 64-bit input, output is 32-bit. define i32 @test_zext_cmp9(i32 %a, i64 %b) { -; CHECK-LABEL: test_zext_cmp9: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: lzcntq %rsi, %rax -; CHECK-NEXT: lzcntl %edi, %ecx -; CHECK-NEXT: shrl $5, %ecx -; CHECK-NEXT: shrl $6, %eax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: # kill: %EAX %EAX %RAX -; CHECK-NEXT: retq +; FASTLZCNT-LABEL: test_zext_cmp9: +; FASTLZCNT: # BB#0: # %entry +; FASTLZCNT-NEXT: lzcntq %rsi, %rax +; FASTLZCNT-NEXT: lzcntl %edi, %ecx +; FASTLZCNT-NEXT: shrl $5, %ecx +; FASTLZCNT-NEXT: shrl $6, %eax +; FASTLZCNT-NEXT: orl %ecx, %eax +; FASTLZCNT-NEXT: # kill: %EAX %EAX %RAX +; FASTLZCNT-NEXT: retq ; ; NOFASTLZCNT-LABEL: test_zext_cmp9: ; NOFASTLZCNT: # BB#0: # %entry @@ -307,25 +289,15 @@ entry: ; Test 2 128-bit inputs, output is 32-bit, no transformations expected. define i32 @test_zext_cmp10(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) { -; CHECK-LABEL: test_zext_cmp10: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: orq %rsi, %rdi -; CHECK-NEXT: sete %al -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: sete %cl -; CHECK-NEXT: orb %al, %cl -; CHECK-NEXT: movzbl %cl, %eax -; CHECK-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp10: -; NOFASTLZCNT: # BB#0: # %entry -; NOFASTLZCNT-NEXT: orq %rsi, %rdi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: orq %rcx, %rdx -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: movzbl %cl, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp10: +; ALL: # BB#0: # %entry +; ALL-NEXT: orq %rsi, %rdi +; ALL-NEXT: sete %al +; ALL-NEXT: orq %rcx, %rdx +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: movzbl %cl, %eax +; ALL-NEXT: retq entry: %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128 %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64 @@ -344,27 +316,17 @@ entry: ; PR31902 Fix a crash in combineOrCmpEqZeroToCtlzSrl under fast math. define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" { -; CHECK-LABEL: test_zext_cmp11: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vucomisd %xmm2, %xmm0 -; CHECK-NEXT: sete %al -; CHECK-NEXT: vucomisd %xmm2, %xmm1 -; CHECK-NEXT: sete %cl -; CHECK-NEXT: orb %al, %cl -; CHECK-NEXT: movzbl %cl, %eax -; CHECK-NEXT: retq ; -; NOFASTLZCNT-LABEL: test_zext_cmp11: -; NOFASTLZCNT: # BB#0: # %entry -; NOFASTLZCNT-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; NOFASTLZCNT-NEXT: vucomisd %xmm2, %xmm0 -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: vucomisd %xmm2, %xmm1 -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: movzbl %cl, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp11: +; ALL: # BB#0: # %entry +; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; ALL-NEXT: vucomisd %xmm2, %xmm0 +; ALL-NEXT: sete %al +; ALL-NEXT: vucomisd %xmm2, %xmm1 +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: movzbl %cl, %eax +; ALL-NEXT: retq entry: %cmp = fcmp fast oeq double %a, 0.000000e+00 %cmp1 = fcmp fast oeq double %b, 0.000000e+00