llvm/test/CodeGen/X86/ctpop-combine.ll
Michael Kuperstein 664a3a9314 Recommit r274692 - [X86] Transform setcc + movzbl into xorl + setcc
xorl + setcc is generally the preferred sequence due to the partial register
stall setcc + movzbl suffers from. As a bonus, it also encodes one byte smaller.
This fixes PR28146.

The original commit tried inserting an 8bit-subreg into a GR32 (not GR32_ABCD)
which was not appreciated by fast regalloc on 32-bit.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274802 91177308-0d34-0410-b5e6-96231b3b80d8
2016-07-07 22:50:23 +00:00

51 lines
1.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=corei7 | FileCheck %s
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
define i32 @test1(i64 %x) nounwind readnone {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: leaq -1(%rdi), %rcx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testq %rcx, %rdi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cast = trunc i64 %count to i32
%cmp = icmp ugt i32 %cast, 1
%conv = zext i1 %cmp to i32
ret i32 %conv
}
define i32 @test2(i64 %x) nounwind readnone {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: leaq -1(%rdi), %rcx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testq %rcx, %rdi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cmp = icmp ult i64 %count, 2
%conv = zext i1 %cmp to i32
ret i32 %conv
}
define i32 @test3(i64 %x) nounwind readnone {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: popcntq %rdi, %rax
; CHECK-NEXT: andb $63, %al
; CHECK-NEXT: cmpb $2, %al
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cast = trunc i64 %count to i6 ; Too small for 0-64
%cmp = icmp ult i6 %cast, 2
%conv = zext i1 %cmp to i32
ret i32 %conv
}