llvm/test/CodeGen/X86/shift-pcmp.ll
Sanjay Patel 2ca896a175 [x86][SSE/AVX] optimize pcmp results better (PR28484)
We know that pcmp produces all-ones/all-zeros bitmasks, so we can use that behavior to avoid unnecessary constant loading.

One could argue that load+and is actually a better solution for some CPUs (Intel big cores) because shifts don't have the
same throughput potential as load+and on those cores, but that should be handled as a CPU-specific later transformation if
it ever comes up. Removing the load is the more general x86 optimization. Note that the uneven usage of vpbroadcast in the
test cases is filed as PR28505:
https://llvm.org/bugs/show_bug.cgi?id=28505

Differential Revision: http://reviews.llvm.org/D22225



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275276 91177308-0d34-0410-b5e6-96231b3b80d8
2016-07-13 16:04:07 +00:00

46 lines
1.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -o - -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -o - -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; foo - equality compare whose zero-extended result is shifted left by a
; uniform amount.
; Each lane of %a is compared with the matching lane of %b, the i1 result is
; zero-extended to i16 and then shifted left by 5, so a lane yields 32 when the
; inputs are equal and 0 otherwise.
; The assertions below (for both the SSE2 and the AVX invocation) expect a
; packed 16-bit equality compare followed by one AND with a RIP-relative memory
; operand. That operand is matched with a regex, so its contents are not pinned
; by this test (presumably a splat of 32 - confirm against llc output).
define <8 x i16> @foo(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: foo:
; SSE: # BB#0:
; SSE-NEXT: pcmpeqw %xmm1, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: foo:
; AVX: # BB#0:
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
%icmp = icmp eq <8 x i16> %a, %b
%zext = zext <8 x i1> %icmp to <8 x i16>
; Every lane of the shift-amount vector is the same defined constant (5).
%shl = shl nuw nsw <8 x i16> %zext, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
ret <8 x i16> %shl
}
; Don't fail with an assert due to an undef in the buildvector.
; bar - same compare / zero-extend / shift-left sequence as foo, except that
; lane 1 of the constant shift-amount vector is undef instead of 5.
; The assertions below (for both the SSE2 and the AVX invocation) expect a
; packed 16-bit equality compare, a logical right shift by 15 and a left shift
; by 5, with no memory operand.
define <8 x i16> @bar(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: bar:
; SSE: # BB#0:
; SSE-NEXT: pcmpeqw %xmm1, %xmm0
; SSE-NEXT: psrlw $15, %xmm0
; SSE-NEXT: psllw $5, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: bar:
; AVX: # BB#0:
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
; AVX-NEXT: retq
;
%icmp = icmp eq <8 x i16> %a, %b
%zext = zext <8 x i1> %icmp to <8 x i16>
; The second element of the shift-amount vector is undef; this is the input
; shape the test exists to cover.
%shl = shl nuw nsw <8 x i16> %zext, <i16 5, i16 undef, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
ret <8 x i16> %shl
}