mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-12 23:40:54 +00:00
2ca896a175
We know that pcmp produces all-ones/all-zeros bitmasks, so we can use that behavior to avoid unnecessary constant loading. One could argue that load+and is actually a better solution for some CPUs (Intel big cores) because shifts don't have the same throughput potential as load+and on those cores, but that should be handled as a CPU-specific later transformation if it ever comes up. Removing the load is the more general x86 optimization. Note that the uneven usage of vpbroadcast in the test cases is filed as PR28505: https://llvm.org/bugs/show_bug.cgi?id=28505 Differential Revision: http://reviews.llvm.org/D22225 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275276 91177308-0d34-0410-b5e6-96231b3b80d8
46 lines
1.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -o - -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -o - -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; zext(icmp) shifted by a uniform constant: the compare already yields
; all-ones/all-zeros lanes, so the shifted zext folds to a single mask
; (pand/vpand of the compare result) — no separate shift-count load.
define <8 x i16> @foo(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: foo:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: foo:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %icmp = icmp eq <8 x i16> %a, %b
  %zext = zext <8 x i1> %icmp to <8 x i16>
  %shl = shl nuw nsw <8 x i16> %zext, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %shl
}

; Don't fail with an assert due to an undef in the buildvector
; Same pattern as @foo but with an undef lane in the shift-amount vector:
; the mask fold above does not apply, and the lowering uses an explicit
; shift-right (to normalize the compare result to 0/1) followed by the
; shift-left, with no constant-pool load.
define <8 x i16> @bar(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: bar:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    psllw $5, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: bar:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    vpsllw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %icmp = icmp eq <8 x i16> %a, %b
  %zext = zext <8 x i1> %icmp to <8 x i16>
  %shl = shl nuw nsw <8 x i16> %zext, <i16 5, i16 undef, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %shl
}