mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-30 15:10:33 +00:00
[X86][AVX2] Dropped -mcpu from avx2 arithmetic/intrinsics tests
Use triple and attribute only, for consistency.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306531 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
32d37d6720
commit
a06118f48b
@ -1,15 +1,15 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpaddq:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpaddq:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = add <4 x i64> %i, %j
|
||||
@ -18,12 +18,12 @@ define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
|
||||
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpaddd:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpaddd:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = add <8 x i32> %i, %j
|
||||
@ -32,12 +32,12 @@ define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
|
||||
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpaddw:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpaddw:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = add <16 x i16> %i, %j
|
||||
@ -46,12 +46,12 @@ define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
|
||||
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpaddb:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpaddb:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = add <32 x i8> %i, %j
|
||||
@ -60,12 +60,12 @@ define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
|
||||
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpsubq:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsubq %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpsubq:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsubq %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = sub <4 x i64> %i, %j
|
||||
@ -74,12 +74,12 @@ define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
|
||||
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpsubd:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsubd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpsubd:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = sub <8 x i32> %i, %j
|
||||
@ -88,12 +88,12 @@ define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
|
||||
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpsubw:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsubw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpsubw:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = sub <16 x i16> %i, %j
|
||||
@ -102,12 +102,12 @@ define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
|
||||
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpsubb:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsubb %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpsubb:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsubb %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = sub <32 x i8> %i, %j
|
||||
@ -116,12 +116,12 @@ define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
|
||||
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpmulld:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmulld %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpmulld:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmulld %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = mul <8 x i32> %i, %j
|
||||
@ -130,12 +130,12 @@ define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
|
||||
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
; X32-LABEL: test_vpmullw:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpmullw:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%x = mul <16 x i16> %i, %j
|
||||
@ -144,7 +144,7 @@ define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone
|
||||
|
||||
define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
|
||||
; X32-LABEL: mul_v16i8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovsxbw %xmm1, %ymm1
|
||||
; X32-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; X32-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
@ -157,7 +157,7 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_v16i8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovsxbw %xmm1, %ymm1
|
||||
; X64-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
@ -174,7 +174,7 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
|
||||
|
||||
define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
; X32-LABEL: mul_v32i8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; X32-NEXT: vpmovsxbw %xmm2, %ymm2
|
||||
; X32-NEXT: vextracti128 $1, %ymm0, %xmm3
|
||||
@ -196,7 +196,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_v32i8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; X64-NEXT: vpmovsxbw %xmm2, %ymm2
|
||||
; X64-NEXT: vextracti128 $1, %ymm0, %xmm3
|
||||
@ -222,7 +222,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
|
||||
define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
; X32-LABEL: mul_v4i64:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlq $32, %ymm0, %ymm2
|
||||
; X32-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
|
||||
; X32-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
@ -234,7 +234,7 @@ define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_v4i64:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlq $32, %ymm0, %ymm2
|
||||
; X64-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
|
||||
; X64-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
@ -250,12 +250,12 @@ define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
|
||||
define <8 x i32> @mul_const1(<8 x i32> %x) {
|
||||
; X32-LABEL: mul_const1:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpaddd %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const1:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpaddd %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
|
||||
@ -264,12 +264,12 @@ define <8 x i32> @mul_const1(<8 x i32> %x) {
|
||||
|
||||
define <4 x i64> @mul_const2(<4 x i64> %x) {
|
||||
; X32-LABEL: mul_const2:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllq $2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const2:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllq $2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
|
||||
@ -278,12 +278,12 @@ define <4 x i64> @mul_const2(<4 x i64> %x) {
|
||||
|
||||
define <16 x i16> @mul_const3(<16 x i16> %x) {
|
||||
; X32-LABEL: mul_const3:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const3:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
@ -292,13 +292,13 @@ define <16 x i16> @mul_const3(<16 x i16> %x) {
|
||||
|
||||
define <4 x i64> @mul_const4(<4 x i64> %x) {
|
||||
; X32-LABEL: mul_const4:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpsubq %ymm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const4:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vpsubq %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
@ -308,12 +308,12 @@ define <4 x i64> @mul_const4(<4 x i64> %x) {
|
||||
|
||||
define <8 x i32> @mul_const5(<8 x i32> %x) {
|
||||
; X32-LABEL: mul_const5:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const5:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -322,12 +322,12 @@ define <8 x i32> @mul_const5(<8 x i32> %x) {
|
||||
|
||||
define <8 x i32> @mul_const6(<8 x i32> %x) {
|
||||
; X32-LABEL: mul_const6:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: vpmulld LCPI18_0, %ymm0, %ymm0
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmulld {{\.LCPI.*}}, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const6:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
|
||||
@ -336,13 +336,13 @@ define <8 x i32> @mul_const6(<8 x i32> %x) {
|
||||
|
||||
define <8 x i64> @mul_const7(<8 x i64> %x) {
|
||||
; X32-LABEL: mul_const7:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpaddq %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: vpaddq %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const7:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpaddq %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: vpaddq %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: retq
|
||||
@ -352,12 +352,12 @@ define <8 x i64> @mul_const7(<8 x i64> %x) {
|
||||
|
||||
define <8 x i16> @mul_const8(<8 x i16> %x) {
|
||||
; X32-LABEL: mul_const8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
@ -366,14 +366,14 @@ define <8 x i16> @mul_const8(<8 x i16> %x) {
|
||||
|
||||
define <8 x i32> @mul_const9(<8 x i32> %x) {
|
||||
; X32-LABEL: mul_const9:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl $2, %eax
|
||||
; X32-NEXT: vmovd %eax, %xmm1
|
||||
; X32-NEXT: vpmulld %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const9:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movl $2, %eax
|
||||
; X64-NEXT: vmovd %eax, %xmm1
|
||||
; X64-NEXT: vpmulld %ymm1, %ymm0, %ymm0
|
||||
@ -385,13 +385,13 @@ define <8 x i32> @mul_const9(<8 x i32> %x) {
|
||||
; %x * 0x01010101
|
||||
define <4 x i32> @mul_const10(<4 x i32> %x) {
|
||||
; X32-LABEL: mul_const10:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: vpbroadcastd LCPI22_0, %xmm1
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
|
||||
; X32-NEXT: vpmulld %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const10:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
|
||||
; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
@ -402,13 +402,13 @@ define <4 x i32> @mul_const10(<4 x i32> %x) {
|
||||
; %x * 0x80808080
|
||||
define <4 x i32> @mul_const11(<4 x i32> %x) {
|
||||
; X32-LABEL: mul_const11:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: vpbroadcastd LCPI23_0, %xmm1
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
|
||||
; X32-NEXT: vpmulld %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul_const11:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
|
||||
; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
|
@ -1,15 +1,15 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define <8 x i32> @v8i32_cmpgt(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
; X32-LABEL: v8i32_cmpgt:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: v8i32_cmpgt:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%bincmp = icmp slt <8 x i32> %i, %j
|
||||
@ -19,12 +19,12 @@ define <8 x i32> @v8i32_cmpgt(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
|
||||
define <4 x i64> @v4i64_cmpgt(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
; X32-LABEL: v4i64_cmpgt:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: v4i64_cmpgt:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%bincmp = icmp slt <4 x i64> %i, %j
|
||||
@ -34,12 +34,12 @@ define <4 x i64> @v4i64_cmpgt(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
|
||||
define <16 x i16> @v16i16_cmpgt(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
; X32-LABEL: v16i16_cmpgt:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: v16i16_cmpgt:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%bincmp = icmp slt <16 x i16> %i, %j
|
||||
@ -49,12 +49,12 @@ define <16 x i16> @v16i16_cmpgt(<16 x i16> %i, <16 x i16> %j) nounwind readnone
|
||||
|
||||
define <32 x i8> @v32i8_cmpgt(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
; X32-LABEL: v32i8_cmpgt:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: v32i8_cmpgt:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%bincmp = icmp slt <32 x i8> %i, %j
|
||||
@ -64,12 +64,12 @@ define <32 x i8> @v32i8_cmpgt(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
|
||||
define <8 x i32> @int256_cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
; X32-LABEL: int256_cmpeq:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: int256_cmpeq:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%bincmp = icmp eq <8 x i32> %i, %j
|
||||
@ -79,12 +79,12 @@ define <8 x i32> @int256_cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
|
||||
define <4 x i64> @v4i64_cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
; X32-LABEL: v4i64_cmpeq:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: v4i64_cmpeq:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%bincmp = icmp eq <4 x i64> %i, %j
|
||||
@ -94,12 +94,12 @@ define <4 x i64> @v4i64_cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
|
||||
define <16 x i16> @v16i16_cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
; X32-LABEL: v16i16_cmpeq:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: v16i16_cmpeq:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%bincmp = icmp eq <16 x i16> %i, %j
|
||||
@ -109,12 +109,12 @@ define <16 x i16> @v16i16_cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone
|
||||
|
||||
define <32 x i8> @v32i8_cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
; X32-LABEL: v32i8_cmpeq:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: v32i8_cmpeq:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%bincmp = icmp eq <32 x i8> %i, %j
|
||||
|
@ -1,21 +1,21 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
|
||||
; X32-LABEL: trunc4:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: trunc4:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%B = trunc <4 x i64> %A to <4 x i32>
|
||||
@ -24,18 +24,18 @@ define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
|
||||
|
||||
define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
|
||||
; X32-LABEL: trunc8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: trunc8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%B = trunc <8 x i32> %A to <8 x i16>
|
||||
@ -44,12 +44,12 @@ define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
|
||||
|
||||
define <4 x i64> @sext4(<4 x i32> %A) nounwind {
|
||||
; X32-LABEL: sext4:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovsxdq %xmm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sext4:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovsxdq %xmm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%B = sext <4 x i32> %A to <4 x i64>
|
||||
@ -58,12 +58,12 @@ define <4 x i64> @sext4(<4 x i32> %A) nounwind {
|
||||
|
||||
define <8 x i32> @sext8(<8 x i16> %A) nounwind {
|
||||
; X32-LABEL: sext8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sext8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%B = sext <8 x i16> %A to <8 x i32>
|
||||
@ -72,12 +72,12 @@ define <8 x i32> @sext8(<8 x i16> %A) nounwind {
|
||||
|
||||
define <4 x i64> @zext4(<4 x i32> %A) nounwind {
|
||||
; X32-LABEL: zext4:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: zext4:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X64-NEXT: retq
|
||||
%B = zext <4 x i32> %A to <4 x i64>
|
||||
@ -86,12 +86,12 @@ define <4 x i64> @zext4(<4 x i32> %A) nounwind {
|
||||
|
||||
define <8 x i32> @zext8(<8 x i16> %A) nounwind {
|
||||
; X32-LABEL: zext8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: zext8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X64-NEXT: retq
|
||||
%B = zext <8 x i16> %A to <8 x i32>
|
||||
@ -100,13 +100,13 @@ define <8 x i32> @zext8(<8 x i16> %A) nounwind {
|
||||
|
||||
define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
|
||||
; X32-LABEL: zext_8i8_8i32:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: vpand LCPI6_0, %xmm0, %xmm0
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: zext_8i8_8i32:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X64-NEXT: retq
|
||||
@ -116,12 +116,12 @@ define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
|
||||
|
||||
define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
|
||||
; X32-LABEL: zext_16i8_16i16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: zext_16i8_16i16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; X64-NEXT: retq
|
||||
%t = zext <16 x i8> %z to <16 x i16>
|
||||
@ -130,12 +130,12 @@ define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
|
||||
|
||||
define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
|
||||
; X32-LABEL: sext_16i8_16i16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sext_16i8_16i16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%t = sext <16 x i8> %z to <16 x i16>
|
||||
@ -144,7 +144,7 @@ define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
|
||||
|
||||
define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
|
||||
; X32-LABEL: trunc_16i16_16i8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
|
||||
; X32-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
||||
@ -154,7 +154,7 @@ define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: trunc_16i16_16i8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X64-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
|
||||
; X64-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
||||
@ -168,13 +168,13 @@ define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
|
||||
|
||||
define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
|
||||
; X32-LABEL: load_sext_test1:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpmovsxdq (%eax), %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: load_sext_test1:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovsxdq (%rdi), %ymm0
|
||||
; X64-NEXT: retq
|
||||
%X = load <4 x i32>, <4 x i32>* %ptr
|
||||
@ -184,13 +184,13 @@ define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
|
||||
|
||||
define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
|
||||
; X32-LABEL: load_sext_test2:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpmovsxbq (%eax), %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: load_sext_test2:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovsxbq (%rdi), %ymm0
|
||||
; X64-NEXT: retq
|
||||
%X = load <4 x i8>, <4 x i8>* %ptr
|
||||
@ -200,13 +200,13 @@ define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
|
||||
|
||||
define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
|
||||
; X32-LABEL: load_sext_test3:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpmovsxwq (%eax), %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: load_sext_test3:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovsxwq (%rdi), %ymm0
|
||||
; X64-NEXT: retq
|
||||
%X = load <4 x i16>, <4 x i16>* %ptr
|
||||
@ -216,13 +216,13 @@ define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
|
||||
|
||||
define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
|
||||
; X32-LABEL: load_sext_test4:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpmovsxwd (%eax), %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: load_sext_test4:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovsxwd (%rdi), %ymm0
|
||||
; X64-NEXT: retq
|
||||
%X = load <8 x i16>, <8 x i16>* %ptr
|
||||
@ -232,13 +232,13 @@ define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
|
||||
|
||||
define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
|
||||
; X32-LABEL: load_sext_test5:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpmovsxbd (%eax), %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: load_sext_test5:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovsxbd (%rdi), %ymm0
|
||||
; X64-NEXT: retq
|
||||
%X = load <8 x i8>, <8 x i8>* %ptr
|
||||
|
@ -1,17 +1,17 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X64
|
||||
|
||||
; This test checks combinations of FNEG and FMA intrinsics
|
||||
|
||||
define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
; X32-LABEL: test1:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -24,12 +24,12 @@ declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x f
|
||||
|
||||
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; X32-LABEL: test2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -42,14 +42,14 @@ declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x
|
||||
|
||||
define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; X32-LABEL: test3:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
|
||||
; X32-NEXT: vbroadcastss LCPI2_0, %xmm1
|
||||
; X32-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
|
||||
; X32-NEXT: vxorps %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
|
||||
; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
|
||||
; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0
|
||||
@ -64,12 +64,12 @@ declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4
|
||||
|
||||
define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
; X32-LABEL: test4:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test4:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -80,14 +80,14 @@ entry:
|
||||
|
||||
define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
; X32-LABEL: test5:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: vbroadcastss LCPI4_0, %ymm3
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vbroadcastss {{\.LCPI.*}}, %ymm3
|
||||
; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2
|
||||
; X32-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test5:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
|
||||
; X64-NEXT: vxorps %ymm3, %ymm2, %ymm2
|
||||
; X64-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
|
||||
@ -103,12 +103,12 @@ declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x f
|
||||
|
||||
define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; X32-LABEL: test6:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test6:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -1,13 +1,13 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
|
||||
<4 x i32>, <4 x float>, i8) nounwind readonly
|
||||
|
||||
define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
|
||||
; X32-LABEL: test_x86_avx2_gather_d_ps:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; X32-NEXT: vgatherdps %xmm1, (%eax,%xmm0,2), %xmm2
|
||||
@ -15,7 +15,7 @@ define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, <4 x i32> %idx, <4 x floa
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_x86_avx2_gather_d_ps:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; X64-NEXT: vgatherdps %xmm1, (%rdi,%xmm0,2), %xmm2
|
||||
; X64-NEXT: vmovaps %xmm2, %xmm0
|
||||
@ -30,7 +30,7 @@ declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
|
||||
|
||||
define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
|
||||
; X32-LABEL: test_x86_avx2_gather_d_pd:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; X32-NEXT: vgatherdpd %xmm1, (%eax,%xmm0,2), %xmm2
|
||||
@ -38,7 +38,7 @@ define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1, <4 x i32> %idx, <2 x dou
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_x86_avx2_gather_d_pd:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; X64-NEXT: vgatherdpd %xmm1, (%rdi,%xmm0,2), %xmm2
|
||||
; X64-NEXT: vmovapd %xmm2, %xmm0
|
||||
@ -53,7 +53,7 @@ declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
|
||||
|
||||
define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1, <8 x i32> %idx, <8 x float> %mask) {
|
||||
; X32-LABEL: test_x86_avx2_gather_d_ps_256:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vxorps %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vgatherdps %ymm1, (%eax,%ymm0,4), %ymm2
|
||||
@ -61,7 +61,7 @@ define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1, <8 x i32> %idx, <8 x
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_x86_avx2_gather_d_ps_256:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vxorps %ymm2, %ymm2, %ymm2
|
||||
; X64-NEXT: vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
|
||||
; X64-NEXT: vmovaps %ymm2, %ymm0
|
||||
@ -76,7 +76,7 @@ declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
|
||||
|
||||
define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x double> %mask) {
|
||||
; X32-LABEL: test_x86_avx2_gather_d_pd_256:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vxorpd %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vgatherdpd %ymm1, (%eax,%xmm0,8), %ymm2
|
||||
@ -84,7 +84,7 @@ define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_x86_avx2_gather_d_pd_256:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vxorpd %ymm2, %ymm2, %ymm2
|
||||
; X64-NEXT: vgatherdpd %ymm1, (%rdi,%xmm0,8), %ymm2
|
||||
; X64-NEXT: vmovapd %ymm2, %ymm0
|
||||
@ -96,7 +96,7 @@ define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x
|
||||
|
||||
define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
|
||||
; X32-LABEL: test_mm_i32gather_epi32:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
@ -105,7 +105,7 @@ define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_i32gather_epi32:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1
|
||||
@ -122,7 +122,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>
|
||||
|
||||
define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
|
||||
; X32-LABEL: test_mm_i32gather_pd:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
||||
@ -131,7 +131,7 @@ define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_i32gather_pd:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
||||
; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
|
||||
|
@ -1,17 +1,17 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
|
||||
; X32-LABEL: vpandn:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpsubq %ymm1, %ymm0, %ymm1
|
||||
; X32-NEXT: vpandn %ymm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vpandn:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vpsubq %ymm1, %ymm0, %ymm1
|
||||
; X64-NEXT: vpandn %ymm0, %ymm1, %ymm0
|
||||
@ -26,14 +26,14 @@ entry:
|
||||
|
||||
define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
|
||||
; X32-LABEL: vpand:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vpsubq %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vpand:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; X64-NEXT: vpsubq %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
@ -47,14 +47,14 @@ entry:
|
||||
|
||||
define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
|
||||
; X32-LABEL: vpor:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vpsubq %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vpor:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; X64-NEXT: vpsubq %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
@ -68,14 +68,14 @@ entry:
|
||||
|
||||
define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
|
||||
; X32-LABEL: vpxor:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vpsubq %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vpxor:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; X64-NEXT: vpsubq %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
@ -89,14 +89,14 @@ entry:
|
||||
|
||||
define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) {
|
||||
; X32-LABEL: vpblendvb:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||
; X32-NEXT: vpand LCPI4_0, %ymm0, %ymm0
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
|
||||
; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vpblendvb:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; X64-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
|
||||
@ -107,12 +107,12 @@ define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) {
|
||||
|
||||
define <8 x i32> @allOnes() nounwind {
|
||||
; X32-LABEL: allOnes:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: allOnes:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
@ -120,12 +120,12 @@ define <8 x i32> @allOnes() nounwind {
|
||||
|
||||
define <16 x i16> @allOnes2() nounwind {
|
||||
; X32-LABEL: allOnes2:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: allOnes2:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
ret <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
|
@ -1,15 +1,15 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
|
||||
; X32-LABEL: phaddw1:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vphaddw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: phaddw1:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vphaddw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
|
||||
@ -20,12 +20,12 @@ define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
|
||||
|
||||
define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
|
||||
; X32-LABEL: phaddw2:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vphaddw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: phaddw2:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vphaddw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
|
||||
@ -36,12 +36,12 @@ define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
|
||||
|
||||
define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
|
||||
; X32-LABEL: phaddd1:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vphaddd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: phaddd1:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vphaddd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
|
||||
@ -52,12 +52,12 @@ define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
|
||||
|
||||
define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
|
||||
; X32-LABEL: phaddd2:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vphaddd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: phaddd2:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vphaddd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
|
||||
@ -68,12 +68,12 @@ define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
|
||||
|
||||
define <8 x i32> @phaddd3(<8 x i32> %x) {
|
||||
; X32-LABEL: phaddd3:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vphaddd %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: phaddd3:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vphaddd %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
|
||||
@ -84,12 +84,12 @@ define <8 x i32> @phaddd3(<8 x i32> %x) {
|
||||
|
||||
define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
|
||||
; X32-LABEL: phsubw1:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vphsubw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: phsubw1:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vphsubw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
|
||||
@ -100,12 +100,12 @@ define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
|
||||
|
||||
define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
|
||||
; X32-LABEL: phsubd1:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vphsubd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: phsubd1:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vphsubd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
|
||||
@ -116,12 +116,12 @@ define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
|
||||
|
||||
define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) {
|
||||
; X32-LABEL: phsubd2:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vphsubd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: phsubd2:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vphsubd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 undef, i32 8, i32 undef, i32 4, i32 6, i32 12, i32 14>
|
||||
|
@ -1,15 +1,15 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
|
||||
; X32-LABEL: variable_shl0:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_shl0:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%k = shl <4 x i32> %x, %y
|
||||
@ -18,12 +18,12 @@ define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
|
||||
|
||||
define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
|
||||
; X32-LABEL: variable_shl1:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_shl1:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%k = shl <8 x i32> %x, %y
|
||||
@ -32,12 +32,12 @@ define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
|
||||
|
||||
define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
|
||||
; X32-LABEL: variable_shl2:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_shl2:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%k = shl <2 x i64> %x, %y
|
||||
@ -46,12 +46,12 @@ define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
|
||||
|
||||
define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
|
||||
; X32-LABEL: variable_shl3:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_shl3:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%k = shl <4 x i64> %x, %y
|
||||
@ -60,12 +60,12 @@ define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
|
||||
|
||||
define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
|
||||
; X32-LABEL: variable_srl0:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_srl0:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%k = lshr <4 x i32> %x, %y
|
||||
@ -74,12 +74,12 @@ define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
|
||||
|
||||
define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
|
||||
; X32-LABEL: variable_srl1:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_srl1:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%k = lshr <8 x i32> %x, %y
|
||||
@ -88,12 +88,12 @@ define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
|
||||
|
||||
define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
|
||||
; X32-LABEL: variable_srl2:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_srl2:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%k = lshr <2 x i64> %x, %y
|
||||
@ -102,12 +102,12 @@ define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
|
||||
|
||||
define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
|
||||
; X32-LABEL: variable_srl3:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_srl3:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%k = lshr <4 x i64> %x, %y
|
||||
@ -116,12 +116,12 @@ define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
|
||||
|
||||
define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
|
||||
; X32-LABEL: variable_sra0:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsravd %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_sra0:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%k = ashr <4 x i32> %x, %y
|
||||
@ -130,12 +130,12 @@ define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
|
||||
|
||||
define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
|
||||
; X32-LABEL: variable_sra1:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_sra1:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%k = ashr <8 x i32> %x, %y
|
||||
@ -146,12 +146,12 @@ define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
|
||||
|
||||
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
|
||||
; X32-LABEL: vshift00:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpslld $2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vshift00:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpslld $2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
|
||||
@ -160,12 +160,12 @@ define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
|
||||
|
||||
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
|
||||
; X32-LABEL: vshift01:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllw $2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vshift01:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllw $2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
|
||||
@ -174,12 +174,12 @@ define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
|
||||
|
||||
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
|
||||
; X32-LABEL: vshift02:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllq $2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vshift02:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllq $2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
|
||||
@ -190,12 +190,12 @@ define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
|
||||
|
||||
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
|
||||
; X32-LABEL: vshift03:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrld $2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vshift03:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrld $2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
|
||||
@ -204,12 +204,12 @@ define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
|
||||
|
||||
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
|
||||
; X32-LABEL: vshift04:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlw $2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vshift04:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlw $2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
|
||||
@ -218,12 +218,12 @@ define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
|
||||
|
||||
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
|
||||
; X32-LABEL: vshift05:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlq $2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vshift05:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlq $2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
|
||||
@ -234,12 +234,12 @@ define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
|
||||
|
||||
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
|
||||
; X32-LABEL: vshift06:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrad $2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vshift06:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrad $2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
|
||||
@ -248,12 +248,12 @@ define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
|
||||
|
||||
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
|
||||
; X32-LABEL: vshift07:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsraw $2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vshift07:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsraw $2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
|
||||
@ -262,13 +262,13 @@ define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
|
||||
|
||||
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
|
||||
; X32-LABEL: variable_sra0_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsravd (%eax), %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_sra0_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsravd (%rdi), %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <4 x i32>, <4 x i32>* %y
|
||||
@ -278,13 +278,13 @@ define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
|
||||
|
||||
define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
|
||||
; X32-LABEL: variable_sra1_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsravd (%eax), %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_sra1_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsravd (%rdi), %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <8 x i32>, <8 x i32>* %y
|
||||
@ -294,13 +294,13 @@ define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
|
||||
|
||||
define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
|
||||
; X32-LABEL: variable_shl0_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsllvd (%eax), %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_shl0_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllvd (%rdi), %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <4 x i32>, <4 x i32>* %y
|
||||
@ -310,13 +310,13 @@ define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
|
||||
|
||||
define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
|
||||
; X32-LABEL: variable_shl1_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsllvd (%eax), %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_shl1_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllvd (%rdi), %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <8 x i32>, <8 x i32>* %y
|
||||
@ -326,13 +326,13 @@ define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
|
||||
|
||||
define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
|
||||
; X32-LABEL: variable_shl2_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsllvq (%eax), %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_shl2_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllvq (%rdi), %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <2 x i64>, <2 x i64>* %y
|
||||
@ -342,13 +342,13 @@ define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
|
||||
|
||||
define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
|
||||
; X32-LABEL: variable_shl3_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsllvq (%eax), %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_shl3_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllvq (%rdi), %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <4 x i64>, <4 x i64>* %y
|
||||
@ -358,13 +358,13 @@ define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
|
||||
|
||||
define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
|
||||
; X32-LABEL: variable_srl0_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsrlvd (%eax), %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_srl0_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <4 x i32>, <4 x i32>* %y
|
||||
@ -374,13 +374,13 @@ define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
|
||||
|
||||
define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
|
||||
; X32-LABEL: variable_srl1_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsrlvd (%eax), %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_srl1_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <8 x i32>, <8 x i32>* %y
|
||||
@ -390,13 +390,13 @@ define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
|
||||
|
||||
define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
|
||||
; X32-LABEL: variable_srl2_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsrlvq (%eax), %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_srl2_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <2 x i64>, <2 x i64>* %y
|
||||
@ -406,13 +406,13 @@ define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
|
||||
|
||||
define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
|
||||
; X32-LABEL: variable_srl3_load:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpsrlvq (%eax), %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_srl3_load:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
%y1 = load <4 x i64>, <4 x i64>* %y
|
||||
@ -422,13 +422,13 @@ define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
|
||||
|
||||
define <32 x i8> @shl9(<32 x i8> %A) nounwind {
|
||||
; X32-LABEL: shl9:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; X32-NEXT: vpand LCPI28_0, %ymm0, %ymm0
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: shl9:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
@ -438,13 +438,13 @@ define <32 x i8> @shl9(<32 x i8> %A) nounwind {
|
||||
|
||||
define <32 x i8> @shr9(<32 x i8> %A) nounwind {
|
||||
; X32-LABEL: shr9:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; X32-NEXT: vpand LCPI29_0, %ymm0, %ymm0
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: shr9:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
@ -454,13 +454,13 @@ define <32 x i8> @shr9(<32 x i8> %A) nounwind {
|
||||
|
||||
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
|
||||
; X32-LABEL: sra_v32i8_7:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sra_v32i8_7:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
@ -470,16 +470,16 @@ define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
|
||||
|
||||
define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
|
||||
; X32-LABEL: sra_v32i8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; X32-NEXT: vpand LCPI31_0, %ymm0, %ymm0
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
|
||||
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; X32-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsubb %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sra_v32i8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
@ -492,13 +492,13 @@ define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
|
||||
|
||||
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
|
||||
; X32-LABEL: sext_v16i16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllw $8, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsraw $8, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sext_v16i16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllw $8, %ymm0, %ymm0
|
||||
; X64-NEXT: vpsraw $8, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
@ -509,13 +509,13 @@ define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
|
||||
|
||||
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
|
||||
; X32-LABEL: sext_v8i32:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpslld $16, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrad $16, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: sext_v8i32:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpslld $16, %ymm0, %ymm0
|
||||
; X64-NEXT: vpsrad $16, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
@ -526,24 +526,24 @@ define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
|
||||
|
||||
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; X32-LABEL: variable_shl16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_shl16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%res = shl <8 x i16> %lhs, %rhs
|
||||
@ -552,24 +552,24 @@ define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
|
||||
define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; X32-LABEL: variable_ashr16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X32-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_ashr16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X64-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%res = ashr <8 x i16> %lhs, %rhs
|
||||
@ -578,24 +578,24 @@ define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
|
||||
define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; X32-LABEL: variable_lshr16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: variable_lshr16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%res = lshr <8 x i16> %lhs, %rhs
|
||||
|
@ -1,16 +1,16 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
; AVX2 Logical Shift Left
|
||||
|
||||
define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sllw_1:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllw_1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
|
||||
@ -19,12 +19,12 @@ entry:
|
||||
|
||||
define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sllw_2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpaddw %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllw_2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpaddw %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -34,12 +34,12 @@ entry:
|
||||
|
||||
define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sllw_3:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsllw $15, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllw_3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsllw $15, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -49,11 +49,11 @@ entry:
|
||||
|
||||
define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_slld_1:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_slld_1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -62,12 +62,12 @@ entry:
|
||||
|
||||
define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_slld_2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpaddd %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_slld_2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpaddd %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -77,14 +77,14 @@ entry:
|
||||
|
||||
define <8 x i32> @test_vpslld_var(i32 %shift) {
|
||||
; X32-LABEL: test_vpslld_var:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
|
||||
; X32-NEXT: vpslld %xmm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpslld_var:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vmovd %edi, %xmm0
|
||||
; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
|
||||
; X64-NEXT: vpslld %xmm0, %ymm1, %ymm0
|
||||
@ -96,12 +96,12 @@ define <8 x i32> @test_vpslld_var(i32 %shift) {
|
||||
|
||||
define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_slld_3:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_slld_3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -111,11 +111,11 @@ entry:
|
||||
|
||||
define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_sllq_1:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllq_1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
|
||||
@ -124,12 +124,12 @@ entry:
|
||||
|
||||
define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_sllq_2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpaddq %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllq_2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpaddq %ymm0, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -139,12 +139,12 @@ entry:
|
||||
|
||||
define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_sllq_3:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsllq $63, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllq_3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsllq $63, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -156,11 +156,11 @@ entry:
|
||||
|
||||
define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sraw_1:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sraw_1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
|
||||
@ -169,12 +169,12 @@ entry:
|
||||
|
||||
define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sraw_2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsraw $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sraw_2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsraw $1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -184,12 +184,12 @@ entry:
|
||||
|
||||
define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sraw_3:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsraw $15, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sraw_3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsraw $15, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -199,11 +199,11 @@ entry:
|
||||
|
||||
define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srad_1:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srad_1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -212,12 +212,12 @@ entry:
|
||||
|
||||
define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srad_2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsrad $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srad_2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsrad $1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -227,12 +227,12 @@ entry:
|
||||
|
||||
define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srad_3:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsrad $31, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srad_3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsrad $31, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -244,11 +244,11 @@ entry:
|
||||
|
||||
define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_srlw_1:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlw_1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
|
||||
@ -257,12 +257,12 @@ entry:
|
||||
|
||||
define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_srlw_2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsrlw $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlw_2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsrlw $1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -272,12 +272,12 @@ entry:
|
||||
|
||||
define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_srlw_3:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsrlw $15, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlw_3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsrlw $15, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -287,11 +287,11 @@ entry:
|
||||
|
||||
define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srld_1:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srld_1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -300,12 +300,12 @@ entry:
|
||||
|
||||
define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srld_2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsrld $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srld_2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsrld $1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -315,12 +315,12 @@ entry:
|
||||
|
||||
define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srld_3:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsrld $31, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srld_3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsrld $31, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -330,11 +330,11 @@ entry:
|
||||
|
||||
define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_srlq_1:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlq_1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
|
||||
@ -343,12 +343,12 @@ entry:
|
||||
|
||||
define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_srlq_2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsrlq $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlq_2:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsrlq $1, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -358,12 +358,12 @@ entry:
|
||||
|
||||
define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_srlq_3:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpsrlq $63, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlq_3:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpsrlq $63, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -373,18 +373,17 @@ entry:
|
||||
|
||||
define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
|
||||
; X32-LABEL: srl_trunc_and_v4i64:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
|
||||
; X32-NEXT: vpbroadcastd LCPI25_0, %xmm2
|
||||
; X32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm2
|
||||
; X32-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: srl_trunc_and_v4i64:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
|
||||
; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
|
||||
@ -392,7 +391,6 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
|
||||
; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
|
||||
%trunc = trunc <4 x i64> %and to <4 x i32>
|
||||
%sra = lshr <4 x i32> %x, %trunc
|
||||
@ -405,85 +403,80 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
|
||||
|
||||
define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||
; X32-LABEL: shl_8i16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: shl_8i16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%shl = shl <8 x i16> %r, %a
|
||||
ret <8 x i16> %shl
|
||||
}
|
||||
|
||||
define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||
; X32-LABEL: shl_16i16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
|
||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: shl_16i16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
|
||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
|
||||
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%shl = shl <16 x i16> %r, %a
|
||||
ret <16 x i16> %shl
|
||||
}
|
||||
|
||||
define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X32-LABEL: shl_32i8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X32-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand LCPI28_0, %ymm2, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand LCPI28_1, %ymm2, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: shl_32i8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X64-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
@ -496,76 +489,71 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%shl = shl <32 x i8> %r, %a
|
||||
ret <32 x i8> %shl
|
||||
}
|
||||
|
||||
define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||
; X32-LABEL: ashr_8i16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X32-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: ashr_8i16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X64-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%ashr = ashr <8 x i16> %r, %a
|
||||
ret <8 x i16> %ashr
|
||||
}
|
||||
|
||||
define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||
; X32-LABEL: ashr_16i16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3
|
||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: ashr_16i16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
|
||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3
|
||||
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%ashr = ashr <16 x i16> %r, %a
|
||||
ret <16 x i16> %ashr
|
||||
}
|
||||
|
||||
define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X32-LABEL: ashr_32i8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X32-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
||||
; X32-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
@ -591,10 +579,9 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: ashr_32i8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X64-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
||||
; X64-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
@ -620,93 +607,87 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; X64-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%ashr = ashr <32 x i8> %r, %a
|
||||
ret <32 x i8> %ashr
|
||||
}
|
||||
|
||||
define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||
; X32-LABEL: lshr_8i16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: lshr_8i16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%lshr = lshr <8 x i16> %r, %a
|
||||
ret <8 x i16> %lshr
|
||||
}
|
||||
|
||||
define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||
; X32-LABEL: lshr_16i16:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
|
||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: lshr_16i16:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
|
||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
|
||||
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%lshr = lshr <16 x i16> %r, %a
|
||||
ret <16 x i16> %lshr
|
||||
}
|
||||
|
||||
define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X32-LABEL: lshr_32i8:
|
||||
; X32: ## BB#0:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X32-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand LCPI34_0, %ymm2, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand LCPI34_1, %ymm2, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand LCPI34_2, %ymm2, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X32-NEXT: ## -- End function
|
||||
;
|
||||
; X64-LABEL: lshr_32i8:
|
||||
; X64: ## BB#0:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X64-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
@ -720,7 +701,6 @@ define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
; X64-NEXT: ## -- End function
|
||||
%lshr = lshr <32 x i8> %r, %a
|
||||
ret <32 x i8> %lshr
|
||||
}
|
||||
|
@ -1,16 +1,16 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define <8 x i32> @perm_cl_int_8x32(<8 x i32> %A) nounwind readnone {
|
||||
; X32-LABEL: perm_cl_int_8x32:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,2,1,2,7,6,0]
|
||||
; X32-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: perm_cl_int_8x32:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,2,1,2,7,6,0]
|
||||
; X64-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
@ -22,13 +22,13 @@ entry:
|
||||
|
||||
define <8 x float> @perm_cl_fp_8x32(<8 x float> %A) nounwind readnone {
|
||||
; X32-LABEL: perm_cl_fp_8x32:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vmovaps {{.*#+}} ymm1 = <u,7,2,u,4,u,1,6>
|
||||
; X32-NEXT: vpermps %ymm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: perm_cl_fp_8x32:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vmovaps {{.*#+}} ymm1 = <u,7,2,u,4,u,1,6>
|
||||
; X64-NEXT: vpermps %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
@ -39,12 +39,12 @@ entry:
|
||||
|
||||
define <4 x i64> @perm_cl_int_4x64(<4 x i64> %A) nounwind readnone {
|
||||
; X32-LABEL: perm_cl_int_4x64:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,1]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: perm_cl_int_4x64:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,1]
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
@ -54,12 +54,12 @@ entry:
|
||||
|
||||
define <4 x double> @perm_cl_fp_4x64(<4 x double> %A) nounwind readnone {
|
||||
; X32-LABEL: perm_cl_fp_4x64:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: perm_cl_fp_4x64:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
Loading…
Reference in New Issue
Block a user