mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-12 23:40:54 +00:00
c7cbba03a9
VPTERNLOG is a ternary instruction with an immediate that specifies the logical operation to perform. For each bit position in the three source vectors, the bits from the three sources are concatenated, and the resulting 3-bit value selects a bit in the immediate. That bit is written to the result vector. We can commute the instruction by swapping operands and modifying the immediate. To modify the immediate we need to swap two pairs of bits. The pairs correspond to the locations in the immediate where the commuted operands' bits have opposite values and the uncommuted operand's bit has the same value. Bits 0 and 7 are never swapped, since the relevant bits from all sources have the same value there. This refactors and reuses parts of the FMA3 commuting code, since FMA3 is also a three-operand instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282132 91177308-0d34-0410-b5e6-96231b3b80d8
494 lines
22 KiB
LLVM
494 lines
22 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
|
|
|
|
; These tests cover cases where vpternlog benefits from having its operands commuted.
|
|
|
|
; "mask" form of the 512-bit dword pternlog intrinsic: three <16 x i32> sources,
; an i32 immediate (always 33 in this file) and an i16 mask (-1 or %mask here).
; Masked calls in this file are expected to produce `vpternlogd ... {%k1}`.
declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
|
|
; "maskz" form of the 512-bit dword pternlog intrinsic: three <16 x i32> sources,
; an i32 immediate (always 33 in this file) and an i16 mask (%mask here).
; Calls in this file are expected to produce `vpternlogd ... {%k1} {z}`.
declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
|
|
|
|
; Baseline, unmasked (i16 -1): intrinsic sources are (%x0, %x1, %x2), all in registers.
; Expected: a single vpternlogd that keeps the original immediate 33.
define <16 x i32> @vpternlog_v16i32_012(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_012:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x1, %x0, %x2), all in registers.
; Expected: a single vpternlogd with the immediate rewritten from 33 to 9 and no extra move.
define <16 x i32> @vpternlog_v16i32_102(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_102:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $9, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x2, %x1, %x0), all in registers.
; Expected: vpternlogd with immediate 9 writing %zmm1, then a vmovdqa64 copy of the
; result into %zmm0.
define <16 x i32> @vpternlog_v16i32_210(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_210:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $9, %zmm0, %zmm2, %zmm1
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x0, %x1, %x2) with %x0 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; is rewritten from 33 to 9.
define <16 x i32> @vpternlog_v16i32_012_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_012_load0:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x0, %x1, %x2) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_012_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_012_load1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x0, %x1, %x2) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; stays 33.
define <16 x i32> @vpternlog_v16i32_012_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
; CHECK-LABEL: vpternlog_v16i32_012_load2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x1, %x0, %x2) with %x0 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_102_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_102_load0:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x1, %x0, %x2) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; is rewritten from 33 to 9.
define <16 x i32> @vpternlog_v16i32_102_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_102_load1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x1, %x0, %x2) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; is rewritten from 33 to 9.
define <16 x i32> @vpternlog_v16i32_102_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
; CHECK-LABEL: vpternlog_v16i32_102_load2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x2, %x1, %x0) with %x0 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; is rewritten from 33 to 9.
define <16 x i32> @vpternlog_v16i32_210_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_210_load0:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x2, %x1, %x0) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_210_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_210_load1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x2, %x1, %x0) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the emitted
; immediate is 33.
define <16 x i32> @vpternlog_v16i32_210_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
; CHECK-LABEL: vpternlog_v16i32_210_load2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x0, %x2, %x1) with %x0 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the emitted
; immediate is 33.
define <16 x i32> @vpternlog_v16i32_021_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_021_load0:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x0, %x2, %x1) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; stays 33.
define <16 x i32> @vpternlog_v16i32_021_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_021_load1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Unmasked (i16 -1): intrinsic sources are (%x0, %x2, %x1) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd memory operand and the immediate
; is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_021_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
; CHECK-LABEL: vpternlog_v16i32_021_load2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 33, i16 -1)
  ret <16 x i32> %res
}
|
|
|
|
; Masked baseline (i16 %mask): intrinsic sources are (%x0, %x1, %x2), all in registers.
; Expected: %mask is moved into %k1 and a single vpternlogd {%k1} keeps immediate 33.
define <16 x i32> @vpternlog_v16i32_012_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x1, %x0, %x2), all in registers.
; Expected: the immediate stays 33; vpternlogd writes %zmm1 under {%k1} and the
; result is then copied to %zmm0 with vmovdqa64.
define <16 x i32> @vpternlog_v16i32_102_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x2, %x1, %x0), all in registers.
; Expected: the immediate stays 33; vpternlogd writes %zmm2 under {%k1} and the
; result is then copied to %zmm0 with vmovdqa64.
define <16 x i32> @vpternlog_v16i32_210_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x0, %x1, %x2) with %x0 (the first
; source) loaded from memory.
; Expected: the load is NOT folded. %x0 is loaded into %zmm2 with vmovdqa32,
; vpternlogd keeps immediate 33 and writes %zmm2 under {%k1}, and the result is
; copied to %zmm0.
define <16 x i32> @vpternlog_v16i32_012_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x0, %x1, %x2) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} memory operand and the
; immediate is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_012_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load1_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x0, %x1, %x2) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} memory operand and the
; immediate stays 33.
define <16 x i32> @vpternlog_v16i32_012_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load2_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x1, %x0, %x2) with %x0 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} memory operand and the
; immediate is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_102_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load0_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x1, %x0, %x2) with %x1 (the first
; source) loaded from memory.
; Expected: the load is NOT folded. %x1 is loaded into %zmm2 with vmovdqa32,
; vpternlogd keeps immediate 33 and writes %zmm2 under {%k1}, and the result is
; copied to %zmm0.
define <16 x i32> @vpternlog_v16i32_102_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load1_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x1, %x0, %x2) with %x2 loaded from memory.
; Expected: the load is folded as the memory operand, the immediate stays 33,
; vpternlogd writes %zmm1 under {%k1}, and the result is copied to %zmm0.
define <16 x i32> @vpternlog_v16i32_102_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load2_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x2, %x1, %x0) with %x0 loaded from memory.
; Expected: the load is folded as the memory operand, the immediate stays 33,
; vpternlogd writes %zmm1 under {%k1}, and the result is copied to %zmm0.
define <16 x i32> @vpternlog_v16i32_210_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load0_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x2, %x1, %x0) with %x1 loaded from memory.
; Expected: the load is folded as the memory operand, the immediate is rewritten
; from 33 to 65, vpternlogd writes %zmm1 under {%k1}, and the result is copied
; to %zmm0.
define <16 x i32> @vpternlog_v16i32_210_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load1_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x2, %x1, %x0) with %x2 (the first
; source) loaded from memory.
; Expected: the load is NOT folded. %x2 is loaded into %zmm2 with vmovdqa32,
; vpternlogd keeps immediate 33 and writes %zmm2 under {%k1}, and the result is
; copied to %zmm0.
define <16 x i32> @vpternlog_v16i32_210_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load2_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $33, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x0, %x2, %x1) with %x0 (the first
; source) loaded from memory.
; Expected: the load is NOT folded. %x0 is loaded into %zmm2 with vmovdqa32,
; vpternlogd keeps immediate 33 and writes %zmm2 under {%k1}, and the result is
; copied to %zmm0.
define <16 x i32> @vpternlog_v16i32_021_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load0_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $33, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x0, %x2, %x1) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} memory operand and the
; immediate stays 33.
define <16 x i32> @vpternlog_v16i32_021_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load1_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Masked (i16 %mask): intrinsic sources are (%x0, %x2, %x1) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} memory operand and the
; immediate is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_021_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load2_mask:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked baseline (maskz intrinsic): sources are (%x0, %x1, %x2), all in registers.
; Expected: %mask is moved into %k1 and a single vpternlogd {%k1} {z} keeps immediate 33.
define <16 x i32> @vpternlog_v16i32_012_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x1, %x0, %x2), all in registers.
; Expected: a single vpternlogd {%k1} {z} with the immediate rewritten from 33 to 9
; and no extra move.
define <16 x i32> @vpternlog_v16i32_102_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $9, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x2, %x1, %x0), all in registers.
; Expected: vpternlogd {%k1} {z} with immediate 9 writing %zmm1, then a vmovdqa64
; copy of the result into %zmm0.
define <16 x i32> @vpternlog_v16i32_210_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $9, %zmm0, %zmm2, %zmm1 {%k1} {z}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x0, %x1, %x2) with %x0 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate is rewritten from 33 to 9.
define <16 x i32> @vpternlog_v16i32_012_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x0, %x1, %x2) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_012_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load1_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x0, %x1, %x2) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate stays 33.
define <16 x i32> @vpternlog_v16i32_012_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load2_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x1, %x0, %x2) with %x0 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_102_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load0_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x1, %x0, %x2) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate is rewritten from 33 to 9.
define <16 x i32> @vpternlog_v16i32_102_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load1_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x1, %x0, %x2) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate is rewritten from 33 to 9.
define <16 x i32> @vpternlog_v16i32_102_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load2_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x2, %x1, %x0) with %x0 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate is rewritten from 33 to 9.
define <16 x i32> @vpternlog_v16i32_210_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load0_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x2, %x1, %x0) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_210_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load1_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x2, %x1, %x0) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; emitted immediate is 33.
define <16 x i32> @vpternlog_v16i32_210_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load2_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x0, %x2, %x1) with %x0 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; emitted immediate is 33.
define <16 x i32> @vpternlog_v16i32_021_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load0_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x0, %x2, %x1) with %x1 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate stays 33.
define <16 x i32> @vpternlog_v16i32_021_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load1_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|
|
|
|
; Zero-masked (maskz intrinsic): sources are (%x0, %x2, %x1) with %x2 loaded from memory.
; Expected: the load is folded as the vpternlogd {%k1} {z} memory operand and the
; immediate is rewritten from 33 to 65.
define <16 x i32> @vpternlog_v16i32_021_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load2_maskz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 33, i16 %mask)
  ret <16 x i32> %res
}
|