llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll
Craig Topper aa9982b218 [AVX-512] Add support for commuting VPERMT2(B/W/D/Q/PS/PD) to/from VPERMI2(B/W/D/Q/PS/PD).
Summary:
The index operand and one of the table operands can be swapped by changing the opcode to the other version. Neither of these operands is the one that can load from memory, so this can't be used to increase memory-folding opportunities.

We need to handle both the unmasked forms and the kz forms. Since the load operand isn't the one being commuted, the load and broadcast forms of these instructions can be commuted as well.
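
For example, these two unmasked forms compute the same shuffle and differ only in which source register is overwritten (an illustrative pair, not taken from the tests below; AT&T syntax):

  vpermt2b %zmm2, %zmm1, %zmm0  ; writes %zmm0: %zmm0 = table half, %zmm1 = indices
  vpermi2b %zmm2, %zmm0, %zmm1  ; writes %zmm1: %zmm0 = table half, %zmm1 = indices

The third operand (%zmm2 here, the one that can come from memory) stays in place under the swap.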

Reviewers: igorb, delena, Ayal, Farhana, RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D25652

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287621 91177308-0d34-0410-b5e6-96231b3b80d8

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vbmi | FileCheck %s
declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
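; VPERMB: each result byte is taken from %x0 at the position given by the low
; 6 bits of the corresponding index byte in %x1. The three calls below exercise
; the merge-masked, zero-masked, and unmasked forms of the intrinsic.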
define <64 x i8> @test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3)
%res2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res3 = add <64 x i8> %res, %res1
%res4 = add <64 x i8> %res3, %res2
ret <64 x i8> %res4
}
declare <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
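; VPMULTISHIFTQB: within each 64-bit lane, each byte of the control %x0 holds a
; bit offset, and the result byte is the unaligned 8-bit field of the matching
; qword of %x1 starting at that offset (wrapping around within the qword).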
define <64 x i8> @test_int_x86_avx512_mask_pmultishift_qb_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3)
%res2 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res3 = add <64 x i8> %res, %res1
%res4 = add <64 x i8> %res3, %res2
ret <64 x i8> %res4
}
declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
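; VPERMI2B: the index bytes in %x1 select from the 128-byte table formed by
; %x0 and %x2 (bit 6 of each index picks the table half). The merge-masked form
; overwrites the index operand, hence the vmovdqa64 copy that keeps %x1 live.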
define <64 x i8> @test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z}
; CHECK-NEXT: vpaddb %zmm1, %zmm4, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
%res2 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res3 = add <64 x i8> %res, %res1
%res4 = add <64 x i8> %res3, %res2
ret <64 x i8> %res4
}
declare <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
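; VPERMT2B: the same two-table lookup as VPERMI2B, with indices in %x0, but it
; overwrites the table operand %x1 instead of the index operand; again a copy
; preserves %x1 across the merge-masked form.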
define <64 x i8> @test_int_x86_avx512_mask_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z}
; CHECK-NEXT: vpaddb %zmm1, %zmm4, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
%res2 = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res3 = add <64 x i8> %res, %res1
%res4 = add <64 x i8> %res3, %res2
ret <64 x i8> %res4
}
declare <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
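; With zero masking there is no pass-through value to preserve, so the
; vpermt2var intrinsic can be lowered as vpermi2b, clobbering the index
; register (%x0 in %zmm0) instead of copying a table operand. This exercises
; the VPERMT2/VPERMI2 commutation described in the commit message above.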
define <64 x i8> @test_int_x86_avx512_maskz_vpermt2var_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpermi2b %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
ret <64 x i8> %res
}