mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-07 04:21:27 +00:00
[AVX-512] Add VPMINS/MINU/MAXS/MAXU instructions to load folding tables.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294858 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b3ac0dcae6
commit
5bb68b46d1
@@ -1929,14 +1929,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VPINSRWZrr, X86::VPINSRWZrm, 0 },
|
||||
{ X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 },
|
||||
{ X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 },
|
||||
{ X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 },
|
||||
{ X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
|
||||
{ X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
|
||||
{ X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 },
|
||||
{ X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 },
|
||||
{ X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
|
||||
{ X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
|
||||
{ X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 },
|
||||
{ X86::VPMINSBZrr, X86::VPMINSBZrm, 0 },
|
||||
{ X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
|
||||
{ X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
|
||||
{ X86::VPMINSWZrr, X86::VPMINSWZrm, 0 },
|
||||
{ X86::VPMINUBZrr, X86::VPMINUBZrm, 0 },
|
||||
{ X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
|
||||
{ X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
|
||||
{ X86::VPMINUWZrr, X86::VPMINUWZrm, 0 },
|
||||
{ X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
|
||||
{ X86::VPMULLDZrr, X86::VPMULLDZrm, 0 },
|
||||
{ X86::VPMULLQZrr, X86::VPMULLQZrm, 0 },
|
||||
@@ -2124,6 +2132,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 },
|
||||
{ X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 },
|
||||
{ X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 },
|
||||
{ X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 },
|
||||
{ X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 },
|
||||
{ X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 },
|
||||
{ X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 },
|
||||
{ X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 },
|
||||
{ X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 },
|
||||
{ X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 },
|
||||
{ X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 },
|
||||
{ X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 },
|
||||
{ X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 },
|
||||
{ X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 },
|
||||
{ X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 },
|
||||
{ X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 },
|
||||
{ X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 },
|
||||
{ X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 },
|
||||
{ X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 },
|
||||
{ X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 },
|
||||
{ X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 },
|
||||
{ X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 },
|
||||
{ X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 },
|
||||
{ X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 },
|
||||
{ X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 },
|
||||
{ X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 },
|
||||
{ X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 },
|
||||
{ X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 },
|
||||
{ X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 },
|
||||
{ X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 },
|
||||
{ X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 },
|
||||
{ X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 },
|
||||
{ X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 },
|
||||
{ X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 },
|
||||
{ X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 },
|
||||
{ X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 },
|
||||
{ X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 },
|
||||
{ X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 },
|
||||
@@ -2531,6 +2571,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 },
|
||||
{ X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 },
|
||||
{ X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 },
|
||||
{ X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 },
|
||||
{ X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 },
|
||||
{ X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 },
|
||||
{ X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 },
|
||||
{ X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 },
|
||||
{ X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 },
|
||||
{ X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 },
|
||||
{ X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 },
|
||||
{ X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 },
|
||||
{ X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 },
|
||||
{ X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 },
|
||||
{ X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 },
|
||||
{ X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 },
|
||||
{ X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 },
|
||||
{ X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 },
|
||||
{ X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 },
|
||||
{ X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 },
|
||||
{ X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 },
|
||||
{ X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 },
|
||||
@@ -2638,6 +2694,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 },
|
||||
{ X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 },
|
||||
{ X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 },
|
||||
{ X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 },
|
||||
{ X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 },
|
||||
{ X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 },
|
||||
{ X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 },
|
||||
{ X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 },
|
||||
{ X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 },
|
||||
{ X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 },
|
||||
{ X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 },
|
||||
{ X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 },
|
||||
{ X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 },
|
||||
{ X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 },
|
||||
{ X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 },
|
||||
{ X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 },
|
||||
{ X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 },
|
||||
{ X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 },
|
||||
{ X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 },
|
||||
{ X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 },
|
||||
{ X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 },
|
||||
{ X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 },
|
||||
@@ -2735,6 +2807,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 },
|
||||
{ X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 },
|
||||
{ X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 },
|
||||
{ X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 },
|
||||
{ X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 },
|
||||
{ X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 },
|
||||
{ X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 },
|
||||
{ X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 },
|
||||
{ X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 },
|
||||
{ X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 },
|
||||
{ X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 },
|
||||
{ X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 },
|
||||
{ X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 },
|
||||
{ X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 },
|
||||
{ X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 },
|
||||
{ X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 },
|
||||
{ X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 },
|
||||
{ X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 },
|
||||
{ X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 },
|
||||
{ X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 },
|
||||
{ X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 },
|
||||
{ X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 },
|
||||
@@ -2996,6 +3084,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
|
||||
{ X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
|
||||
{ X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
|
||||
{ X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 },
|
||||
{ X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 },
|
||||
{ X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 },
|
||||
{ X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 },
|
||||
{ X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 },
|
||||
{ X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 },
|
||||
{ X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 },
|
||||
{ X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 },
|
||||
{ X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 },
|
||||
{ X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 },
|
||||
{ X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 },
|
||||
{ X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 },
|
||||
{ X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 },
|
||||
{ X86::VPMINUDZrrk, X86::VPMINUDZrmk, 0 },
|
||||
{ X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 },
|
||||
{ X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 },
|
||||
{ X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 },
|
||||
{ X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 },
|
||||
{ X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 },
|
||||
@@ -3116,6 +3220,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
|
||||
{ X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
|
||||
{ X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
|
||||
{ X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 },
|
||||
{ X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 },
|
||||
{ X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 },
|
||||
{ X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 },
|
||||
{ X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 },
|
||||
{ X86::VPMAXUDZ256rrk, X86::VPMAXUDZ256rmk, 0 },
|
||||
{ X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 },
|
||||
{ X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 },
|
||||
{ X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 },
|
||||
{ X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 },
|
||||
{ X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 },
|
||||
{ X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 },
|
||||
{ X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 },
|
||||
{ X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 },
|
||||
{ X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 },
|
||||
{ X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 },
|
||||
{ X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 },
|
||||
{ X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 },
|
||||
{ X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 },
|
||||
@@ -3227,6 +3347,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
|
||||
{ X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
|
||||
{ X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
|
||||
{ X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 },
|
||||
{ X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 },
|
||||
{ X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 },
|
||||
{ X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 },
|
||||
{ X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 },
|
||||
{ X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 },
|
||||
{ X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 },
|
||||
{ X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 },
|
||||
{ X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 },
|
||||
{ X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 },
|
||||
{ X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 },
|
||||
{ X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 },
|
||||
{ X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 },
|
||||
{ X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 },
|
||||
{ X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 },
|
||||
{ X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 },
|
||||
{ X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 },
|
||||
{ X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 },
|
||||
{ X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 },
|
||||
|
@@ -726,6 +726,328 @@ define <8 x i32> @stack_fold_pmaddwd_ymm_maskz(<16 x i16> %a0, <16 x i16> %a1, i
|
||||
ret <8 x i32> %4
|
||||
}
|
||||
|
||||
; Stack-fold test for vpmaxsb (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.sse41.pmaxsb is applied to %a0 and %a1. The pattern
; below expects one vpmaxsb source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsb
|
||||
;CHECK: vpmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxsb (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx2.pmaxs.b is applied to %a0 and %a1. The pattern
; below expects one vpmaxsb source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <32 x i8> @stack_fold_pmaxsb_ymm(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsb_ymm
|
||||
;CHECK: vpmaxsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
|
||||
ret <32 x i8> %2
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxsd (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.sse41.pmaxsd is applied to %a0 and %a1. The pattern
; below expects one vpmaxsd source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsd
|
||||
;CHECK: vpmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxsd (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx2.pmaxs.d is applied to %a0 and %a1. The pattern
; below expects one vpmaxsd source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <8 x i32> @stack_fold_pmaxsd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsd_ymm
|
||||
;CHECK: vpmaxsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxsq (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx512.mask.pmaxs.q.128 is called on %a0 and %a1
; with an undef third argument and an i8 -1 fourth argument. The pattern below
; expects one vpmaxsq source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <2 x i64> @stack_fold_pmaxsq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsq
|
||||
;CHECK: vpmaxsq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> undef, i8 -1)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxsq (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx512.mask.pmaxs.q.256 is called on %a0 and %a1
; with an undef third argument and an i8 -1 fourth argument. The pattern below
; expects one vpmaxsq source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <4 x i64> @stack_fold_pmaxsq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsq_ymm
|
||||
;CHECK: vpmaxsq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> undef, i8 -1)
|
||||
ret <4 x i64> %2
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxsw (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.sse2.pmaxs.w is applied to %a0 and %a1. The pattern
; below expects one vpmaxsw source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsw
|
||||
;CHECK: vpmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxsw (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx2.pmaxs.w is applied to %a0 and %a1. The pattern
; below expects one vpmaxsw source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <16 x i16> @stack_fold_pmaxsw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxsw_ymm
|
||||
;CHECK: vpmaxsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
|
||||
ret <16 x i16> %2
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxub (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.sse2.pmaxu.b is applied to %a0 and %a1. The pattern
; below expects one vpmaxub source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxub
|
||||
;CHECK: vpmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxub (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx2.pmaxu.b is applied to %a0 and %a1. The pattern
; below expects one vpmaxub source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <32 x i8> @stack_fold_pmaxub_ymm(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxub_ymm
|
||||
;CHECK: vpmaxub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
|
||||
ret <32 x i8> %2
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxud (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.sse41.pmaxud is applied to %a0 and %a1. The pattern
; below expects one vpmaxud source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxud
|
||||
;CHECK: vpmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxud (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx2.pmaxu.d is applied to %a0 and %a1. The pattern
; below expects one vpmaxud source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <8 x i32> @stack_fold_pmaxud_ymm(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxud_ymm
|
||||
;CHECK: vpmaxud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxuq (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx512.mask.pmaxu.q.128 is called on %a0 and %a1
; with an undef third argument and an i8 -1 fourth argument. The pattern below
; expects one vpmaxuq source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <2 x i64> @stack_fold_pmaxuq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuq
|
||||
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> undef, i8 -1)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
; Masked stack-fold test for vpmaxuq (xmm): after an inline-asm "nop" that
; clobbers xmm2-xmm31 and flags, a vector is loaded from %passthru and passed,
; together with %mask, as the third and fourth arguments of
; @llvm.x86.avx512.mask.pmaxu.q.128. The pattern below expects a 16-byte folded
; reload from an %rsp-relative slot with a {%k<n>} mask-register annotation.
define <2 x i64> @stack_fold_pmaxuq_mask(<2 x i64>* %passthru, <2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuq_mask
|
||||
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = load <2 x i64>, <2 x i64>* %passthru
|
||||
%3 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %2, i8 %mask)
|
||||
ret <2 x i64> %3
|
||||
}
|
||||
|
||||
; Zero-masked stack-fold test for vpmaxuq (xmm): after an inline-asm "nop" that
; clobbers xmm2-xmm31 and flags, @llvm.x86.avx512.mask.pmaxu.q.128 is called
; with zeroinitializer as its third argument and %mask as its fourth. The
; pattern below expects a 16-byte folded reload from an %rsp-relative slot with
; {%k<n>} and {z} annotations.
define <2 x i64> @stack_fold_pmaxuq_maskz(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuq_maskz
|
||||
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> zeroinitializer, i8 %mask)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
|
||||
; Stack-fold test for vpmaxuq (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx512.mask.pmaxu.q.256 is called on %a0 and %a1
; with an undef third argument and an i8 -1 fourth argument. The pattern below
; expects one vpmaxuq source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <4 x i64> @stack_fold_pmaxuq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuq_ymm
|
||||
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> undef, i8 -1)
|
||||
ret <4 x i64> %2
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
; Masked stack-fold test for vpmaxuq (ymm): after an inline-asm "nop" that
; clobbers xmm2-xmm31 and flags, a vector is loaded from %passthru and passed,
; together with %mask, as the third and fourth arguments of
; @llvm.x86.avx512.mask.pmaxu.q.256. The pattern below expects a 32-byte folded
; reload from an %rsp-relative slot with a {%k<n>} mask-register annotation.
define <4 x i64> @stack_fold_pmaxuq_ymm_mask(<4 x i64>* %passthru, <4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuq_ymm_mask
|
||||
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = load <4 x i64>, <4 x i64>* %passthru
|
||||
%3 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %2, i8 %mask)
|
||||
ret <4 x i64> %3
|
||||
}
|
||||
|
||||
; Zero-masked stack-fold test for vpmaxuq (ymm): after an inline-asm "nop" that
; clobbers xmm2-xmm31 and flags, @llvm.x86.avx512.mask.pmaxu.q.256 is called
; with zeroinitializer as its third argument and %mask as its fourth. The
; pattern below expects a 32-byte folded reload from an %rsp-relative slot with
; {%k<n>} and {z} annotations.
define <4 x i64> @stack_fold_pmaxuq_ymm_maskz(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuq_ymm_maskz
|
||||
;CHECK: vpmaxuq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> zeroinitializer, i8 %mask)
|
||||
ret <4 x i64> %2
|
||||
}
|
||||
|
||||
; Stack-fold test for vpmaxuw (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.sse41.pmaxuw is applied to %a0 and %a1. The pattern
; below expects one vpmaxuw source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuw
|
||||
;CHECK: vpmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpmaxuw (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx2.pmaxu.w is applied to %a0 and %a1. The pattern
; below expects one vpmaxuw source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <16 x i16> @stack_fold_pmaxuw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaxuw_ymm
|
||||
;CHECK: vpmaxuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
|
||||
ret <16 x i16> %2
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpminsb (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.sse41.pminsb is applied to %a0 and %a1. The pattern
; below expects one vpminsb source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsb
|
||||
;CHECK: vpminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpminsb (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx2.pmins.b is applied to %a0 and %a1. The pattern
; below expects one vpminsb source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <32 x i8> @stack_fold_pminsb_ymm(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsb_ymm
|
||||
;CHECK: vpminsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
|
||||
ret <32 x i8> %2
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpminsd (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.sse41.pminsd is applied to %a0 and %a1. The pattern
; below expects one vpminsd source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsd
|
||||
;CHECK: vpminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpminsd (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx2.pmins.d is applied to %a0 and %a1. The pattern
; below expects one vpminsd source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <8 x i32> @stack_fold_pminsd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsd_ymm
|
||||
;CHECK: vpminsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpminsq (xmm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx512.mask.pmins.q.128 is called on %a0 and %a1
; with an undef third argument and an i8 -1 fourth argument. The pattern below
; expects one vpminsq source to be read from an %rsp-relative stack slot,
; annotated as a 16-byte folded reload.
define <2 x i64> @stack_fold_pminsq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsq
|
||||
;CHECK: vpminsq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> undef, i8 -1)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
; Stack-fold test for vpminsq (ymm): an inline-asm "nop" clobbers xmm2-xmm31
; and flags, then @llvm.x86.avx512.mask.pmins.q.256 is called on %a0 and %a1
; with an undef third argument and an i8 -1 fourth argument. The pattern below
; expects one vpminsq source to be read from an %rsp-relative stack slot,
; annotated as a 32-byte folded reload.
define <4 x i64> @stack_fold_pminsq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsq_ymm
|
||||
;CHECK: vpminsq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> undef, i8 -1)
|
||||
ret <4 x i64> %2
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminsw with 128-bit (xmm) operands.
; The FileCheck directives below expect the first vpminsw operand to be read
; from an (%rsp)-relative slot, annotated as a 16-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsw
|
||||
;CHECK: vpminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminsw with 256-bit (ymm) operands.
; The FileCheck directives below expect the first vpminsw operand to be read
; from an (%rsp)-relative slot, annotated as a 32-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <16 x i16> @stack_fold_pminsw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminsw_ymm
|
||||
;CHECK: vpminsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
|
||||
ret <16 x i16> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminub with 128-bit (xmm) operands.
; The FileCheck directives below expect the first vpminub operand to be read
; from an (%rsp)-relative slot, annotated as a 16-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminub
|
||||
;CHECK: vpminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminub with 256-bit (ymm) operands.
; The FileCheck directives below expect the first vpminub operand to be read
; from an (%rsp)-relative slot, annotated as a 32-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <32 x i8> @stack_fold_pminub_ymm(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminub_ymm
|
||||
;CHECK: vpminub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
|
||||
ret <32 x i8> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminud with 128-bit (xmm) operands.
; The FileCheck directives below expect the first vpminud operand to be read
; from an (%rsp)-relative slot, annotated as a 16-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminud
|
||||
;CHECK: vpminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminud with 256-bit (ymm) operands.
; The FileCheck directives below expect the first vpminud operand to be read
; from an (%rsp)-relative slot, annotated as a 32-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <8 x i32> @stack_fold_pminud_ymm(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminud_ymm
|
||||
;CHECK: vpminud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminuq with 128-bit (xmm) operands.
; The FileCheck directives below expect the first vpminuq operand to be read
; from an (%rsp)-relative slot, annotated as a 16-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <2 x i64> @stack_fold_pminuq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminuq
|
||||
;CHECK: vpminuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
; Third operand is undef and the last operand is i8 -1.
; NOTE(review): presumably pass-through value and all-lanes mask, i.e. the
; unmasked form - confirm against the intrinsic definition.
%2 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> undef, i8 -1)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminuq with 256-bit (ymm) operands.
; The FileCheck directives below expect the first vpminuq operand to be read
; from an (%rsp)-relative slot, annotated as a 32-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <4 x i64> @stack_fold_pminuq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminuq_ymm
|
||||
;CHECK: vpminuq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
; Third operand is undef and the last operand is i8 -1.
; NOTE(review): presumably pass-through value and all-lanes mask, i.e. the
; unmasked form - confirm against the intrinsic definition.
%2 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> undef, i8 -1)
|
||||
ret <4 x i64> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminuw with 128-bit (xmm) operands.
; The FileCheck directives below expect the first vpminuw operand to be read
; from an (%rsp)-relative slot, annotated as a 16-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminuw
|
||||
;CHECK: vpminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; Stack-folding test for vpminuw with 256-bit (ymm) operands.
; The FileCheck directives below expect the first vpminuw operand to be read
; from an (%rsp)-relative slot, annotated as a 32-byte folded reload.
; NOTE(review): presumably the asm clobber list is what forces an argument to
; be spilled so the reload can be folded - confirm.
define <16 x i16> @stack_fold_pminuw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pminuw_ymm
|
||||
;CHECK: vpminuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
; Side-effecting "nop" asm listing xmm2-xmm31 and flags as clobbers; its
; result %1 is not used by the rest of the function.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
|
||||
ret <16 x i16> %2
|
||||
}
|
||||
; Declaration of the intrinsic called by the test above.
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_vpmovdw(<8 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_vpmovdw
|
||||
;CHECK: vpmovdw %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
|
||||
|
Loading…
Reference in New Issue
Block a user