From 3f93ecf4334f47d25eaf12ca528b8c9e9904ac34 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 26 Nov 2016 07:20:53 +0000
Subject: [PATCH] [AVX-512] Add VLX versions of VDIVPD/PS and VMULPD/PS to load folding tables.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287970 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.cpp               |  8 +++
 test/CodeGen/X86/stack-folding-fp-avx512vl.ll | 64 +++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index c66fcfd7aed..fd145870eab 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1871,6 +1871,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
     { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
     { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 },
+    { X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0 },
+    { X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 },
+    { X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 },
+    { X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 },
     { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rm, 0 },
     { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rm, 0 },
     { X86::VINSERTI32x4Z256rr,X86::VINSERTI32x4Z256rm, 0 },
@@ -1891,6 +1895,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
     { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
     { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
+    { X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0 },
+    { X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 },
+    { X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 },
+    { X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 },
     { X86::VORPDZ128rr, X86::VORPDZ128rm, 0 },
     { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 },
     { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 },
diff --git a/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index 941ea69a498..1cb391a4f83 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -184,6 +184,38 @@ define i8 @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
 }
 declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8)
 
+define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
+  ;CHECK-LABEL: stack_fold_divpd
+  ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fdiv <2 x double> %a0, %a1
+  ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_divpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+  ;CHECK-LABEL: stack_fold_divpd_ymm
+  ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fdiv <4 x double> %a0, %a1
+  ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_divps
+  ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fdiv <4 x float> %a0, %a1
+  ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_divps_ymm(<8 x float> %a0, <8 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_divps_ymm
+  ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fdiv <8 x float> %a0, %a1
+  ret <8 x float> %2
+}
+
 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
   ;CHECK-LABEL: stack_fold_maxpd
   ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
@@ -286,6 +318,38 @@ define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float>
   ret <8 x float> %2
 }
 
+define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
+  ;CHECK-LABEL: stack_fold_mulpd
+  ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fmul <2 x double> %a0, %a1
+  ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_mulpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+  ;CHECK-LABEL: stack_fold_mulpd_ymm
+  ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fmul <4 x double> %a0, %a1
+  ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_mulps
+  ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fmul <4 x float> %a0, %a1
+  ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_mulps_ymm
+  ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fmul <8 x float> %a0, %a1
+  ret <8 x float> %2
+}
+
 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) #0 {
   ;CHECK-LABEL: stack_fold_orpd
   ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload