mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 20:29:53 +00:00
AVX-512: added cost for some AVX-512 instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217863 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d41bba158c
commit
0218e1e1da
@ -225,6 +225,15 @@ unsigned X86TTI::getArithmeticInstrCost(
|
||||
return LT.first * AVX2UniformConstCostTable[Idx].Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
|
||||
{ ISD::SHL, MVT::v16i32, 1 },
|
||||
{ ISD::SRL, MVT::v16i32, 1 },
|
||||
{ ISD::SRA, MVT::v16i32, 1 },
|
||||
{ ISD::SHL, MVT::v8i64, 1 },
|
||||
{ ISD::SRL, MVT::v8i64, 1 },
|
||||
{ ISD::SRA, MVT::v8i64, 1 },
|
||||
};
|
||||
|
||||
static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
|
||||
// Shifts on v4i64/v8i32 on AVX2 are legal even though we declare to
|
||||
// customize them to detect the cases where shift amount is a scalar one.
|
||||
@ -260,6 +269,11 @@ unsigned X86TTI::getArithmeticInstrCost(
|
||||
{ ISD::UDIV, MVT::v4i64, 4*20 },
|
||||
};
|
||||
|
||||
if (ST->hasAVX512()) {
|
||||
int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
|
||||
if (Idx != -1)
|
||||
return LT.first * AVX512CostTable[Idx].Cost;
|
||||
}
|
||||
// Look for AVX2 lowering tricks.
|
||||
if (ST->hasAVX2()) {
|
||||
if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
|
||||
@ -580,6 +594,38 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
|
||||
return LTSrc.first * SSE2ConvTbl[Idx].Cost;
|
||||
}
|
||||
|
||||
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
|
||||
AVX512ConversionTbl[] = {
|
||||
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
|
||||
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
|
||||
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
|
||||
{ ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
|
||||
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
|
||||
|
||||
// v16i1 -> v16i32 - load + broadcast
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
|
||||
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
|
||||
|
||||
};
|
||||
|
||||
if (ST->hasAVX512()) {
|
||||
int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
|
||||
LTSrc.second);
|
||||
if (Idx != -1)
|
||||
return AVX512ConversionTbl[Idx].Cost;
|
||||
}
|
||||
EVT SrcTy = TLI->getValueType(Src);
|
||||
EVT DstTy = TLI->getValueType(Dst);
|
||||
|
||||
@ -612,6 +658,9 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
|
||||
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
|
||||
|
||||
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
|
||||
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
|
||||
};
|
||||
|
||||
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
|
||||
@ -738,6 +787,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
{ ISD::SETCC, MVT::v32i8, 1 },
|
||||
};
|
||||
|
||||
static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
|
||||
{ ISD::SETCC, MVT::v8i64, 1 },
|
||||
{ ISD::SETCC, MVT::v16i32, 1 },
|
||||
{ ISD::SETCC, MVT::v8f64, 1 },
|
||||
{ ISD::SETCC, MVT::v16f32, 1 },
|
||||
};
|
||||
|
||||
if (ST->hasAVX512()) {
|
||||
int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
|
||||
if (Idx != -1)
|
||||
return LT.first * AVX512CostTbl[Idx].Cost;
|
||||
}
|
||||
|
||||
if (ST->hasAVX2()) {
|
||||
int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
|
||||
if (Idx != -1)
|
||||
|
@ -1,3 +1,4 @@
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
|
||||
|
||||
@ -83,6 +84,19 @@ define i32 @zext_sext(<8 x i1> %in) {
|
||||
;CHECK-AVX: cost of 4 {{.*}} zext
|
||||
%D = zext <4 x i32> undef to <4 x i64>
|
||||
|
||||
;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
|
||||
%D1 = zext <16 x i32> undef to <16 x i64>
|
||||
|
||||
;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
|
||||
%D2 = sext <16 x i32> undef to <16 x i64>
|
||||
|
||||
;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
|
||||
%D3 = zext <16 x i16> undef to <16 x i32>
|
||||
;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
|
||||
%D4 = zext <16 x i8> undef to <16 x i32>
|
||||
;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
|
||||
%D5 = zext <16 x i1> undef to <16 x i32>
|
||||
|
||||
;CHECK-AVX2: cost of 2 {{.*}} trunc
|
||||
;CHECK-AVX: cost of 4 {{.*}} trunc
|
||||
%E = trunc <4 x i64> undef to <4 x i32>
|
||||
@ -101,8 +115,12 @@ define i32 @zext_sext(<8 x i1> %in) {
|
||||
|
||||
;CHECK-AVX2: cost of 4 {{.*}} trunc
|
||||
;CHECK-AVX: cost of 9 {{.*}} trunc
|
||||
;CHECK-AVX512: cost of 1 {{.*}} G = trunc
|
||||
%G = trunc <8 x i64> undef to <8 x i32>
|
||||
|
||||
;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
|
||||
%G1 = trunc <16 x i64> undef to <16 x i32>
|
||||
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
@ -211,3 +229,24 @@ define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
|
||||
%D1 = uitofp <8 x i32> %d to <8 x float>
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fp_conv(<8 x float> %a, <16 x float>%b) {
|
||||
;CHECK-LABEL: for function 'fp_conv'
|
||||
; CHECK-AVX512: cost of 1 {{.*}} fpext
|
||||
%A1 = fpext <8 x float> %a to <8 x double>
|
||||
|
||||
; CHECK-AVX512: cost of 3 {{.*}} fpext
|
||||
%A2 = fpext <16 x float> %b to <16 x double>
|
||||
|
||||
; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext
|
||||
; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
|
||||
%A3 = fpext <8 x float> %a to <8 x double>
|
||||
|
||||
; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc
|
||||
; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
|
||||
%A4 = fptrunc <8 x double> undef to <8 x float>
|
||||
|
||||
; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
|
||||
%A5 = fptrunc <16 x double> undef to <16 x float>
|
||||
ret void
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck -check-prefix=CHECK -check-prefix=AVX512 %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
@ -22,6 +23,11 @@ define i32 @cmp(i32 %arg) {
|
||||
;AVX2: cost of 1 {{.*}} fcmp
|
||||
%E = fcmp olt <4 x double> undef, undef
|
||||
|
||||
; AVX512: cost of 1 {{.*}} %E1 = fcmp
|
||||
%E1 = fcmp olt <16 x float> undef, undef
|
||||
; AVX512: cost of 2 {{.*}} %E2 = fcmp
|
||||
%E2 = fcmp olt <16 x double> undef, undef
|
||||
|
||||
; -- integers --
|
||||
|
||||
;AVX1: cost of 1 {{.*}} icmp
|
||||
@ -49,6 +55,11 @@ define i32 @cmp(i32 %arg) {
|
||||
;AVX2: cost of 1 {{.*}} icmp
|
||||
%M = icmp eq <32 x i8> undef, undef
|
||||
|
||||
; AVX512: cost of 1 {{.*}} %M1 = icmp
|
||||
%M1 = icmp eq <16 x i32> undef, undef
|
||||
; AVX512: cost of 2 {{.*}} %M2 = icmp
|
||||
%M2 = icmp eq <16 x i64> undef, undef
|
||||
|
||||
;CHECK: cost of 0 {{.*}} ret
|
||||
ret i32 undef
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user