mirror of
https://github.com/RPCSX/llvm.git
synced 2025-03-04 19:07:26 +00:00
[CostModel][X86] Moved legal uniform shift costs earlier.
XOP was prematurely matching, doubling the cost of ashr/lshr uniform shifts.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291390 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f1eea39deb
commit
93f6323c31
@ -207,6 +207,43 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v16i16, 1 }, // psllw.
|
||||
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
|
||||
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
|
||||
};
|
||||
|
||||
if (ST->hasAVX2() &&
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry SSE2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
|
||||
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
|
||||
|
||||
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
|
||||
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
|
||||
|
||||
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
|
||||
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
|
||||
};
|
||||
|
||||
if (ST->hasSSE2() &&
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX512DQCostTable[] = {
|
||||
{ ISD::MUL, MVT::v2i64, 1 },
|
||||
{ ISD::MUL, MVT::v4i64, 1 },
|
||||
@ -291,20 +328,6 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
|
||||
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
|
||||
};
|
||||
|
||||
if (ST->hasAVX2() &&
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry XOPShiftCostTable[] = {
|
||||
// 128bit shifts take 1cy, but right shifts require negation beforehand.
|
||||
{ ISD::SHL, MVT::v16i8, 1 },
|
||||
@ -339,31 +362,23 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry SSE2UniformCostTable[] = {
|
||||
static const CostTblEntry SSE2UniformShiftCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.
|
||||
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v16i16, 2 }, // psllw.
|
||||
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
|
||||
{ ISD::SHL, MVT::v8i32, 2 }, // pslld
|
||||
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
|
||||
{ ISD::SHL, MVT::v4i64, 2 }, // psllq.
|
||||
|
||||
{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v32i8, 2 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v16i16, 2 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
|
||||
{ ISD::SRL, MVT::v8i32, 2 }, // psrld.
|
||||
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
|
||||
{ ISD::SRL, MVT::v4i64, 2 }, // psrlq.
|
||||
|
||||
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
|
||||
{ ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb.
|
||||
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
|
||||
{ ISD::SRA, MVT::v16i16, 2 }, // psraw.
|
||||
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
|
||||
{ ISD::SRA, MVT::v8i32, 2 }, // psrad.
|
||||
{ ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
|
||||
{ ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle.
|
||||
@ -373,7 +388,7 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
|
||||
CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
|
@ -529,8 +529,7 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
|
||||
; AVX: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX512: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
|
||||
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 1 for instruction: %shift
|
||||
%shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
|
||||
ret <4 x i32> %shift
|
||||
}
|
||||
@ -568,7 +567,7 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
|
||||
; AVX: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX512: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 2 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 1 for instruction: %shift
|
||||
%shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
|
||||
ret <8 x i16> %shift
|
||||
}
|
||||
|
@ -501,8 +501,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
|
||||
; AVX: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX512: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
|
||||
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 1 for instruction: %shift
|
||||
%shift = lshr <2 x i64> %a, <i64 7, i64 7>
|
||||
ret <2 x i64> %shift
|
||||
}
|
||||
@ -540,8 +539,7 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
|
||||
; AVX: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX512: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
|
||||
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 1 for instruction: %shift
|
||||
%shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
|
||||
ret <4 x i32> %shift
|
||||
}
|
||||
@ -579,7 +577,7 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
|
||||
; AVX: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX512: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 2 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 1 for instruction: %shift
|
||||
%shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
|
||||
ret <8 x i16> %shift
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user