[CostModel][X86] Moved legal uniform shift costs earlier.

XOP was prematurely matching, doubling the cost of ashr/lshr uniform shifts.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291390 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Simon Pilgrim 2017-01-08 13:12:03 +00:00
parent f1eea39deb
commit 93f6323c31
3 changed files with 44 additions and 32 deletions

View File

@ -207,6 +207,43 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
static const CostTblEntry AVX2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i16, 1 }, // psllw.
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
};
if (ST->hasAVX2() &&
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
if (const auto *Entry =
CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
static const CostTblEntry SSE2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
};
if (ST->hasSSE2() &&
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
if (const auto *Entry =
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
static const CostTblEntry AVX512DQCostTable[] = {
{ ISD::MUL, MVT::v2i64, 1 },
{ ISD::MUL, MVT::v4i64, 1 },
@ -291,20 +328,6 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
static const CostTblEntry AVX2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
};
if (ST->hasAVX2() &&
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
if (const auto *Entry =
CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
static const CostTblEntry XOPShiftCostTable[] = {
// 128bit shifts take 1cy, but right shifts require negation beforehand.
{ ISD::SHL, MVT::v16i8, 1 },
@ -339,31 +362,23 @@ int X86TTIImpl::getArithmeticInstrCost(
if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE2UniformCostTable[] = {
static const CostTblEntry SSE2UniformShiftCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
{ ISD::SHL, MVT::v16i16, 2 }, // psllw.
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
{ ISD::SHL, MVT::v8i32, 2 }, // pslld
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
{ ISD::SHL, MVT::v4i64, 2 }, // psllq.
{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
{ ISD::SRL, MVT::v32i8, 2 }, // psrlw.
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
{ ISD::SRL, MVT::v16i16, 2 }, // psrlw.
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
{ ISD::SRL, MVT::v8i32, 2 }, // psrld.
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
{ ISD::SRL, MVT::v4i64, 2 }, // psrlq.
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
{ ISD::SRA, MVT::v16i16, 2 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
{ ISD::SRA, MVT::v8i32, 2 }, // psrad.
{ ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle.
@ -373,7 +388,7 @@ int X86TTIImpl::getArithmeticInstrCost(
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
if (const auto *Entry =
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}

View File

@ -529,8 +529,7 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %shift
}
@ -568,7 +567,7 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
}

View File

@ -501,8 +501,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <2 x i64> %a, <i64 7, i64 7>
ret <2 x i64> %shift
}
@ -540,8 +539,7 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %shift
}
@ -579,7 +577,7 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
}