mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-21 02:59:15 +00:00
X86 cost model: Vector shifts are expensive in most cases
The default logic does not correctly identify the costs of vector shifts because they are marked as custom on x86. In some cases, where the shift amount is a scalar, we would be able to generate better code. Unfortunately, when this is the case, the value (the splat) gets hoisted out of the loop, making it invisible to ISel. radar://13130673 radar://13537826 llvm-svn: 178703
This commit is contained in:
parent
af01832c73
commit
329430aeac
@ -182,6 +182,16 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
||||
{ ISD::SRL, MVT::v2i64, 1 },
|
||||
{ ISD::SHL, MVT::v4i64, 1 },
|
||||
{ ISD::SRL, MVT::v4i64, 1 },
|
||||
|
||||
{ ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
|
||||
{ ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
|
||||
|
||||
{ ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
|
||||
{ ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
|
||||
|
||||
{ ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
|
||||
{ ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
|
||||
{ ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
|
||||
};
|
||||
|
||||
// Look for AVX2 lowering tricks.
|
||||
@ -192,6 +202,38 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
||||
return LT.first * AVX2CostTable[Idx].Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry<MVT> SSE2CostTable[] = {
|
||||
// We don't correctly identify costs of shifts because they are marked as
|
||||
// custom.
|
||||
// For some cases, where the shift amount is a scalar we would be able
|
||||
// to generate better code. Unfortunately, when this is the case the value
|
||||
// (the splat) will get hoisted out of the loop, thereby making it invisible
|
||||
// to ISel. The cost model must return worst case assumptions because it is
|
||||
// used for vectorization and we don't want to make vectorized code worse
|
||||
// than scalar code.
|
||||
{ ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
|
||||
{ ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
|
||||
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
|
||||
{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
|
||||
|
||||
{ ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
|
||||
{ ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
|
||||
{ ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
|
||||
{ ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
|
||||
|
||||
{ ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
|
||||
{ ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
|
||||
{ ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
|
||||
{ ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
|
||||
};
|
||||
|
||||
if (ST->hasSSE2()) {
|
||||
int Idx = CostTableLookup<MVT>(SSE2CostTable, array_lengthof(SSE2CostTable),
|
||||
ISD, LT.second);
|
||||
if (Idx != -1)
|
||||
return LT.first * SSE2CostTable[Idx].Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry<MVT> AVX1CostTable[] = {
|
||||
// We don't have to scalarize unsupported ops. We can issue two half-sized
|
||||
// operations and we only need to extract the upper YMM half.
|
||||
|
@ -94,7 +94,7 @@ define void @shift() {
|
||||
; AVX2: cost of 1 {{.*}} ashr
|
||||
%C0 = ashr <4 x i32> undef, undef
|
||||
; AVX: cost of 6 {{.*}} ashr
|
||||
; AVX2: cost of 6 {{.*}} ashr
|
||||
; AVX2: cost of 20 {{.*}} ashr
|
||||
%C1 = ashr <2 x i64> undef, undef
|
||||
|
||||
ret void
|
||||
@ -121,7 +121,7 @@ define void @avx2shift() {
|
||||
; AVX2: cost of 1 {{.*}} ashr
|
||||
%C0 = ashr <8 x i32> undef, undef
|
||||
; AVX: cost of 12 {{.*}} ashr
|
||||
; AVX2: cost of 12 {{.*}} ashr
|
||||
; AVX2: cost of 40 {{.*}} ashr
|
||||
%C1 = ashr <4 x i64> undef, undef
|
||||
|
||||
ret void
|
||||
|
243
test/Analysis/CostModel/X86/testshiftashr.ll
Normal file
243
test/Analysis/CostModel/X86/testshiftashr.ll
Normal file
@ -0,0 +1,243 @@
|
||||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
|
||||
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
|
||||
|
||||
%shifttype = type <2 x i16>
|
||||
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
|
||||
entry:
|
||||
; SSE2: shift2i16
|
||||
; SSE2: cost of 20 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift2i16
|
||||
; SSE2-CODEGEN: sarq %cl
|
||||
|
||||
%0 = ashr %shifttype %a , %b
|
||||
ret %shifttype %0
|
||||
}
|
||||
|
||||
%shifttype4i16 = type <4 x i16>
|
||||
define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i16
|
||||
; SSE2: cost of 40 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift4i16
|
||||
; SSE2-CODEGEN: sarl %cl
|
||||
|
||||
%0 = ashr %shifttype4i16 %a , %b
|
||||
ret %shifttype4i16 %0
|
||||
}
|
||||
|
||||
%shifttype8i16 = type <8 x i16>
|
||||
define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i16
|
||||
; SSE2: cost of 80 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift8i16
|
||||
; SSE2-CODEGEN: sarw %cl
|
||||
|
||||
%0 = ashr %shifttype8i16 %a , %b
|
||||
ret %shifttype8i16 %0
|
||||
}
|
||||
|
||||
%shifttype16i16 = type <16 x i16>
|
||||
define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i16
|
||||
; SSE2: cost of 160 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift16i16
|
||||
; SSE2-CODEGEN: sarw %cl
|
||||
|
||||
%0 = ashr %shifttype16i16 %a , %b
|
||||
ret %shifttype16i16 %0
|
||||
}
|
||||
|
||||
%shifttype32i16 = type <32 x i16>
|
||||
define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i16
|
||||
; SSE2: cost of 320 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift32i16
|
||||
; SSE2-CODEGEN: sarw %cl
|
||||
|
||||
%0 = ashr %shifttype32i16 %a , %b
|
||||
ret %shifttype32i16 %0
|
||||
}
|
||||
|
||||
%shifttype2i32 = type <2 x i32>
|
||||
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift2i32
|
||||
; SSE2: cost of 20 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift2i32
|
||||
; SSE2-CODEGEN: sarq %cl
|
||||
|
||||
%0 = ashr %shifttype2i32 %a , %b
|
||||
ret %shifttype2i32 %0
|
||||
}
|
||||
|
||||
%shifttype4i32 = type <4 x i32>
|
||||
define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i32
|
||||
; SSE2: cost of 40 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift4i32
|
||||
; SSE2-CODEGEN: sarl %cl
|
||||
|
||||
%0 = ashr %shifttype4i32 %a , %b
|
||||
ret %shifttype4i32 %0
|
||||
}
|
||||
|
||||
%shifttype8i32 = type <8 x i32>
|
||||
define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i32
|
||||
; SSE2: cost of 80 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift8i32
|
||||
; SSE2-CODEGEN: sarl %cl
|
||||
|
||||
%0 = ashr %shifttype8i32 %a , %b
|
||||
ret %shifttype8i32 %0
|
||||
}
|
||||
|
||||
%shifttype16i32 = type <16 x i32>
|
||||
define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i32
|
||||
; SSE2: cost of 160 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift16i32
|
||||
; SSE2-CODEGEN: sarl %cl
|
||||
|
||||
%0 = ashr %shifttype16i32 %a , %b
|
||||
ret %shifttype16i32 %0
|
||||
}
|
||||
|
||||
%shifttype32i32 = type <32 x i32>
|
||||
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i32
|
||||
; SSE2: cost of 256 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift32i32
|
||||
; SSE2-CODEGEN: sarl %cl
|
||||
|
||||
%0 = ashr %shifttype32i32 %a , %b
|
||||
ret %shifttype32i32 %0
|
||||
}
|
||||
|
||||
%shifttype2i64 = type <2 x i64>
|
||||
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift2i64
|
||||
; SSE2: cost of 20 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift2i64
|
||||
; SSE2-CODEGEN: sarq %cl
|
||||
|
||||
%0 = ashr %shifttype2i64 %a , %b
|
||||
ret %shifttype2i64 %0
|
||||
}
|
||||
|
||||
%shifttype4i64 = type <4 x i64>
|
||||
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i64
|
||||
; SSE2: cost of 40 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift4i64
|
||||
; SSE2-CODEGEN: sarq %cl
|
||||
|
||||
%0 = ashr %shifttype4i64 %a , %b
|
||||
ret %shifttype4i64 %0
|
||||
}
|
||||
|
||||
%shifttype8i64 = type <8 x i64>
|
||||
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i64
|
||||
; SSE2: cost of 80 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift8i64
|
||||
; SSE2-CODEGEN: sarq %cl
|
||||
|
||||
%0 = ashr %shifttype8i64 %a , %b
|
||||
ret %shifttype8i64 %0
|
||||
}
|
||||
|
||||
%shifttype16i64 = type <16 x i64>
|
||||
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i64
|
||||
; SSE2: cost of 160 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift16i64
|
||||
; SSE2-CODEGEN: sarq %cl
|
||||
|
||||
%0 = ashr %shifttype16i64 %a , %b
|
||||
ret %shifttype16i64 %0
|
||||
}
|
||||
|
||||
%shifttype32i64 = type <32 x i64>
|
||||
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i64
|
||||
; SSE2: cost of 256 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift32i64
|
||||
; SSE2-CODEGEN: sarq %cl
|
||||
|
||||
%0 = ashr %shifttype32i64 %a , %b
|
||||
ret %shifttype32i64 %0
|
||||
}
|
||||
|
||||
%shifttype2i8 = type <2 x i8>
|
||||
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift2i8
|
||||
; SSE2: cost of 20 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift2i8
|
||||
; SSE2-CODEGEN: sarq %cl
|
||||
|
||||
%0 = ashr %shifttype2i8 %a , %b
|
||||
ret %shifttype2i8 %0
|
||||
}
|
||||
|
||||
%shifttype4i8 = type <4 x i8>
|
||||
define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i8
|
||||
; SSE2: cost of 40 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift4i8
|
||||
; SSE2-CODEGEN: sarl %cl
|
||||
|
||||
%0 = ashr %shifttype4i8 %a , %b
|
||||
ret %shifttype4i8 %0
|
||||
}
|
||||
|
||||
%shifttype8i8 = type <8 x i8>
|
||||
define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i8
|
||||
; SSE2: cost of 80 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift8i8
|
||||
; SSE2-CODEGEN: sarw %cl
|
||||
|
||||
%0 = ashr %shifttype8i8 %a , %b
|
||||
ret %shifttype8i8 %0
|
||||
}
|
||||
|
||||
%shifttype16i8 = type <16 x i8>
|
||||
define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i8
|
||||
; SSE2: cost of 160 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift16i8
|
||||
; SSE2-CODEGEN: sarb %cl
|
||||
|
||||
%0 = ashr %shifttype16i8 %a , %b
|
||||
ret %shifttype16i8 %0
|
||||
}
|
||||
|
||||
%shifttype32i8 = type <32 x i8>
|
||||
define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i8
|
||||
; SSE2: cost of 320 {{.*}} ashr
|
||||
; SSE2-CODEGEN: shift32i8
|
||||
; SSE2-CODEGEN: sarb %cl
|
||||
|
||||
%0 = ashr %shifttype32i8 %a , %b
|
||||
ret %shifttype32i8 %0
|
||||
}
|
||||
|
243
test/Analysis/CostModel/X86/testshiftlshr.ll
Normal file
243
test/Analysis/CostModel/X86/testshiftlshr.ll
Normal file
@ -0,0 +1,243 @@
|
||||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
|
||||
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
|
||||
|
||||
%shifttype = type <2 x i16>
|
||||
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
|
||||
entry:
|
||||
; SSE2: shift2i16
|
||||
; SSE2: cost of 20 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift2i16
|
||||
; SSE2-CODEGEN: shrq %cl
|
||||
|
||||
%0 = lshr %shifttype %a , %b
|
||||
ret %shifttype %0
|
||||
}
|
||||
|
||||
%shifttype4i16 = type <4 x i16>
|
||||
define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i16
|
||||
; SSE2: cost of 40 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift4i16
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype4i16 %a , %b
|
||||
ret %shifttype4i16 %0
|
||||
}
|
||||
|
||||
%shifttype8i16 = type <8 x i16>
|
||||
define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i16
|
||||
; SSE2: cost of 80 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift8i16
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype8i16 %a , %b
|
||||
ret %shifttype8i16 %0
|
||||
}
|
||||
|
||||
%shifttype16i16 = type <16 x i16>
|
||||
define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i16
|
||||
; SSE2: cost of 160 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift16i16
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype16i16 %a , %b
|
||||
ret %shifttype16i16 %0
|
||||
}
|
||||
|
||||
%shifttype32i16 = type <32 x i16>
|
||||
define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i16
|
||||
; SSE2: cost of 320 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift32i16
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype32i16 %a , %b
|
||||
ret %shifttype32i16 %0
|
||||
}
|
||||
|
||||
%shifttype2i32 = type <2 x i32>
|
||||
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift2i32
|
||||
; SSE2: cost of 20 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift2i32
|
||||
; SSE2-CODEGEN: shrq %cl
|
||||
|
||||
%0 = lshr %shifttype2i32 %a , %b
|
||||
ret %shifttype2i32 %0
|
||||
}
|
||||
|
||||
%shifttype4i32 = type <4 x i32>
|
||||
define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i32
|
||||
; SSE2: cost of 40 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift4i32
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype4i32 %a , %b
|
||||
ret %shifttype4i32 %0
|
||||
}
|
||||
|
||||
%shifttype8i32 = type <8 x i32>
|
||||
define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i32
|
||||
; SSE2: cost of 80 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift8i32
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype8i32 %a , %b
|
||||
ret %shifttype8i32 %0
|
||||
}
|
||||
|
||||
%shifttype16i32 = type <16 x i32>
|
||||
define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i32
|
||||
; SSE2: cost of 160 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift16i32
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype16i32 %a , %b
|
||||
ret %shifttype16i32 %0
|
||||
}
|
||||
|
||||
%shifttype32i32 = type <32 x i32>
|
||||
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i32
|
||||
; SSE2: cost of 256 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift32i32
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype32i32 %a , %b
|
||||
ret %shifttype32i32 %0
|
||||
}
|
||||
|
||||
%shifttype2i64 = type <2 x i64>
|
||||
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift2i64
|
||||
; SSE2: cost of 20 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift2i64
|
||||
; SSE2-CODEGEN: shrq %cl
|
||||
|
||||
%0 = lshr %shifttype2i64 %a , %b
|
||||
ret %shifttype2i64 %0
|
||||
}
|
||||
|
||||
%shifttype4i64 = type <4 x i64>
|
||||
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i64
|
||||
; SSE2: cost of 40 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift4i64
|
||||
; SSE2-CODEGEN: shrq %cl
|
||||
|
||||
%0 = lshr %shifttype4i64 %a , %b
|
||||
ret %shifttype4i64 %0
|
||||
}
|
||||
|
||||
%shifttype8i64 = type <8 x i64>
|
||||
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i64
|
||||
; SSE2: cost of 80 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift8i64
|
||||
; SSE2-CODEGEN: shrq %cl
|
||||
|
||||
%0 = lshr %shifttype8i64 %a , %b
|
||||
ret %shifttype8i64 %0
|
||||
}
|
||||
|
||||
%shifttype16i64 = type <16 x i64>
|
||||
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i64
|
||||
; SSE2: cost of 160 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift16i64
|
||||
; SSE2-CODEGEN: shrq %cl
|
||||
|
||||
%0 = lshr %shifttype16i64 %a , %b
|
||||
ret %shifttype16i64 %0
|
||||
}
|
||||
|
||||
%shifttype32i64 = type <32 x i64>
|
||||
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i64
|
||||
; SSE2: cost of 256 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift32i64
|
||||
; SSE2-CODEGEN: shrq %cl
|
||||
|
||||
%0 = lshr %shifttype32i64 %a , %b
|
||||
ret %shifttype32i64 %0
|
||||
}
|
||||
|
||||
%shifttype2i8 = type <2 x i8>
|
||||
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift2i8
|
||||
; SSE2: cost of 20 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift2i8
|
||||
; SSE2-CODEGEN: shrq %cl
|
||||
|
||||
%0 = lshr %shifttype2i8 %a , %b
|
||||
ret %shifttype2i8 %0
|
||||
}
|
||||
|
||||
%shifttype4i8 = type <4 x i8>
|
||||
define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i8
|
||||
; SSE2: cost of 40 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift4i8
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype4i8 %a , %b
|
||||
ret %shifttype4i8 %0
|
||||
}
|
||||
|
||||
%shifttype8i8 = type <8 x i8>
|
||||
define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i8
|
||||
; SSE2: cost of 80 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift8i8
|
||||
; SSE2-CODEGEN: shrl %cl
|
||||
|
||||
%0 = lshr %shifttype8i8 %a , %b
|
||||
ret %shifttype8i8 %0
|
||||
}
|
||||
|
||||
%shifttype16i8 = type <16 x i8>
|
||||
define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i8
|
||||
; SSE2: cost of 160 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift16i8
|
||||
; SSE2-CODEGEN: shrb %cl
|
||||
|
||||
%0 = lshr %shifttype16i8 %a , %b
|
||||
ret %shifttype16i8 %0
|
||||
}
|
||||
|
||||
%shifttype32i8 = type <32 x i8>
|
||||
define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i8
|
||||
; SSE2: cost of 320 {{.*}} lshr
|
||||
; SSE2-CODEGEN: shift32i8
|
||||
; SSE2-CODEGEN: shrb %cl
|
||||
|
||||
%0 = lshr %shifttype32i8 %a , %b
|
||||
ret %shifttype32i8 %0
|
||||
}
|
||||
|
242
test/Analysis/CostModel/X86/testshiftshl.ll
Normal file
242
test/Analysis/CostModel/X86/testshiftshl.ll
Normal file
@ -0,0 +1,242 @@
|
||||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
|
||||
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
|
||||
|
||||
%shifttype = type <2 x i16>
|
||||
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
|
||||
entry:
|
||||
; SSE2: shift2i16
|
||||
; SSE2: cost of 20 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift2i16
|
||||
; SSE2-CODEGEN: shlq %cl
|
||||
|
||||
%0 = shl %shifttype %a , %b
|
||||
ret %shifttype %0
|
||||
}
|
||||
|
||||
%shifttype4i16 = type <4 x i16>
|
||||
define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i16
|
||||
; SSE2: cost of 10 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift4i16
|
||||
; SSE2-CODEGEN: pmuludq
|
||||
|
||||
%0 = shl %shifttype4i16 %a , %b
|
||||
ret %shifttype4i16 %0
|
||||
}
|
||||
|
||||
%shifttype8i16 = type <8 x i16>
|
||||
define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i16
|
||||
; SSE2: cost of 80 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift8i16
|
||||
; SSE2-CODEGEN: shll %cl
|
||||
|
||||
%0 = shl %shifttype8i16 %a , %b
|
||||
ret %shifttype8i16 %0
|
||||
}
|
||||
|
||||
%shifttype16i16 = type <16 x i16>
|
||||
define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i16
|
||||
; SSE2: cost of 160 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift16i16
|
||||
; SSE2-CODEGEN: shll %cl
|
||||
|
||||
%0 = shl %shifttype16i16 %a , %b
|
||||
ret %shifttype16i16 %0
|
||||
}
|
||||
|
||||
%shifttype32i16 = type <32 x i16>
|
||||
define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i16
|
||||
; SSE2: cost of 320 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift32i16
|
||||
; SSE2-CODEGEN: shll %cl
|
||||
|
||||
%0 = shl %shifttype32i16 %a , %b
|
||||
ret %shifttype32i16 %0
|
||||
}
|
||||
|
||||
%shifttype2i32 = type <2 x i32>
|
||||
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift2i32
|
||||
; SSE2: cost of 20 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift2i32
|
||||
; SSE2-CODEGEN: shlq %cl
|
||||
|
||||
%0 = shl %shifttype2i32 %a , %b
|
||||
ret %shifttype2i32 %0
|
||||
}
|
||||
|
||||
%shifttype4i32 = type <4 x i32>
|
||||
define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i32
|
||||
; SSE2: cost of 10 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift4i32
|
||||
; SSE2-CODEGEN: pmuludq
|
||||
|
||||
%0 = shl %shifttype4i32 %a , %b
|
||||
ret %shifttype4i32 %0
|
||||
}
|
||||
|
||||
%shifttype8i32 = type <8 x i32>
|
||||
define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i32
|
||||
; SSE2: cost of 20 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift8i32
|
||||
; SSE2-CODEGEN: pmuludq
|
||||
|
||||
%0 = shl %shifttype8i32 %a , %b
|
||||
ret %shifttype8i32 %0
|
||||
}
|
||||
|
||||
%shifttype16i32 = type <16 x i32>
|
||||
define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i32
|
||||
; SSE2: cost of 40 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift16i32
|
||||
; SSE2-CODEGEN: pmuludq
|
||||
|
||||
%0 = shl %shifttype16i32 %a , %b
|
||||
ret %shifttype16i32 %0
|
||||
}
|
||||
|
||||
%shifttype32i32 = type <32 x i32>
|
||||
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i32
|
||||
; SSE2: cost of 256 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift32i32
|
||||
; SSE2-CODEGEN: pmuludq
|
||||
|
||||
%0 = shl %shifttype32i32 %a , %b
|
||||
ret %shifttype32i32 %0
|
||||
}
|
||||
|
||||
%shifttype2i64 = type <2 x i64>
|
||||
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift2i64
|
||||
; SSE2: cost of 20 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift2i64
|
||||
; SSE2-CODEGEN: shlq %cl
|
||||
|
||||
%0 = shl %shifttype2i64 %a , %b
|
||||
ret %shifttype2i64 %0
|
||||
}
|
||||
|
||||
%shifttype4i64 = type <4 x i64>
|
||||
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i64
|
||||
; SSE2: cost of 40 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift4i64
|
||||
; SSE2-CODEGEN: shlq %cl
|
||||
|
||||
%0 = shl %shifttype4i64 %a , %b
|
||||
ret %shifttype4i64 %0
|
||||
}
|
||||
|
||||
%shifttype8i64 = type <8 x i64>
|
||||
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i64
|
||||
; SSE2: cost of 80 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift8i64
|
||||
; SSE2-CODEGEN: shlq %cl
|
||||
|
||||
%0 = shl %shifttype8i64 %a , %b
|
||||
ret %shifttype8i64 %0
|
||||
}
|
||||
|
||||
%shifttype16i64 = type <16 x i64>
|
||||
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i64
|
||||
; SSE2: cost of 160 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift16i64
|
||||
; SSE2-CODEGEN: shlq %cl
|
||||
|
||||
%0 = shl %shifttype16i64 %a , %b
|
||||
ret %shifttype16i64 %0
|
||||
}
|
||||
|
||||
%shifttype32i64 = type <32 x i64>
|
||||
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i64
|
||||
; SSE2: cost of 256 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift32i64
|
||||
; SSE2-CODEGEN: shlq %cl
|
||||
|
||||
%0 = shl %shifttype32i64 %a , %b
|
||||
ret %shifttype32i64 %0
|
||||
}
|
||||
|
||||
%shifttype2i8 = type <2 x i8>
|
||||
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift2i8
|
||||
; SSE2: cost of 20 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift2i8
|
||||
; SSE2-CODEGEN: shlq %cl
|
||||
|
||||
%0 = shl %shifttype2i8 %a , %b
|
||||
ret %shifttype2i8 %0
|
||||
}
|
||||
|
||||
%shifttype4i8 = type <4 x i8>
|
||||
define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift4i8
|
||||
; SSE2: cost of 10 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift4i8
|
||||
; SSE2-CODEGEN: pmuludq
|
||||
|
||||
%0 = shl %shifttype4i8 %a , %b
|
||||
ret %shifttype4i8 %0
|
||||
}
|
||||
|
||||
%shifttype8i8 = type <8 x i8>
|
||||
define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift8i8
|
||||
; SSE2: cost of 80 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift8i8
|
||||
; SSE2-CODEGEN: shll
|
||||
|
||||
%0 = shl %shifttype8i8 %a , %b
|
||||
ret %shifttype8i8 %0
|
||||
}
|
||||
|
||||
%shifttype16i8 = type <16 x i8>
|
||||
define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift16i8
|
||||
; SSE2: cost of 30 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift16i8
|
||||
; SSE2-CODEGEN: cmpeqb
|
||||
|
||||
%0 = shl %shifttype16i8 %a , %b
|
||||
ret %shifttype16i8 %0
|
||||
}
|
||||
|
||||
%shifttype32i8 = type <32 x i8>
|
||||
define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
|
||||
entry:
|
||||
; SSE2: shift32i8
|
||||
; SSE2: cost of 60 {{.*}} shl
|
||||
; SSE2-CODEGEN: shift32i8
|
||||
; SSE2-CODEGEN: cmpeqb
|
||||
|
||||
%0 = shl %shifttype32i8 %a , %b
|
||||
ret %shifttype32i8 %0
|
||||
}
|
Loading…
Reference in New Issue
Block a user