ARM cost model: Cost for scalar integer casts and floating point conversions

Also adds some costs for vector integer <-> floating-point conversions.

llvm-svn: 174371
This commit is contained in:
Arnold Schwaighofer 2013-02-05 14:05:55 +00:00
parent ad94615e06
commit d1587de3eb
2 changed files with 266 additions and 7 deletions

View File

@ -177,25 +177,126 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost
// TODO: Get these tables to know at least what the related operations are
static const TypeConversionCostTblEntry<MVT> NEONConversionTbl[] = {
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
// Vector double <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }
};
if (ST->hasNEON()) {
int Idx = ConvertCostTableLookup<MVT>(NEONConversionTbl,
array_lengthof(NEONConversionTbl),
if (SrcTy.isVector() && ST->hasNEON()) {
int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
array_lengthof(NEONVectorConversionTbl),
ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONConversionTbl[Idx].Cost;
return NEONVectorConversionTbl[Idx].Cost;
}
// Scalar float to integer conversions.
static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
};
if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
array_lengthof(NEONFloatConversionTbl),
ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT());
if (Idx != -1)
return NEONFloatConversionTbl[Idx].Cost;
}
// Scalar integer to float conversions.
static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
};
if (SrcTy.isInteger() && ST->hasNEON()) {
int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
array_lengthof(NEONIntegerConversionTbl),
ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT());
if (Idx != -1)
return NEONIntegerConversionTbl[Idx].Cost;
}
// Scalar integer conversion costs.
static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
{ ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
// Truncates on i64 are assumed to be free.
{ ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
{ ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
{ ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
{ ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
};
if (SrcTy.isInteger()) {
int Idx =
ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
array_lengthof(ARMIntegerConversionTbl),
ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT());
if (Idx != -1)
return ARMIntegerConversionTbl[Idx].Cost;
}
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}

View File

@ -0,0 +1,158 @@
; Cost-model test for scalar integer casts and scalar integer <-> floating
; point conversions. The module is analyzed by opt's cost-model analysis for a
; thumbv7 iOS triple with -mcpu=swift, and the printed costs are verified by
; FileCheck against the directives embedded in this file.
; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios6.0.0"
; @casts holds one cast or conversion instruction per scalar source/destination
; type pair. Every operand is undef: the test only inspects the per-instruction
; cost printed by the cost-model analysis. Each expected cost is written on the
; line directly above the instruction it describes, and the expectations are
; matched in the order they appear.
define i32 @casts() {
; -- scalars --
; i1 source: sign/zero extensions to i8, i16, i32 and i64.
; CHECK: cost of 1 {{.*}} sext
%r0 = sext i1 undef to i8
; CHECK: cost of 1 {{.*}} zext
%r1 = zext i1 undef to i8
; CHECK: cost of 1 {{.*}} sext
%r2 = sext i1 undef to i16
; CHECK: cost of 1 {{.*}} zext
%r3 = zext i1 undef to i16
; CHECK: cost of 1 {{.*}} sext
%r4 = sext i1 undef to i32
; CHECK: cost of 1 {{.*}} zext
%r5 = zext i1 undef to i32
; CHECK: cost of 1 {{.*}} sext
%r6 = sext i1 undef to i64
; CHECK: cost of 1 {{.*}} zext
%r7 = zext i1 undef to i64
; i8 source: truncation to i1, then extensions to i16, i32 and i64.
; CHECK: cost of 0 {{.*}} trunc
%r8 = trunc i8 undef to i1
; CHECK: cost of 1 {{.*}} sext
%r9 = sext i8 undef to i16
; CHECK: cost of 1 {{.*}} zext
%r10 = zext i8 undef to i16
; CHECK: cost of 1 {{.*}} sext
%r11 = sext i8 undef to i32
; CHECK: cost of 1 {{.*}} zext
%r12 = zext i8 undef to i32
; CHECK: cost of 1 {{.*}} sext
%r13 = sext i8 undef to i64
; CHECK: cost of 1 {{.*}} zext
%r14 = zext i8 undef to i64
; i16 source: truncations to i1 and i8, then extensions to i32 and i64.
; CHECK: cost of 0 {{.*}} trunc
%r15 = trunc i16 undef to i1
; CHECK: cost of 0 {{.*}} trunc
%r16 = trunc i16 undef to i8
; CHECK: cost of 1 {{.*}} sext
%r17 = sext i16 undef to i32
; CHECK: cost of 1 {{.*}} zext
%r18 = zext i16 undef to i32
; sext i16 -> i64 is the only extension in this test expected to cost 2; the
; ARM integer conversion table notes it requires two dependent operations.
; CHECK: cost of 2 {{.*}} sext
%r19 = sext i16 undef to i64
; CHECK: cost of 1 {{.*}} zext
%r20 = zext i16 undef to i64
; i32 source: truncations to i1, i8 and i16, then extensions to i64.
; CHECK: cost of 0 {{.*}} trunc
%r21 = trunc i32 undef to i1
; CHECK: cost of 0 {{.*}} trunc
%r22 = trunc i32 undef to i8
; CHECK: cost of 0 {{.*}} trunc
%r23 = trunc i32 undef to i16
; CHECK: cost of 1 {{.*}} sext
%r24 = sext i32 undef to i64
; CHECK: cost of 1 {{.*}} zext
%r25 = zext i32 undef to i64
; i64 source: truncations only. The ARM integer conversion table assumes
; truncates from i64 are free.
; CHECK: cost of 0 {{.*}} trunc
%r26 = trunc i64 undef to i1
; CHECK: cost of 0 {{.*}} trunc
%r27 = trunc i64 undef to i8
; CHECK: cost of 0 {{.*}} trunc
%r28 = trunc i64 undef to i16
; CHECK: cost of 0 {{.*}} trunc
%r29 = trunc i64 undef to i32
; -- floating point conversions --
; Moves between scalar and NEON registers.
; float source: conversions to i1, i8, i16 and i32.
; CHECK: cost of 2 {{.*}} fptoui
%r30 = fptoui float undef to i1
; CHECK: cost of 2 {{.*}} fptosi
%r31 = fptosi float undef to i1
; CHECK: cost of 2 {{.*}} fptoui
%r32 = fptoui float undef to i8
; CHECK: cost of 2 {{.*}} fptosi
%r33 = fptosi float undef to i8
; CHECK: cost of 2 {{.*}} fptoui
%r34 = fptoui float undef to i16
; CHECK: cost of 2 {{.*}} fptosi
%r35 = fptosi float undef to i16
; CHECK: cost of 2 {{.*}} fptoui
%r36 = fptoui float undef to i32
; CHECK: cost of 2 {{.*}} fptosi
%r37 = fptosi float undef to i32
; float -> i64 is expected to cost 10, the same as the double -> i64 cases
; further down that are labelled as a function call.
; NOTE(review): presumably this is also lowered to a function call -- confirm.
; CHECK: cost of 10 {{.*}} fptoui
%r38 = fptoui float undef to i64
; CHECK: cost of 10 {{.*}} fptosi
%r39 = fptosi float undef to i64
; double source: conversions to i1, i8, i16 and i32.
; CHECK: cost of 2 {{.*}} fptoui
%r40 = fptoui double undef to i1
; CHECK: cost of 2 {{.*}} fptosi
%r41 = fptosi double undef to i1
; CHECK: cost of 2 {{.*}} fptoui
%r42 = fptoui double undef to i8
; CHECK: cost of 2 {{.*}} fptosi
%r43 = fptosi double undef to i8
; CHECK: cost of 2 {{.*}} fptoui
%r44 = fptoui double undef to i16
; CHECK: cost of 2 {{.*}} fptosi
%r45 = fptosi double undef to i16
; CHECK: cost of 2 {{.*}} fptoui
%r46 = fptoui double undef to i32
; CHECK: cost of 2 {{.*}} fptosi
%r47 = fptosi double undef to i32
; Function call
; CHECK: cost of 10 {{.*}} fptoui
%r48 = fptoui double undef to i64
; CHECK: cost of 10 {{.*}} fptosi
%r49 = fptosi double undef to i64
; Integer -> floating point conversions, grouped by source width
; (i1, i8, i16, i32), each converted to both float and double.
; CHECK: cost of 2 {{.*}} sitofp
%r50 = sitofp i1 undef to float
; CHECK: cost of 2 {{.*}} uitofp
%r51 = uitofp i1 undef to float
; CHECK: cost of 2 {{.*}} sitofp
%r52 = sitofp i1 undef to double
; CHECK: cost of 2 {{.*}} uitofp
%r53 = uitofp i1 undef to double
; CHECK: cost of 2 {{.*}} sitofp
%r54 = sitofp i8 undef to float
; CHECK: cost of 2 {{.*}} uitofp
%r55 = uitofp i8 undef to float
; CHECK: cost of 2 {{.*}} sitofp
%r56 = sitofp i8 undef to double
; CHECK: cost of 2 {{.*}} uitofp
%r57 = uitofp i8 undef to double
; CHECK: cost of 2 {{.*}} sitofp
%r58 = sitofp i16 undef to float
; CHECK: cost of 2 {{.*}} uitofp
%r59 = uitofp i16 undef to float
; CHECK: cost of 2 {{.*}} sitofp
%r60 = sitofp i16 undef to double
; CHECK: cost of 2 {{.*}} uitofp
%r61 = uitofp i16 undef to double
; CHECK: cost of 2 {{.*}} sitofp
%r62 = sitofp i32 undef to float
; CHECK: cost of 2 {{.*}} uitofp
%r63 = uitofp i32 undef to float
; CHECK: cost of 2 {{.*}} sitofp
%r64 = sitofp i32 undef to double
; CHECK: cost of 2 {{.*}} uitofp
%r65 = uitofp i32 undef to double
; Function call
; CHECK: cost of 10 {{.*}} sitofp
%r66 = sitofp i64 undef to float
; CHECK: cost of 10 {{.*}} uitofp
%r67 = uitofp i64 undef to float
; CHECK: cost of 10 {{.*}} sitofp
%r68 = sitofp i64 undef to double
; CHECK: cost of 10 {{.*}} uitofp
%r69 = uitofp i64 undef to double
; The return instruction itself is expected to be free.
;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}